From 213eca7f4888e9817e8076cdab6b9f7295c181f6 Mon Sep 17 00:00:00 2001
From: Greg KH <gregkh@suse.de>
Date: Wed, 30 Jan 2008 13:29:58 +0100
Subject: [PATCH 001/890] kobj: fix threshold_init_device/kobject_uevent_env
 oops

the logic in this function is just crazy.  It's recursive, but we
can circumvent the creation for the kobject and whole creation of the
threshold_block if some conditions are met.  That's why we see the
allocate_threshold_blocks so many times in the callstack, yet only a few
kobjects created.

Then we blow up in kobject_uevent_env() on the first debug printk.
Which means that we are just passing in garbage.

Man, this is one time that comments in code would have been very nice to
have, and why forward goto's into major code blocks are just evil...

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 753588755fee..073afa7dd89a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -450,7 +450,8 @@ recurse:
 	if (err)
 		goto out_free;
 
-	kobject_uevent(&b->kobj, KOBJ_ADD);
+	if (b)
+		kobject_uevent(&b->kobj, KOBJ_ADD);
 
 	return err;
 

From a6fa8e5a6172a5a5bc06ed04f34e50b36c978127 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: [PATCH 002/890] time: clean hungarian notation from timers

Clean up hungarian notation from timer code.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h |  6 ++--
 kernel/timer.c        | 80 +++++++++++++++++++++----------------------
 2 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 78cf899b4409..de0e71359ede 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -5,7 +5,7 @@
 #include <linux/ktime.h>
 #include <linux/stddef.h>
 
-struct tvec_t_base_s;
+struct tvec_base;
 
 struct timer_list {
 	struct list_head entry;
@@ -14,7 +14,7 @@ struct timer_list {
 	void (*function)(unsigned long);
 	unsigned long data;
 
-	struct tvec_t_base_s *base;
+	struct tvec_base *base;
 #ifdef CONFIG_TIMER_STATS
 	void *start_site;
 	char start_comm[16];
@@ -22,7 +22,7 @@ struct timer_list {
 #endif
 };
 
-extern struct tvec_t_base_s boot_tvec_bases;
+extern struct tvec_base boot_tvec_bases;
 
 #define TIMER_INITIALIZER(_function, _expires, _data) {		\
 		.function = (_function),			\
diff --git a/kernel/timer.c b/kernel/timer.c
index f739dfb539ce..aadfbc8367f5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -58,59 +58,57 @@ EXPORT_SYMBOL(jiffies_64);
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
 
-typedef struct tvec_s {
+struct tvec {
 	struct list_head vec[TVN_SIZE];
-} tvec_t;
+};
 
-typedef struct tvec_root_s {
+struct tvec_root {
 	struct list_head vec[TVR_SIZE];
-} tvec_root_t;
+};
 
-struct tvec_t_base_s {
+struct tvec_base {
 	spinlock_t lock;
 	struct timer_list *running_timer;
 	unsigned long timer_jiffies;
-	tvec_root_t tv1;
-	tvec_t tv2;
-	tvec_t tv3;
-	tvec_t tv4;
-	tvec_t tv5;
+	struct tvec_root tv1;
+	struct tvec tv2;
+	struct tvec tv3;
+	struct tvec tv4;
+	struct tvec tv5;
 } ____cacheline_aligned;
 
-typedef struct tvec_t_base_s tvec_base_t;
-
-tvec_base_t boot_tvec_bases;
+struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
- * Note that all tvec_bases is 2 byte aligned and lower bit of
+ * Note that all tvec_bases are 2 byte aligned and lower bit of
  * base in timer_list is guaranteed to be zero. Use the LSB for
  * the new flag to indicate whether the timer is deferrable
  */
 #define TBASE_DEFERRABLE_FLAG		(0x1)
 
 /* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
 	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
 }
 
-static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 {
-	return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-	timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
 				       TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void
-timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
 {
-	timer->base = (tvec_base_t *)((unsigned long)(new_base) |
+	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
 				      tbase_get_deferrable(timer->base));
 }
 
@@ -246,7 +244,7 @@ unsigned long round_jiffies_relative(unsigned long j)
 EXPORT_SYMBOL_GPL(round_jiffies_relative);
 
 
-static inline void set_running_timer(tvec_base_t *base,
+static inline void set_running_timer(struct tvec_base *base,
 					struct timer_list *timer)
 {
 #ifdef CONFIG_SMP
@@ -254,7 +252,7 @@ static inline void set_running_timer(tvec_base_t *base,
 #endif
 }
 
-static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	unsigned long expires = timer->expires;
 	unsigned long idx = expires - base->timer_jiffies;
@@ -371,14 +369,14 @@ static inline void detach_timer(struct timer_list *timer,
  * possible to set timer->base = NULL and drop the lock: the timer remains
  * locked.
  */
-static tvec_base_t *lock_timer_base(struct timer_list *timer,
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
 					unsigned long *flags)
 	__acquires(timer->base->lock)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 
 	for (;;) {
-		tvec_base_t *prelock_base = timer->base;
+		struct tvec_base *prelock_base = timer->base;
 		base = tbase_get_base(prelock_base);
 		if (likely(base != NULL)) {
 			spin_lock_irqsave(&base->lock, *flags);
@@ -393,7 +391,7 @@ static tvec_base_t *lock_timer_base(struct timer_list *timer,
 
 int __mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	tvec_base_t *base, *new_base;
+	struct tvec_base *base, *new_base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -445,7 +443,7 @@ EXPORT_SYMBOL(__mod_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	tvec_base_t *base = per_cpu(tvec_bases, cpu);
+	struct tvec_base *base = per_cpu(tvec_bases, cpu);
 	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
@@ -508,7 +506,7 @@ EXPORT_SYMBOL(mod_timer);
  */
 int del_timer(struct timer_list *timer)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -539,7 +537,7 @@ EXPORT_SYMBOL(del_timer);
  */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 	unsigned long flags;
 	int ret = -1;
 
@@ -591,7 +589,7 @@ int del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
-static int cascade(tvec_base_t *base, tvec_t *tv, int index)
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 {
 	/* cascade all the timers from tv up one level */
 	struct timer_list *timer, *tmp;
@@ -620,7 +618,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
  * This function cascades all vectors and executes all expired timer
  * vectors.
  */
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(struct tvec_base *base)
 {
 	struct timer_list *timer;
 
@@ -678,13 +676,13 @@ static inline void __run_timers(tvec_base_t *base)
  * is used on S/390 to stop all activity when a cpus is idle.
  * This functions needs to be called disabled.
  */
-static unsigned long __next_timer_interrupt(tvec_base_t *base)
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
 {
 	unsigned long timer_jiffies = base->timer_jiffies;
 	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
 	int index, slot, array, found = 0;
 	struct timer_list *nte;
-	tvec_t *varray[4];
+	struct tvec *varray[4];
 
 	/* Look for timer events in tv1. */
 	index = slot = timer_jiffies & TVR_MASK;
@@ -716,7 +714,7 @@ cascade:
 	varray[3] = &base->tv5;
 
 	for (array = 0; array < 4; array++) {
-		tvec_t *varp = varray[array];
+		struct tvec *varp = varray[array];
 
 		index = slot = timer_jiffies & TVN_MASK;
 		do {
@@ -795,7 +793,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
  */
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
-	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	struct tvec_base *base = __get_cpu_var(tvec_bases);
 	unsigned long expires;
 
 	spin_lock(&base->lock);
@@ -894,7 +892,7 @@ static inline void calc_load(unsigned long ticks)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
 	hrtimer_run_pending();
 
@@ -1223,7 +1221,7 @@ static struct lock_class_key base_lock_keys[NR_CPUS];
 static int __cpuinit init_timers_cpu(int cpu)
 {
 	int j;
-	tvec_base_t *base;
+	struct tvec_base *base;
 	static char __cpuinitdata tvec_base_done[NR_CPUS];
 
 	if (!tvec_base_done[cpu]) {
@@ -1278,7 +1276,7 @@ static int __cpuinit init_timers_cpu(int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
 {
 	struct timer_list *timer;
 
@@ -1292,8 +1290,8 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 
 static void __cpuinit migrate_timers(int cpu)
 {
-	tvec_base_t *old_base;
-	tvec_base_t *new_base;
+	struct tvec_base *old_base;
+	struct tvec_base *new_base;
 	int i;
 
 	BUG_ON(cpu_online(cpu));

From 4c9dc6412247abf4972080c51cd16a58c4009c19 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: [PATCH 003/890] time: timer cleanups

Small cleanups to tick-related code. Wrong preempt count is followed
by BUG(), so it is hardly KERN_WARNING.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 4 ++--
 kernel/timer.c           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1a21b6fdb674..d36ee2fd1a3b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(void)
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				goto out;
-		} else if(!tick_program_event(expires, 0))
+		} else if (!tick_program_event(expires, 0))
 				goto out;
 		/*
 		 * We are past the event already. So we crossed a
@@ -507,7 +507,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
 /*
- * We rearm the timer until we get disabled by the idle code
+ * We rearm the timer until we get disabled by the idle code.
  * Called with interrupts disabled and timer->base->cpu_base->lock held.
  */
 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
diff --git a/kernel/timer.c b/kernel/timer.c
index aadfbc8367f5..23f7ead78fae 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -655,7 +655,7 @@ static inline void __run_timers(struct tvec_base *base)
 				int preempt_count = preempt_count();
 				fn(data);
 				if (preempt_count != preempt_count()) {
-					printk(KERN_WARNING "huh, entered %p "
+					printk(KERN_ERR "huh, entered %p "
 					       "with preempt_count %08x, exited"
 					       " with %08x?\n",
 					       fn, preempt_count,

From b10db7f0d2b589a7f88dc3026e150756cb437a28 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: [PATCH 004/890] time: more timer related cleanups

I was confused by FSEC = 10^15 NSEC statement, plus small whitespace
fixes. When there's copyright, there should be GPL.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c       | 3 ++-
 arch/x86/kernel/process_64.c | 2 +-
 kernel/softirq.c             | 4 +++-
 kernel/time/tick-sched.c     | 2 +-
 kernel/time/timer_stats.c    | 2 +-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 2f99ee206b95..9ec2ab793042 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -16,7 +16,8 @@
 #define HPET_MASK	CLOCKSOURCE_MASK(32)
 #define HPET_SHIFT	22
 
-/* FSEC = 10^-15 NSEC = 10^-9 */
+/* FSEC = 10^-15
+   NSEC = 10^-9 */
 #define FSEC_PER_NSEC	1000000
 
 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ab79e1dfa023..c2db7ef93565 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -207,7 +207,7 @@ static inline void play_dead(void)
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
-void cpu_idle (void)
+void cpu_idle(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 	/* endless idle loop with no priority at all */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bd89bc4eb0b9..8fe1ff40102d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -3,7 +3,9 @@
  *
  *	Copyright (C) 1992 Linus Torvalds
  *
- * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *	Distribute under GPLv2.
+ *
+ *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
  */
 
 #include <linux/module.h>
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d36ee2fd1a3b..49e12f6a4bab 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -9,7 +9,7 @@
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
  *
- *  For licencing details see kernel-base/COPYING
+ *  Distribute under GPLv2.
  */
 #include <linux/cpu.h>
 #include <linux/err.h>
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c36bb7ed0301..417da8c5bc72 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -26,7 +26,7 @@
  * the pid and cmdline from the owner process if applicable.
  *
  * Start/stop data collection:
- * # echo 1[0] >/proc/timer_stats
+ * # echo [1|0] >/proc/timer_stats
  *
  * Display the information collected so far:
  * # cat /proc/timer_stats

From 186e3cb8a465bac010ee3b020768d2fa2b505aef Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: [PATCH 005/890] timer: clean up tick-broadcast.c

clean up tick-broadcast.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/tick-broadcast.c | 7 ++-----
 kernel/time/tick-internal.h  | 2 --
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 5b86698faa0b..e1bd50cbbf5d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 /*
  * Broadcast the event to the cpus, which are set in the mask
  */
-int tick_do_broadcast(cpumask_t mask)
+static void tick_do_broadcast(cpumask_t mask)
 {
-	int ret = 0, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
 	struct tick_device *td;
 
 	/*
@@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask)
 		cpu_clear(cpu, mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->event_handler(td->evtdev);
-		ret = 1;
 	}
 
 	if (!cpus_empty(mask)) {
@@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask)
 		cpu = first_cpu(mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->broadcast(mask);
-		ret = 1;
 	}
-	return ret;
 }
 
 /*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index bb13f2724905..f13f2b7f4fd4 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  * Broadcasting support
  */
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_do_broadcast(cpumask_t mask);
-
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern int tick_check_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);

From 1d76c2622813fbc692b0d323028cfef9ee36051a Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: [PATCH 006/890] clocksource: make CLOCKSOURCE_MASK bullet-proof

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 107787aacb64..07b42153de24 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -103,7 +103,7 @@ struct clocksource {
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 
 /* simplify initialization of mask field */
-#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1)
+#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
 
 /**
  * clocksource_khz2mult - calculates mult from khz and shift

From efd9ac8630e89b9ee7ce64008bd7783952374f37 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: [PATCH 007/890] time: fold __get_realtime_clock_ts() into
 getnstimeofday()

  - getnstimeofday() was just a wrapper around __get_realtime_clock_ts()
  - Replace calls to __get_realtime_clock_ts() by calls to getnstimeofday()
  - Fix bogus reference to get_realtime_clock_ts(), which never existed

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ab46ae8c062b..77680195cf84 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void)
 }
 
 /**
- * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * getnstimeofday - Returns the time of day in a timespec
  * @ts:		pointer to the timespec to be set
  *
- * Returns the time of day in a timespec. Used by
- * do_gettimeofday() and get_realtime_clock_ts().
+ * Returns the time of day in a timespec.
  */
-static inline void __get_realtime_clock_ts(struct timespec *ts)
+void getnstimeofday(struct timespec *ts)
 {
 	unsigned long seq;
 	s64 nsecs;
@@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts)
 	timespec_add_ns(ts, nsecs);
 }
 
-/**
- * getnstimeofday - Returns the time of day in a timespec
- * @ts:		pointer to the timespec to be set
- *
- * Returns the time of day in a timespec.
- */
-void getnstimeofday(struct timespec *ts)
-{
-	__get_realtime_clock_ts(ts);
-}
-
 EXPORT_SYMBOL(getnstimeofday);
 
 /**
  * do_gettimeofday - Returns the time of day in a timeval
  * @tv:		pointer to the timeval to be set
  *
- * NOTE: Users should be converted to using get_realtime_clock_ts()
+ * NOTE: Users should be converted to using getnstimeofday()
  */
 void do_gettimeofday(struct timeval *tv)
 {
 	struct timespec now;
 
-	__get_realtime_clock_ts(&now);
+	getnstimeofday(&now);
 	tv->tv_sec = now.tv_sec;
 	tv->tv_usec = now.tv_nsec/1000;
 }

From 1077f5a917b7c630231037826b344b2f7f5b903f Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.warudkar@gmail.com>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: [PATCH 008/890] clocksource.c: use init_timer_deferrable for
 clocksource_watchdog

clocksource_watchdog can use a deferrable timer - reduces wakeups from
idle per second.

Signed-off-by: Parag Warudkar <parag.warudkar@gmail.com>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clocksource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 8d6125ad2cf0..cabfa193efb3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -175,7 +175,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 			if (watchdog)
 				del_timer(&watchdog_timer);
 			watchdog = cs;
-			init_timer(&watchdog_timer);
+			init_timer_deferrable(&watchdog_timer);
 			watchdog_timer.function = clocksource_watchdog;
 
 			/* Reset watchdog cycles */

From 1ada5cba6a0318f90e45b38557e7b5206a9cba38 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: [PATCH 009/890] clocksource: make clocksource watchdog cycle through
 online CPUs

This way it checks if the clocks are synchronized between CPUs too.
This might be able to detect slowly drifting TSCs which only
go wrong over longer time.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clocksource.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cabfa193efb3..edd5ef8e1765 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data)
 	}
 
 	if (!list_empty(&watchdog_list)) {
-		__mod_timer(&watchdog_timer,
-			    watchdog_timer.expires + WATCHDOG_INTERVAL);
+		/* Cycle through CPUs to check if the CPUs stay synchronized to
+		 * each other. */
+		int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
+		if (next_cpu >= NR_CPUS)
+			next_cpu = first_cpu(cpu_online_map);
+		watchdog_timer.expires += WATCHDOG_INTERVAL;
+		add_timer_on(&watchdog_timer, next_cpu);
 	}
 	spin_unlock(&watchdog_lock);
 }
@@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 		if (!started && watchdog) {
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
-			add_timer(&watchdog_timer);
+			add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
 		}
 	} else {
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 				watchdog_last = watchdog->read();
 				watchdog_timer.expires =
 					jiffies + WATCHDOG_INTERVAL;
-				add_timer(&watchdog_timer);
+				add_timer_on(&watchdog_timer,
+						first_cpu(cpu_online_map));
 			}
 		}
 	}

From df619e6bafeb8e129aba3173d384544d7fa6e3c2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: [PATCH 010/890] x86: offer is_hpet_enabled() on !CONFIG_HPET_TIMER
 too

offer is_hpet_enabled() on !CONFIG_HPET_TIMER too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/hpet.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index ad8d6e758785..624f600f7161 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -80,10 +80,11 @@ extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
 
 #endif /* CONFIG_HPET_EMULATE_RTC */
 
-#else
+#else /* CONFIG_HPET_TIMER */
 
 static inline int hpet_enable(void) { return 0; }
 static inline unsigned long hpet_readl(unsigned long a) { return 0; }
+static inline int is_hpet_enabled(void) { return 0; }
 
-#endif /* CONFIG_HPET_TIMER */
+#endif
 #endif /* ASM_X86_HPET_H */

From 316da3b3fc8efa9a5d2c99e0d449f01ff38c6aba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: [PATCH 011/890] x86: restrict PIT clocksource usage

PIT clocksource is registered unconditionally even when HPET is enabled
or when PIT is replaced by the local APIC timer. In both cases PIT can
not be used as it is stopped and the readout would be stale.

Prevent registering PIT in those cases.

patch depends on:

  x86: offer is_hpet_enabled() on !CONFIG_HPET_TIMER too

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8253.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index a42c80745325..0f8f35458a8f 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -197,7 +197,15 @@ static struct clocksource clocksource_pit = {
 
 static int __init init_pit_clocksource(void)
 {
-	if (num_possible_cpus() > 1) /* PIT does not scale! */
+	 /*
+	  * Several reasons not to register PIT as a clocksource:
+	  *
+	  * - On SMP PIT does not scale due to i8253_lock
+	  * - when HPET is enabled
+	  * - when local APIC timer is active (PIT is switched off)
+	  */
+	if (num_possible_cpus() > 1 || is_hpet_enabled() ||
+	    pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
 	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);

From 4713e22ce81eb8b3353e16435362eb3d0ec95640 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: [PATCH 012/890] clocksource: add unregister function to disable
 unusable clocksources

On x86 the PIT might become an unusable clocksource. Add an unregister
function to provide a possibilty to remove the PIT from the list of
available clock sources.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8253.c     |  1 +
 include/linux/clocksource.h |  1 +
 kernel/time/clocksource.c   | 15 +++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0f8f35458a8f..decc5d294d76 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -13,6 +13,7 @@
 #include <asm/delay.h>
 #include <asm/i8253.h>
 #include <asm/io.h>
+#include <asm/hpet.h>
 
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 07b42153de24..85778a4b1209 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 
 /* used to install a new clocksource */
 extern int clocksource_register(struct clocksource*);
+extern void clocksource_unregister(struct clocksource*);
 extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_resume(void);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index edd5ef8e1765..6e9259a5d501 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -337,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 
+/**
+ * clocksource_unregister - remove a registered clocksource
+ */
+void clocksource_unregister(struct clocksource *cs)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
+	list_del(&cs->list);
+	if (clocksource_override == cs)
+		clocksource_override = NULL;
+	next_clocksource = select_clocksource();
+	spin_unlock_irqrestore(&clocksource_lock, flags);
+}
+
 #ifdef CONFIG_SYSFS
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource

From 1a0c009ac53de4a7664a1239936f0bc258133156 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: [PATCH 013/890] x86: unregister PIT clocksource when PIT is disabled

The following scenario might leave PIT as a disfunctional clock source:

    PIT is registered as clocksource
    PM_TIMER is registered as clocksource and enables highres/dyntick mode
    PIT is switched to oneshot mode
    -> now the readout of PIT is bogus, but the user might select PIT
    via the sysfs override, which would break the box as the time
    readout is unusable.

Unregister the PIT clocksource when the PIT clock event device is switched
into shutdown / oneshot mode.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8253.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index decc5d294d76..377c3f8411f8 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -18,6 +18,12 @@
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
+#ifdef CONFIG_X86_32
+static void pit_disable_clocksource(void);
+#else
+static inline void pit_disable_clocksource(void) { }
+#endif
+
 /*
  * HPET replaces the PIT, when enabled. So we need to know, which of
  * the two timers is used
@@ -52,11 +58,13 @@ static void init_pit_timer(enum clock_event_mode mode,
 			outb_p(0, PIT_CH0);
 			outb_p(0, PIT_CH0);
 		}
+		pit_disable_clocksource();
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* One shot setup */
 		outb_p(0x38, PIT_MODE);
+		pit_disable_clocksource();
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -196,6 +204,17 @@ static struct clocksource clocksource_pit = {
 	.shift	= 20,
 };
 
+static void pit_disable_clocksource(void)
+{
+	/*
+	 * Use mult to check whether it is registered or not
+	 */
+	if (clocksource_pit.mult) {
+		clocksource_unregister(&clocksource_pit);
+		clocksource_pit.mult = 0;
+	}
+}
+
 static int __init init_pit_clocksource(void)
 {
 	 /*

From 45fe4fe19120a22f7339f5bb110447170c25fca9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: [PATCH 014/890] x86: make clockevents more robust

detect zero event-device multiplicators - they then cause
division-by-zero crashes if a clockevent has been initialized
incorrectly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clockevents.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5fb139fef9fa..3e59fce6dd43 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 {
 	u64 clc = ((u64) latch << evt->shift);
 
+	if (unlikely(!evt->mult)) {
+		evt->mult = 1;
+		WARN_ON(1);
+	}
+
 	do_div(clc, evt->mult);
 	if (clc < 1000)
 		clc = 1000;
@@ -151,6 +156,14 @@ static void clockevents_notify_released(void)
 void clockevents_register_device(struct clock_event_device *dev)
 {
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	/*
+	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
+	 * on it, so fix it up and emit a warning:
+	 */
+	if (unlikely(!dev->mult)) {
+		dev->mult = 1;
+		WARN_ON(1);
+	}
 
 	spin_lock(&clockevents_lock);
 

From e3f37a54f690d3e64995ea7ecea08c5ab3070faf Mon Sep 17 00:00:00 2001
From: Balaji Rao <balajirrao@gmail.com>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: [PATCH 015/890] x86: assign IRQs to HPET timers

The userspace API for the HPET (see Documentation/hpet.txt) did not work. The
HPET_IE_ON ioctl was failing as there was no IRQ assigned to the timer
device. This patch fixes it by allocating IRQs to timer blocks in the HPET.

arch/x86/kernel/hpet.c |   13 +++++--------
drivers/char/hpet.c    |   45 ++++++++++++++++++++++++++++++++++++++-------
include/linux/hpet.h   |    2 +-
3 files changed, 44 insertions(+), 16 deletions(-)

Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 13 +++++-------
 drivers/char/hpet.c    | 47 +++++++++++++++++++++++++++++++++++-------
 include/linux/hpet.h   |  2 +-
 3 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9ec2ab793042..786aa227afdf 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -117,8 +117,7 @@ int is_hpet_enabled(void)
 static void hpet_reserve_platform_timers(unsigned long id)
 {
 	struct hpet __iomem *hpet = hpet_virt_address;
-	struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
-	unsigned int nrtimers, i;
+	unsigned int nrtimers;
 	struct hpet_data hd;
 
 	nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
@@ -133,16 +132,14 @@ static void hpet_reserve_platform_timers(unsigned long id)
 #ifdef CONFIG_HPET_EMULATE_RTC
 	hpet_reserve_timer(&hd, 1);
 #endif
-
 	hd.hd_irq[0] = HPET_LEGACY_8254;
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
-	for (i = 2; i < nrtimers; timer++, i++)
-		hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
-			Tn_INT_ROUTE_CNF_SHIFT;
-
+	/*
+	 * IRQs for the other timers are assigned dynamically
+	 * in hpet_alloc
+	 */
 	hpet_alloc(&hd);
-
 }
 #else
 static void hpet_reserve_platform_timers(unsigned long id) { }
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4c16778e3f84..593b32cfbc33 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -806,14 +806,14 @@ static unsigned long hpet_calibrate(struct hpets *hpetp)
 
 int hpet_alloc(struct hpet_data *hdp)
 {
-	u64 cap, mcfg;
+	u64 cap, mcfg, hpet_config;
 	struct hpet_dev *devp;
-	u32 i, ntimer;
+	u32 i, ntimer, irq;
 	struct hpets *hpetp;
 	size_t siz;
 	struct hpet __iomem *hpet;
 	static struct hpets *last = NULL;
-	unsigned long period;
+	unsigned long period, irq_bitmap;
 	unsigned long long temp;
 
 	/*
@@ -840,12 +840,42 @@ int hpet_alloc(struct hpet_data *hdp)
 	hpetp->hp_hpet_phys = hdp->hd_phys_address;
 
 	hpetp->hp_ntimer = hdp->hd_nirqs;
-
-	for (i = 0; i < hdp->hd_nirqs; i++)
-		hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i];
-
 	hpet = hpetp->hp_hpet;
 
+	/* Assign IRQs statically for legacy devices */
+	hpetp->hp_dev[0].hd_hdwirq = hdp->hd_irq[0];
+	hpetp->hp_dev[1].hd_hdwirq = hdp->hd_irq[1];
+
+	/* Assign IRQs dynamically for the others */
+	for (i = 2, devp = &hpetp->hp_dev[2]; i < hdp->hd_nirqs; i++, devp++) {
+		struct hpet_timer __iomem *timer;
+
+		timer = &hpet->hpet_timers[devp - hpetp->hp_dev];
+
+		hpet_config = readq(&timer->hpet_config);
+		irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK)
+			>> Tn_INT_ROUTE_CAP_SHIFT;
+		if (!irq_bitmap)
+			irq = 0;        /* No valid IRQ Assignable */
+		else {
+			irq = find_first_bit(&irq_bitmap, 32);
+			do {
+				hpet_config |= irq << Tn_INT_ROUTE_CNF_SHIFT;
+				writeq(hpet_config, &timer->hpet_config);
+
+				/*
+				 * Verify whether we have written a valid
+				 * IRQ number by reading it back again
+				 */
+				hpet_config = readq(&timer->hpet_config);
+				if (irq == (hpet_config & Tn_INT_ROUTE_CNF_MASK)
+						>> Tn_INT_ROUTE_CNF_SHIFT)
+					break;  /* Success */
+			} while ((irq = (find_next_bit(&irq_bitmap, 32, irq))));
+		}
+		hpetp->hp_dev[i].hd_hdwirq = irq;
+	}
+
 	cap = readq(&hpet->hpet_cap);
 
 	ntimer = ((cap & HPET_NUM_TIM_CAP_MASK) >> HPET_NUM_TIM_CAP_SHIFT) + 1;
@@ -875,7 +905,8 @@ int hpet_alloc(struct hpet_data *hdp)
 		hpetp->hp_which, hdp->hd_phys_address,
 		hpetp->hp_ntimer > 1 ? "s" : "");
 	for (i = 0; i < hpetp->hp_ntimer; i++)
-		printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]);
+		printk("%s %d", i > 0 ? "," : "",
+				hpetp->hp_dev[i].hd_hdwirq);
 	printk("\n");
 
 	printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n",
diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index 707f7cb9e795..e3c0b2aa944c 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -64,7 +64,7 @@ struct hpet {
  */
 
 #define	Tn_INT_ROUTE_CAP_MASK		(0xffffffff00000000ULL)
-#define	Tn_INI_ROUTE_CAP_SHIFT		(32UL)
+#define	Tn_INT_ROUTE_CAP_SHIFT		(32UL)
 #define	Tn_FSB_INT_DELCAP_MASK		(0x8000UL)
 #define	Tn_FSB_INT_DELCAP_SHIFT		(15)
 #define	Tn_FSB_EN_CNF_MASK		(0x4000UL)

From 37a47db8d7f0f38dac5acf5a13abbc8f401707fa Mon Sep 17 00:00:00 2001
From: Balaji Rao <balajirrao@gmail.com>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: [PATCH 016/890] x86: assign IRQs to HPET timers, fix

Looks like IRQ 31 is assigned to timer 3, even without the patch!
I wonder who wrote the number 31. But the manual says that it is
zero by default.

I think we should check whether the timer has been allocated an IRQ before
proceeding to assign one to it.  Here is a patch that does this.

Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Tested-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 10 +++++-----
 drivers/char/hpet.c    |  6 ++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 786aa227afdf..a3c56c9b8a02 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -117,7 +117,8 @@ int is_hpet_enabled(void)
 static void hpet_reserve_platform_timers(unsigned long id)
 {
 	struct hpet __iomem *hpet = hpet_virt_address;
-	unsigned int nrtimers;
+	struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
+	unsigned int nrtimers, i;
 	struct hpet_data hd;
 
 	nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
@@ -135,10 +136,9 @@ static void hpet_reserve_platform_timers(unsigned long id)
 	hd.hd_irq[0] = HPET_LEGACY_8254;
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
-	/*
-	 * IRQs for the other timers are assigned dynamically
-	 * in hpet_alloc
-	 */
+       for (i = 2; i < nrtimers; timer++, i++)
+	       hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
+		       Tn_INT_ROUTE_CNF_SHIFT;
 	hpet_alloc(&hd);
 }
 #else
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 593b32cfbc33..22f5fd02ea87 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -852,6 +852,12 @@ int hpet_alloc(struct hpet_data *hdp)
 
 		timer = &hpet->hpet_timers[devp - hpetp->hp_dev];
 
+		/* Check if there's already an IRQ assigned to the timer */
+		if (hdp->hd_irq[i]) {
+			hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i];
+			continue;
+		}
+
 		hpet_config = readq(&timer->hpet_config);
 		irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK)
 			>> Tn_INT_ROUTE_CAP_SHIFT;

From bbe4d18ac2e058c56adb0cd71f49d9ed3216a405 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: [PATCH 017/890] NTP: correct inconsistent ntp interval/tick_length
 usage

I recently noticed on one of my boxes that when synched with an NTP
server, the drift value reported for the system was ~283ppm. While in
some cases, clock hardware can be that bad, it struck me as unusual as
the system was using the acpi_pm clocksource, which is one of the more
trustworthy and accurate clocksources on x86 hardware.

I brought up another system and let it sync to the same NTP server, and
I noticed a similar 280some ppm drift.

In looking at the code, I found that the acpi_pm's constant frequency
was being computed correctly at boot-up, however once the system was up,
even without the ntp daemon running, the clocksource's frequency was
being modified by the clocksource_adjust() function.

Digging deeper, I realized that in the code that keeps track of how much
the clocksource is skewing from the ntp desired time, we were using
different lengths to establish how long an time interval was.

The clocksource was being setup with the following interval:
	NTP_INTERVAL_LENGTH = NSEC_PER_SEC/NTP_INTERVAL_FREQ

While the ntp code was using the tick_length_base value:
	tick_length_base ~= (tick_usec * NSEC_PER_USEC * USER_HZ)
					/NTP_INTERVAL_FREQ

The subtle difference is:
	(tick_usec * NSEC_PER_USEC * USER_HZ) != NSEC_PER_SEC

This difference in calculation was causing the clocksource correction
code to apply a correction factor to the clocksource so the two
intervals were the same, however this results in the actual frequency of
the clocksource to be made incorrect. I believe this difference would
affect all clocksources, although to differing degrees depending on the
clocksource resolution.

The issue was introduced when my HZ free ntp patch landed in 2.6.21-rc1,
so my apologies for the mistake, and for not noticing it until now.

The following patch, corrects the clocksource's initialization code so
it uses the same interval length as the code in ntp.c. After applying
this patch, the drift value for the same system went from ~283ppm to
only 2.635ppm.

I believe this patch to be good, however it does affect all arches and
I've only tested on x86, so some caution is advised. I do think it would
be a likely candidate for a stable 2.6.24.x release.

Any thoughts or feedback would be appreciated.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 77680195cf84..092a2366b5a9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -186,7 +186,8 @@ static void change_clocksource(void)
 
 	clock->error = 0;
 	clock->xtime_nsec = 0;
-	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+	clocksource_calculate_interval(clock,
+		(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
 
 	tick_clock_notify();
 
@@ -243,7 +244,8 @@ void __init timekeeping_init(void)
 	ntp_clear();
 
 	clock = clocksource_get_next();
-	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+	clocksource_calculate_interval(clock,
+		(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
 	clock->cycle_last = clocksource_read(clock);
 
 	xtime.tv_sec = sec;

From 6378ddb592158db4b42197f1bc8666228800e379 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 30 Jan 2008 13:30:04 +0100
Subject: [PATCH 018/890] time: track accurate idle time with
 tick_sched.idle_sleeptime

Current idle time in kstat is based on jiffies and is coarse grained.
tick_sched.idle_sleeptime is making some attempt to keep track of idle time
in a fine grained manner.  But, it is not handling the time spent in
interrupts fully.

Make tick_sched.idle_sleeptime accurate with respect to time spent on
handling interrupts and also add tick_sched.idle_lastupdate, which keeps
track of last time when idle_sleeptime was updated.

This statistics will be crucial for cpufreq-ondemand governor, which can
shed some conservative gaurd band that is uses today while setting the
frequency.  The ondemand changes that uses the exact idle time is coming
soon.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/tick.h     |  6 ++++
 kernel/softirq.c         |  7 +++-
 kernel/time/tick-sched.c | 74 +++++++++++++++++++++++++++-------------
 3 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index f4a1395e05ff..0fadf95debe1 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -51,8 +51,10 @@ struct tick_sched {
 	unsigned long			idle_jiffies;
 	unsigned long			idle_calls;
 	unsigned long			idle_sleeps;
+	int				idle_active;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_sleeptime;
+	ktime_t				idle_lastupdate;
 	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
@@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(void);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
+extern void tick_nohz_stop_idle(int cpu);
+extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(void) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
@@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
 
 	return len;
 }
+static inline void tick_nohz_stop_idle(int cpu) { }
+static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; }
 # endif /* !NO_HZ */
 
 #endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8fe1ff40102d..d7837d45419e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -280,9 +280,14 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
+#ifdef CONFIG_NO_HZ
+	int cpu = smp_processor_id();
+	if (idle_cpu(cpu) && !in_interrupt())
+		tick_nohz_stop_idle(cpu);
+#endif
 	__irq_enter();
 #ifdef CONFIG_NO_HZ
-	if (idle_cpu(smp_processor_id()))
+	if (idle_cpu(cpu))
 		tick_nohz_update_jiffies();
 #endif
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 49e12f6a4bab..63f24b550695 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void)
 	local_irq_restore(flags);
 }
 
+void tick_nohz_stop_idle(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (ts->idle_active) {
+		ktime_t now, delta;
+		now = ktime_get();
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_lastupdate = now;
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+		ts->idle_active = 0;
+	}
+}
+
+static ktime_t tick_nohz_start_idle(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t now, delta;
+
+	now = ktime_get();
+	if (ts->idle_active) {
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_lastupdate = now;
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+	}
+	ts->idle_entrytime = now;
+	ts->idle_active = 1;
+	return now;
+}
+
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	*last_update_time = ktime_to_us(ts->idle_lastupdate);
+	return ktime_to_us(ts->idle_sleeptime);
+}
+
 /**
  * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
  *
@@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void)
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	unsigned long rt_jiffies;
 	struct tick_sched *ts;
-	ktime_t last_update, expires, now, delta;
+	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	int cpu;
 
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
+	now = tick_nohz_start_idle(cpu);
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
 	/*
@@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void)
 		}
 	}
 
-	now = ktime_get();
-	/*
-	 * When called from irq_exit we need to account the idle sleep time
-	 * correctly.
-	 */
-	if (ts->tick_stopped) {
-		delta = ktime_sub(now, ts->idle_entrytime);
-		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-	}
-
-	ts->idle_entrytime = now;
 	ts->idle_calls++;
-
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
 		seq = read_seqbegin(&xtime_lock);
@@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void)
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long ticks;
-	ktime_t now, delta;
-
-	if (!ts->tick_stopped)
-		return;
-
-	/* Update jiffies first */
-	now = ktime_get();
+	ktime_t now;
 
 	local_irq_disable();
+	tick_nohz_stop_idle(cpu);
+
+	if (!ts->tick_stopped) {
+		local_irq_enable();
+		return;
+	}
+
+	/* Update jiffies first */
 	select_nohz_load_balancer(0);
+	now = ktime_get();
 	tick_do_update_jiffies64(now);
 	cpu_clear(cpu, nohz_cpu_mask);
 
-	/* Account the idle time */
-	delta = ktime_sub(now, ts->idle_entrytime);
-	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick

From 9484b1eb4d05957d3114dc63026448eb66106c06 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:30:04 +0100
Subject: [PATCH 019/890] x86: fix make mrproper

Michael Opdenacker reported:

For backward compatibility with earlier (< 2.6.24) kernels,
arch/i386/boot/bzImage or arch/x86_64/boot/bzImage symbolic links to
arch/x86/boot/bzImage are created when you build an x86 kernel. The
arch/i386 or arch/x86_64 directories are then created for this only
purpose.

Issue: these generated directories and symbolic links are *not cleaned
up* when you run "make mrproper" (and thus "make distclean"). This
disturbs the production of patches, because the source tree is left with
generated files and directories.

Sam has an alternative fix:

The directory is killed during make clean as opposed to make mrproper.

Reported-by: Michael Opdenacker <michael-lists@free-electrons.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile_32 | 3 ++-
 arch/x86/Makefile_64 | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
index 50394da2f6c1..f806ceccff7a 100644
--- a/arch/x86/Makefile_32
+++ b/arch/x86/Makefile_32
@@ -156,7 +156,8 @@ install:
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
 
 archclean:
-	$(Q)rm -rf $(objtree)/arch/i386/boot
+	$(Q)rm -rf $(objtree)/arch/i386
+	$(Q)rm -rf $(objtree)/arch/x86_64
 	$(Q)$(MAKE) $(clean)=arch/x86/boot
 
 define archhelp
diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64
index a804860022e6..93cf17769ff5 100644
--- a/arch/x86/Makefile_64
+++ b/arch/x86/Makefile_64
@@ -123,7 +123,8 @@ endif
 	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
 
 archclean:
-	$(Q)rm -rf $(objtree)/arch/x86_64/boot
+	$(Q)rm -rf $(objtree)/arch/i386
+	$(Q)rm -rf $(objtree)/arch/x86_64
 	$(Q)$(MAKE) $(clean)=$(boot)
 
 define archhelp

From 5c9c9bec0589be696c70c5efb448b17d5ab720e2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 30 Jan 2008 13:30:04 +0100
Subject: [PATCH 020/890] x86: hibernation: document __save_processor_state()
 on x86

Document the fact that __save_processor_state() has to save all CPU
registers referred to by the kernel in case a different kernel is
used to load and restore a hibernation image containing it.

Sigend-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/suspend_64.c | 20 ++++++++++++++++++++
 include/asm-x86/suspend_64.h |  9 ++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 2e5efaaf8800..569f1b540e36 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -19,6 +19,21 @@ extern const void __nosave_begin, __nosave_end;
 
 struct saved_context saved_context;
 
+/**
+ *	__save_processor_state - save CPU registers before creating a
+ *		hibernation image and before restoring the memory state from it
+ *	@ctxt - structure to store the registers contents in
+ *
+ *	NOTE: If there is a CPU register the modification of which by the
+ *	boot kernel (ie. the kernel used for loading the hibernation image)
+ *	might affect the operations of the restored target kernel (ie. the one
+ *	saved in the hibernation image), then its contents must be saved by this
+ *	function.  In other words, if kernel A is hibernated and different
+ *	kernel B is used for loading the hibernation image into memory, the
+ *	kernel A's __save_processor_state() function must save all registers
+ *	needed by kernel A, so that it can operate correctly after the resume
+ *	regardless of what kernel B does in the meantime.
+ */
 void __save_processor_state(struct saved_context *ctxt)
 {
 	kernel_fpu_begin();
@@ -69,6 +84,11 @@ static void do_fpu_end(void)
 	kernel_fpu_end();
 }
 
+/**
+ *	__restore_processor_state - restore the contents of CPU registers saved
+ *		by __save_processor_state()
+ *	@ctxt - structure to load the registers contents from
+ */
 void __restore_processor_state(struct saved_context *ctxt)
 {
 	/*
diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index c505a76bcf6e..4404668f9aa4 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h
@@ -15,7 +15,14 @@ arch_prepare_suspend(void)
 	return 0;
 }
 
-/* Image of the saved processor state. If you touch this, fix acpi/wakeup.S. */
+/*
+ * Image of the saved processor state, used by the low level ACPI suspend to
+ * RAM code and by the low level hibernation code.
+ *
+ * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
+ * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
+ * still work as required.
+ */
 struct saved_context {
 	struct pt_regs regs;
   	u16 ds, es, fs, gs, ss;

From 4c6b8b4d62fb4cb843c32db71e0a8301039908f3 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 30 Jan 2008 13:30:04 +0100
Subject: [PATCH 021/890] x86: fix: s2ram + P4 + tsc = annoyance

s2ram recently became useful here, except for the kernel's annoying
habit of disabling my P4's perfectly good TSC.

[  107.894470] CPU 1 is now offline
[  107.894474] SMP alternatives: switching to UP code
[  107.895832] CPU0 attaching sched-domain:
[  107.895836]  domain 0: span 1
[  107.895838]   groups: 1
[  107.896097] CPU1 is down
[    3.726156] Intel machine check architecture supported.
[    3.726165] Intel machine check reporting enabled on CPU#0.
[    3.726167] CPU0: Intel P4/Xeon Extended MCE MSRs (12) available
[    3.726170] CPU0: Thermal monitoring enabled
[    3.726175] Back to C!
[    3.726708] Force enabled HPET at resume
[    3.726775] Enabling non-boot CPUs ...
[    3.727049] CPU0 attaching NULL sched-domain.
[    3.727165] SMP alternatives: switching to SMP code
[    3.727858] Booting processor 1/1 eip 3000
[    3.727862] CPU 1 irqstacks, hard=b042f000 soft=b042d000
[    3.738173] Initializing CPU#1
[    3.798912] Calibrating delay using timer specific routine.. 5986.12 BogoMIPS (lpj=2993061)
[    3.798920] CPU: After generic identify, caps: bfebfbff 00000000 00000000 00000000 00004400 00000000 00000000 00000000
[    3.798931] CPU: Trace cache: 12K uops, L1 D cache: 8K
[    3.798934] CPU: L2 cache: 512K
[    3.798936] CPU: Physical Processor ID: 0
[    3.798938] CPU: After all inits, caps: bfebfbff 00000000 00000000 0000b080 00004400 00000000 00000000 00000000
[    3.798946] Intel machine check architecture supported.
[    3.798952] Intel machine check reporting enabled on CPU#1.
[    3.798955] CPU1: Intel P4/Xeon Extended MCE MSRs (12) available
[    3.798959] CPU1: Thermal monitoring enabled
[    3.799161] CPU1: Intel(R) Pentium(R) 4 CPU 3.00GHz stepping 09
[    3.799187] checking TSC synchronization [CPU#0 -> CPU#1]:
[    3.819181] Measured 63588552840 cycles TSC warp between CPUs, turning off TSC clock.
[    3.819184] Marking TSC unstable due to: check_tsc_sync_source failed.

If check_tsc_warp() is called after initial boot, and the TSC has in the
meantime been set (BIOS, user, silicon, elves) to a value lower than the
last stored/stale value, we blame the TSC.  Reset to pristine condition
after every test.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_sync.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9125efe66a06..05d8f25de6ae 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -129,23 +129,23 @@ void __cpuinit check_tsc_sync_source(int cpu)
 	while (atomic_read(&stop_count) != cpus-1)
 		cpu_relax();
 
-	/*
-	 * Reset it - just in case we boot another CPU later:
-	 */
-	atomic_set(&start_count, 0);
-
 	if (nr_warps) {
 		printk("\n");
 		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
 				    " turning off TSC clock.\n", max_warp);
 		mark_tsc_unstable("check_tsc_sync_source failed");
-		nr_warps = 0;
-		max_warp = 0;
-		last_tsc = 0;
 	} else {
 		printk(" passed.\n");
 	}
 
+	/*
+	 * Reset it - just in case we boot another CPU later:
+	 */
+	atomic_set(&start_count, 0);
+	nr_warps = 0;
+	max_warp = 0;
+	last_tsc = 0;
+
 	/*
 	 * Let the target continue with the bootup:
 	 */

From b02aae9cf52956dfe1bec73f77f81a3d05d3902b Mon Sep 17 00:00:00 2001
From: Rene Herman <rene.herman@gmail.com>
Date: Wed, 30 Jan 2008 13:30:05 +0100
Subject: [PATCH 022/890] x86: provide a DMI based port 0x80 I/O delay
 override.

x86: provide a DMI based port 0x80 I/O delay override.

Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.

David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.

Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailinglist but that approach has
two problems.

First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but...

Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.

Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigid testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.

This also introduces a command-line parameter "io_delay" to override
the DMI based choice again:

	io_delay=<standard|alternate>

where "standard" means using the standard port 0x80 and "alternate"
port 0xed.

This retains the udelay method as a config (CONFIG_UDELAY_IO_DELAY) and
command-line ("io_delay=udelay") choice for testing purposes as well.

This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as David P. Reed reported the problem was already gone after using the
udelay version. He moreover reported that booting with "acpi=off" also
fixed things and seeing as how ACPI isn't touched until after this DMI
based I/O port switch I believe it's safe to leave the ones in the boot
code be.

The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.

This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.

Signed-off-by: Rene Herman <rene.herman@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |   8 +++
 arch/x86/Kconfig.debug              |   9 +++
 arch/x86/boot/compressed/misc_32.c  |   8 +--
 arch/x86/boot/compressed/misc_64.c  |   8 +--
 arch/x86/kernel/Makefile_32         |   2 +-
 arch/x86/kernel/Makefile_64         |   2 +-
 arch/x86/kernel/io_delay.c          | 106 ++++++++++++++++++++++++++++
 arch/x86/kernel/setup_32.c          |   2 +
 arch/x86/kernel/setup_64.c          |   2 +
 include/asm-x86/io_32.h             |   8 ++-
 include/asm-x86/io_64.h             |  35 +++++----
 11 files changed, 166 insertions(+), 24 deletions(-)
 create mode 100644 arch/x86/kernel/io_delay.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 880f882160e2..9e6056058425 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -794,6 +794,14 @@ and is between 256 and 4096 characters. It is defined in the file
 			for translation below 32 bit and if not available
 			then look in the higher range.
 
+	io_delay=	[X86-32,X86-64] I/O delay method
+		standard
+			Standard port 0x80 delay
+		alternate
+			Alternate port 0xed delay
+		udelay
+			Simple two microsecond delay
+
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
 			arch/alpha/kernel/core_marvel.c.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 761ca7b5f120..40aba670fb37 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -112,4 +112,13 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config UDELAY_IO_DELAY
+	bool "Delay I/O through udelay instead of outb"
+	depends on DEBUG_KERNEL
+	help
+	  Make inb_p/outb_p use udelay() based delays by default. Please note
+	  that udelay() does not have the same bus-level side-effects that
+	  the normal outb based delay does meaning this could cause drivers
+	  to change behaviour and/or bugs to surface.
+
 endmenu
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d1b2fa..288e16283ef9 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
 	RM_SCREEN_INFO.orig_y = y;
 
 	pos = (x + cols * y) * 2;	/* Update cursor position */
-	outb_p(14, vidport);
-	outb_p(0xff & (pos >> 9), vidport+1);
-	outb_p(15, vidport);
-	outb_p(0xff & (pos >> 1), vidport+1);
+	outb(14, vidport);
+	outb(0xff & (pos >> 9), vidport+1);
+	outb(15, vidport);
+	outb(0xff & (pos >> 1), vidport+1);
 }
 
 static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015aa65e4..43e5fcc37be9 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
 	RM_SCREEN_INFO.orig_y = y;
 
 	pos = (x + cols * y) * 2;	/* Update cursor position */
-	outb_p(14, vidport);
-	outb_p(0xff & (pos >> 9), vidport+1);
-	outb_p(15, vidport);
-	outb_p(0xff & (pos >> 1), vidport+1);
+	outb(14, vidport);
+	outb(0xff & (pos >> 9), vidport+1);
+	outb(15, vidport);
+	outb(0xff & (pos >> 1), vidport+1);
 }
 
 static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c27662..0cc1981d1e38 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
 		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
-		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890d8ee9..08a68f0d8fda 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
 		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
-		i8253.o
+		i8253.o io_delay.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 000000000000..4d955e74b974
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,106 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80 needed for certain HP laptops
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static void standard_io_delay(void)
+{
+	asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD));
+}
+
+static void alternate_io_delay(void)
+{
+	asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT));
+}
+
+/*
+ * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't
+ * have the bus-level side-effects that outb does
+ */
+#define IO_DELAY_USECS 2
+
+/*
+ * High on a hill was a lonely goatherd
+ */
+static void udelay_io_delay(void)
+{
+	udelay(IO_DELAY_USECS);
+}
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static void (*io_delay)(void) = standard_io_delay;
+#else
+static void (*io_delay)(void) = udelay_io_delay;
+#endif
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ */
+void native_io_delay(void)
+{
+	io_delay();
+}
+EXPORT_SYMBOL(native_io_delay);
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+{
+	printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+	io_delay = alternate_io_delay;
+	return 0;
+}
+
+static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+	{
+		.callback	= dmi_alternate_io_delay_port,
+		.ident		= "HP Pavilion dv9000z",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B9")
+		}
+	},
+	{
+	}
+};
+
+static int __initdata io_delay_override;
+
+void __init io_delay_init(void)
+{
+	if (!io_delay_override)
+		dmi_check_system(alternate_io_delay_port_dmi_table);
+}
+#endif
+
+static int __init io_delay_param(char *s)
+{
+	if (!s)
+		return -EINVAL;
+
+	if (!strcmp(s, "standard"))
+		io_delay = standard_io_delay;
+	else if (!strcmp(s, "alternate"))
+		io_delay = alternate_io_delay;
+	else if (!strcmp(s, "udelay"))
+		io_delay = udelay_io_delay;
+	else
+		return -EINVAL;
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+	io_delay_override = 1;
+#endif
+	return 0;
+}
+
+early_param("io_delay", io_delay_param);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 9c24b45b513c..51bdc0b1b72e 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
 
 	dmi_scan_machine();
 
+	io_delay_init();;
+
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif	
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1d5f5f..ec976edf0399 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
 
 	dmi_scan_machine();
 
+	io_delay_init();
+
 #ifdef CONFIG_SMP
 	/* setup to use the static apicid table during kernel startup */
 	x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd1e6f4..a8d25c38b91c 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,14 @@ static inline void flush_write_buffers(void)
 
 #endif /* __KERNEL__ */
 
-static inline void native_io_delay(void)
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
+#else
+static inline void io_delay_init(void)
 {
-	asm volatile("outb %%al,$0x80" : : : "memory");
 }
+#endif
+extern void native_io_delay(void);
 
 #if defined(CONFIG_PARAVIRT)
 #include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b0794332..5bebaf961692 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,24 @@
   *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   */
 
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
 #else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+static inline void io_delay_init(void)
+{
+}
 #endif
+extern void native_io_delay(void);
+
+static inline void slow_down_io(void)
+{
+	native_io_delay();
+#ifdef REALLY_SLOW_IO
+	native_io_delay();
+	native_io_delay();
+	native_io_delay();
+#endif
+}
 
 /*
  * Talk about misusing macros..
@@ -50,21 +61,21 @@
 static inline void out##s(unsigned x value, unsigned short port) {
 
 #define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
 
 #define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
 
 #define __IN1(s) \
 static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
 
 #define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
 
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
 
 #define __INS(s) \
 static inline void ins##s(unsigned short port, void * addr, unsigned long count) \

From 6e7c402590b75b6b45138792445ee0f0315a8473 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:05 +0100
Subject: [PATCH 023/890] x86: various changes and cleanups to in_p/out_p delay
 details

various changes to the in_p/out_p delay details:

- add the io_delay=none method
- make each method selectable from the kernel config
- simplify the delay code a bit by getting rid of an indirect function call
- add the /proc/sys/kernel/io_delay_type sysctl
- change 'io_delay=standard|alternate' to io_delay=0x80 and io_delay=0xed
- make the io delay config not depend on CONFIG_DEBUG_KERNEL

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: "David P. Reed" <dpreed@reed.com>
---
 Documentation/kernel-parameters.txt |  12 ++--
 arch/x86/Kconfig.debug              |  79 +++++++++++++++++++--
 arch/x86/kernel/io_delay.c          | 106 +++++++++++++---------------
 include/asm-x86/io_32.h             |  10 +--
 include/asm-x86/io_64.h             |  10 +--
 kernel/sysctl.c                     |   9 +++
 6 files changed, 143 insertions(+), 83 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9e6056058425..b427b7c0e5d0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -795,12 +795,14 @@ and is between 256 and 4096 characters. It is defined in the file
 			then look in the higher range.
 
 	io_delay=	[X86-32,X86-64] I/O delay method
-		standard
-			Standard port 0x80 delay
-		alternate
-			Alternate port 0xed delay
+		0x80
+			Standard port 0x80 based delay
+		0xed
+			Alternate port 0xed based delay (needed on some systems)
 		udelay
-			Simple two microsecond delay
+			Simple two microseconds delay
+		none
+			No delay
 
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 40aba670fb37..77eda46f97b8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -112,13 +112,78 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config UDELAY_IO_DELAY
-	bool "Delay I/O through udelay instead of outb"
-	depends on DEBUG_KERNEL
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+	int
+	default "0"
+
+config IO_DELAY_TYPE_0XED
+	int
+	default "1"
+
+config IO_DELAY_TYPE_UDELAY
+	int
+	default "2"
+
+config IO_DELAY_TYPE_NONE
+	int
+	default "3"
+
+choice
+	prompt "IO delay type"
+	default IO_DELAY_0X80
+
+config IO_DELAY_0X80
+	bool "port 0x80 based port-IO delay [recommended]"
 	help
-	  Make inb_p/outb_p use udelay() based delays by default. Please note
-	  that udelay() does not have the same bus-level side-effects that
-	  the normal outb based delay does meaning this could cause drivers
-	  to change behaviour and/or bugs to surface.
+	  This is the traditional Linux IO delay used for in/out_p.
+	  It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+	bool "port 0xed based port-IO delay"
+	help
+	  Use port 0xed as the IO delay. This frees up port 0x80 which is
+	  often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+	bool "udelay based port-IO delay"
+	help
+	  Use udelay(2) as the IO delay method. This provides the delay
+	  while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+	bool "no port-IO delay"
+	help
+	  No port-IO delay. Will break on old boxes that require port-IO
+	  delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+	int
+	default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+	int
+	default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+	int
+	default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+	int
+	default IO_DELAY_TYPE_NONE
+endif
 
 endmenu
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 4d955e74b974..f052e34dc94c 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -1,5 +1,9 @@
 /*
  * I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -8,98 +12,86 @@
 #include <linux/dmi.h>
 #include <asm/io.h>
 
-/*
- * Allow for a DMI based override of port 0x80 needed for certain HP laptops
- */
-#define IO_DELAY_PORT_STD 0x80
-#define IO_DELAY_PORT_ALT 0xed
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
 
-static void standard_io_delay(void)
-{
-	asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD));
-}
-
-static void alternate_io_delay(void)
-{
-	asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT));
-}
-
-/*
- * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't
- * have the bus-level side-effects that outb does
- */
-#define IO_DELAY_USECS 2
-
-/*
- * High on a hill was a lonely goatherd
- */
-static void udelay_io_delay(void)
-{
-	udelay(IO_DELAY_USECS);
-}
-
-#ifndef CONFIG_UDELAY_IO_DELAY
-static void (*io_delay)(void) = standard_io_delay;
-#else
-static void (*io_delay)(void) = udelay_io_delay;
-#endif
+static int __initdata io_delay_override;
 
 /*
  * Paravirt wants native_io_delay to be a constant.
  */
 void native_io_delay(void)
 {
-	io_delay();
+	switch (io_delay_type) {
+	default:
+	case CONFIG_IO_DELAY_TYPE_0X80:
+		asm volatile ("outb %al, $0x80");
+		break;
+	case CONFIG_IO_DELAY_TYPE_0XED:
+		asm volatile ("outb %al, $0xed");
+		break;
+	case CONFIG_IO_DELAY_TYPE_UDELAY:
+		/*
+		 * 2 usecs is an upper-bound for the outb delay but
+		 * note that udelay doesn't have the bus-level
+		 * side-effects that outb does, nor does udelay() have
+		 * precise timings during very early bootup (the delays
+		 * are shorter until calibrated):
+		 */
+		udelay(2);
+	case CONFIG_IO_DELAY_TYPE_NONE:
+		break;
+	}
 }
 EXPORT_SYMBOL(native_io_delay);
 
-#ifndef CONFIG_UDELAY_IO_DELAY
-static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
 {
-	printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
-	io_delay = alternate_io_delay;
+	if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+		printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
+			id->ident);
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+	}
+
 	return 0;
 }
 
-static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
 	{
-		.callback	= dmi_alternate_io_delay_port,
+		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion dv9000z",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30B9")
 		}
 	},
-	{
-	}
+	{ }
 };
 
-static int __initdata io_delay_override;
-
 void __init io_delay_init(void)
 {
 	if (!io_delay_override)
-		dmi_check_system(alternate_io_delay_port_dmi_table);
+		dmi_check_system(io_delay_0xed_port_dmi_table);
 }
-#endif
 
 static int __init io_delay_param(char *s)
 {
-	if (!s)
-		return -EINVAL;
-
-	if (!strcmp(s, "standard"))
-		io_delay = standard_io_delay;
-	else if (!strcmp(s, "alternate"))
-		io_delay = alternate_io_delay;
+	if (!strcmp(s, "0x80"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+	else if (!strcmp(s, "0xed"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
 	else if (!strcmp(s, "udelay"))
-		io_delay = udelay_io_delay;
+		io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+	else if (!strcmp(s, "none"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
 	else
 		return -EINVAL;
 
-#ifndef CONFIG_UDELAY_IO_DELAY
 	io_delay_override = 1;
-#endif
 	return 0;
 }
 
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index a8d25c38b91c..2a04bd17eac5 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,15 +250,11 @@ static inline void flush_write_buffers(void)
 
 #endif /* __KERNEL__ */
 
-#ifndef CONFIG_UDELAY_IO_DELAY
-extern void io_delay_init(void);
-#else
-static inline void io_delay_init(void)
-{
-}
-#endif
 extern void native_io_delay(void);
 
+extern int io_delay_type;
+extern void io_delay_init(void);
+
 #if defined(CONFIG_PARAVIRT)
 #include <asm/paravirt.h>
 #else
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 5bebaf961692..dbcc03aa1c6a 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,15 +35,11 @@
   *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   */
 
-#ifndef CONFIG_UDELAY_IO_DELAY
-extern void io_delay_init(void);
-#else
-static inline void io_delay_init(void)
-{
-}
-#endif
 extern void native_io_delay(void);
 
+extern int io_delay_type;
+extern void io_delay_init(void);
+
 static inline void slow_down_io(void)
 {
 	native_io_delay();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4bc8e48434a7..357b68ba23ec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -53,6 +53,7 @@
 #ifdef CONFIG_X86
 #include <asm/nmi.h>
 #include <asm/stacktrace.h>
+#include <asm/io.h>
 #endif
 
 static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "io_delay_type",
+		.data		= &io_delay_type,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 #if defined(CONFIG_MMU)
 	{

From d0049e71c6e14a3b0a5b8cedaa1325a1a91fecb0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:05 +0100
Subject: [PATCH 024/890] x86: make io_delay=0xed the default

make io_delay=0xed the default. This frees up port 0x80 which is
a debug port on some machines and locks up certain laptops.

Testing only for now. Try the io_delay=0x80 boot option if this does not
work for you.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 77eda46f97b8..b747ab38814d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -134,7 +134,7 @@ config IO_DELAY_TYPE_NONE
 
 choice
 	prompt "IO delay type"
-	default IO_DELAY_0X80
+	default IO_DELAY_0XED
 
 config IO_DELAY_0X80
 	bool "port 0x80 based port-IO delay [recommended]"

From f9fc58910ebc448b0b7d37af1bf57a896a78e9c4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:05 +0100
Subject: [PATCH 025/890] x86: add DMI quirk for io-delay hangs on Compaq
 Presario V6000 laptops

add the DMI strings provided by Islam Amer <pharon@gmail.com>, for
the Compaq Presario V6000 (Quanta/30B7).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_delay.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index f052e34dc94c..bd49321034db 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -61,6 +61,14 @@ static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
  * 0x80 is used:
  */
 static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+	{
+		.callback	= dmi_io_delay_0xed_port,
+		.ident		= "Compaq Presario V6000",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B7")
+		}
+	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion dv9000z",
@@ -69,6 +77,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "30B9")
 		}
 	},
+	{
+		.callback	= dmi_io_delay_0xed_port,
+		.ident		= "HP Pavilion tx1000",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30BF")
+		}
+	},
 	{ }
 };
 

From 83bd01024b1fdfc41d9b758e5669e80fca72df66 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:06 +0100
Subject: [PATCH 026/890] x86: protect against sigaltstack wraparound

cf http://lkml.org/lkml/2007/10/3/41

To summarize: on Linux, SA_ONSTACK decides whether you are already on the
signal stack based on the value of the SP at the time of a signal.  If
you are not already inside the range, you are not "on the signal stack"
and so the new signal handler frame starts over at the base of the signal
stack.

sigaltstack (and sigstack before it) was invented in BSD.  There, the
SA_ONSTACK behavior has always been different.  It uses a kernel state
flag to decide, rather than the SP value.  When you first take an
SA_ONSTACK signal and switch to the alternate signal stack, it sets the
SS_ONSTACK flag in the thread's sigaltstack state in the kernel.
Thereafter you are "on the signal stack" and don't switch SP before
pushing a handler frame no matter what the SP value is.  Only when you
sigreturn from the original handler context do you clear the SS_ONSTACK
flag so that a new handler frame will start over at the base of the
alternate signal stack.

The undesireable effect of the Linux behavior is that an overflow of the
alternate signal stack can not only go undetected, but lead to a ring
buffer effect of clobbering the original handler frame at the base of the
signal stack for each successive signal that comes just after the
overflow.  This is what Shi Weihua's test case demonstrates.  Normally
this does not come up because of the signal mask, but the test case uses
SA_NODEFER for its SIGSEGV handler.

The other subtle part of the existing Linux semantics is that a simple
longjmp out of a signal handler serves to take you off the signal stack
in a safe and reliable fashion without having used sigreturn (nor having
just returned from the handler normally, which means the same).  After
the longjmp (or even informal stack switching not via any proper libc or
kernel interface), the alternate signal stack stands ready to be used
again.

A paranoid program would allocate a PROT_NONE red zone around its
alternate signal stack.  Then a small overflow would trigger a SIGSEGV in
handler setup, and be fatal (core dump) whether or not SIGSEGV is
blocked.  As with thread stack red zones, that cannot catch all overflows
(or underflows).  e.g., a local array as large as page size allocated in
a function called from a handler, but not actually touched before more
calls push more stack, could cause an overflow that silently pushes into
some unrelated allocated pages.

The BSD behavior does not do anything in particular about overflow.  But
it does at least avoid the wraparound or "ring buffer effect", so you'll
just get a straightforward all-out overflow down your address space past
the low end of the alternate signal stack.  I don't know what the BSD
behavior is for longjmp out of an SA_ONSTACK handler.

The POSIX wording relating to sigaltstack is pretty minimal.  I don't
think it speaks to this issue one way or another.  (The program that
overflows its stack is clearly in undefined behavior territory of one
sort or another anyhow.)

Given the longjmp issue and the potential for highly subtle complications
in existing programs relying on this in arcane ways deep in their code, I
am very dubious about changing the behavior to the BSD style persistent
flag.  I think Shi Weihua's patches have a similar effect by tracking the
SP used in the last handler setup.

I think it would be sensible for the signal handler setup code to detect
when it would itself be causing a stack overflow.  Maybe something like
the following patch (untested).  This issue exists in the same way on all
machines, so ideally they would all do a similar check.

When it's the handler function itself or its callees that cause the
overflow, rather than the signal handler frame setup alone crossing the
boundary, this still won't help.  But I don't see any way to distinguish
that from the valid longjmp case.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/signal_32.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 20f29e4c1d33..5c6170c44b00 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -295,6 +295,13 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	/* Default to using normal stack */
 	esp = regs->esp;
 
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(esp) && !likely(on_sig_stack(esp - frame_size)))
+		return (void __user *) -1L;
+
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (sas_ss_flags(esp) == 0)

From 53d517cdbaac704352b3d0c10fecb99e0b54572e Mon Sep 17 00:00:00 2001
From: Guillaume Chazarain <guichaz@yahoo.fr>
Date: Wed, 30 Jan 2008 13:30:06 +0100
Subject: [PATCH 027/890] x86: scale cyc_2_nsec according to CPU frequency

scale the sched_clock() cyc_2_nsec scaling factor according to
CPU frequency changes.

[ mingo@elte.hu: simplified it and fixed it for SMP. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_32.c | 43 +++++++++++++++++++++++++-----
 arch/x86/kernel/tsc_64.c | 57 +++++++++++++++++++++++++++++++++-------
 include/asm-x86/timer.h  | 23 ++++++++++++----
 3 files changed, 102 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 9ebc0dab66b4..00bb4c1c0593 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -5,6 +5,7 @@
 #include <linux/jiffies.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
+#include <linux/percpu.h>
 
 #include <asm/delay.h>
 #include <asm/tsc.h>
@@ -80,13 +81,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-unsigned long cyc2ns_scale __read_mostly;
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
 
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
-	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+	unsigned long flags, prev_scale, *scale;
+	unsigned long long tsc_now, ns_now;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	prev_scale = *scale;
+	if (cpu_khz)
+		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	/*
+	 * Start smoothly with the new frequency:
+	 */
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
 /*
@@ -239,7 +258,9 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
 						ref_freq, freq->new);
 			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
 				tsc_khz = cpu_khz;
-				set_cyc2ns_scale(cpu_khz);
+				preempt_disable();
+				set_cyc2ns_scale(cpu_khz, smp_processor_id());
+				preempt_enable();
 				/*
 				 * TSC based sched_clock turns
 				 * to junk w/ cpufreq
@@ -367,6 +388,8 @@ static inline void check_geode_tsc_reliable(void) { }
 
 void __init tsc_init(void)
 {
+	int cpu;
+
 	if (!cpu_has_tsc || tsc_disable)
 		goto out_no_tsc;
 
@@ -380,7 +403,15 @@ void __init tsc_init(void)
 				(unsigned long)cpu_khz / 1000,
 				(unsigned long)cpu_khz % 1000);
 
-	set_cyc2ns_scale(cpu_khz);
+	/*
+	 * Secondary CPUs do not run through tsc_init(), so set up
+	 * all the scale factors for all CPUs, assuming the same
+	 * speed as the bootup CPU. (cpufreq notifiers will fix this
+	 * up if their speed diverges)
+	 */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
+
 	use_tsc_delay();
 
 	/* Check and install the TSC clocksource */
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9c70af45b42b..32edd2c50e94 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -10,6 +10,7 @@
 
 #include <asm/hpet.h>
 #include <asm/timex.h>
+#include <asm/timer.h>
 
 static int notsc __initdata = 0;
 
@@ -18,16 +19,48 @@ EXPORT_SYMBOL(cpu_khz);
 unsigned int tsc_khz;
 EXPORT_SYMBOL(tsc_khz);
 
-static unsigned int cyc2ns_scale __read_mostly;
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *		ns = cycles * (10^6 / cpu_khz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ *  into a shift.
+ *
+ *  We can use khz divisor instead of mhz to keep a better precision, since
+ *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ *  (mathieu.desnoyers@polymtl.ca)
+ *
+ *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
 
-static inline void set_cyc2ns_scale(unsigned long khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
-	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
-}
+	unsigned long flags, prev_scale, *scale;
+	unsigned long long tsc_now, ns_now;
 
-static unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	return (cyc * cyc2ns_scale) >> NS_SCALE;
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	prev_scale = *scale;
+	if (cpu_khz)
+		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
 unsigned long long sched_clock(void)
@@ -100,7 +133,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 			mark_tsc_unstable("cpufreq changes");
 	}
 
-	set_cyc2ns_scale(tsc_khz_ref);
+	preempt_disable();
+	set_cyc2ns_scale(tsc_khz_ref, smp_processor_id());
+	preempt_enable();
 
 	return 0;
 }
@@ -151,7 +186,7 @@ static unsigned long __init tsc_read_refs(unsigned long *pm,
 void __init tsc_calibrate(void)
 {
 	unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
-	int hpet = is_hpet_enabled();
+	int hpet = is_hpet_enabled(), cpu;
 
 	local_irq_save(flags);
 
@@ -206,7 +241,9 @@ void __init tsc_calibrate(void)
 	}
 
 	tsc_khz = tsc2 / tsc1;
-	set_cyc2ns_scale(tsc_khz);
+
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu);
 }
 
 /*
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index 0db7e994fb8b..4f6fcb050c11 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -2,6 +2,7 @@
 #define _ASMi386_TIMER_H
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/percpu.h>
 
 #define TICK_SIZE (tick_nsec / 1000)
 
@@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void);
 #define calculate_cpu_khz() native_calculate_cpu_khz()
 #endif
 
-/* Accellerators for sched_clock()
+/* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
  *  basic equation:
  *		ns = cycles / (freq / ns_per_sec)
@@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void);
  *	And since SC is a constant power of two, we can convert the div
  *  into a shift.
  *
- *  We can use khz divisor instead of mhz to keep a better percision, since
+ *  We can use khz divisor instead of mhz to keep a better precision, since
  *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
  *  (mathieu.desnoyers@polymtl.ca)
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-extern unsigned long cyc2ns_scale __read_mostly;
+
+DECLARE_PER_CPU(unsigned long, cyc2ns);
 
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+	return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR;
 }
 
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	unsigned long long ns;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ns = __cycles_2_ns(cyc);
+	local_irq_restore(flags);
+
+	return ns;
+}
 
 #endif

From 5ee613b6751cd91db4b6bd7c1dc9d2f9cf65cde2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:06 +0100
Subject: [PATCH 028/890] x86: idle wakeup event in the HLT loop

do a proper idle-wakeup event on HLT as well - some CPUs stop the TSC
in HLT too, not just when going through the ACPI methods.

(the ACPI idle code already does this.)

[ update the 64-bit side too, as noticed by Jiri Slaby. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 15 ++++++++++++---
 arch/x86/kernel/process_64.c | 13 ++++++++++---
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 46d391d49de8..a63d2d2556ee 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,10 +113,19 @@ void default_idle(void)
 		smp_mb();
 
 		local_irq_disable();
-		if (!need_resched())
+		if (!need_resched()) {
+			ktime_t t0, t1;
+			u64 t0n, t1n;
+
+			t0 = ktime_get();
+			t0n = ktime_to_ns(t0);
 			safe_halt();	/* enables interrupts racelessly */
-		else
-			local_irq_enable();
+			local_irq_disable();
+			t1 = ktime_get();
+			t1n = ktime_to_ns(t1);
+			sched_clock_idle_wakeup_event(t1n - t0n);
+		}
+		local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
 	} else {
 		/* loop is done by the caller */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c2db7ef93565..40fed477f3e5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -116,9 +116,16 @@ static void default_idle(void)
 	smp_mb();
 	local_irq_disable();
 	if (!need_resched()) {
-		/* Enables interrupts one instruction before HLT.
-		   x86 special cases this so there is no race. */
-		safe_halt();
+		ktime_t t0, t1;
+		u64 t0n, t1n;
+
+		t0 = ktime_get();
+		t0n = ktime_to_ns(t0);
+		safe_halt();	/* enables interrupts racelessly */
+		local_irq_disable();
+		t1 = ktime_get();
+		t1n = ktime_to_ns(t1);
+		sched_clock_idle_wakeup_event(t1n - t0n);
 	} else
 		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;

From 39d44a51474a52bec6d72d30ebc76f5159101d90 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:30:06 +0100
Subject: [PATCH 029/890] x86: enable irq in default_idle on 64-bit

local_irq_enable() is missing after sched_clock_idle_wakeup_event().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 40fed477f3e5..2c9e59448f4c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -126,8 +126,8 @@ static void default_idle(void)
 		t1 = ktime_get();
 		t1n = ktime_to_ns(t1);
 		sched_clock_idle_wakeup_event(t1n - t0n);
-	} else
-		local_irq_enable();
+	}
+	local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
 }
 

From e845c06bced7f5f325e0f9aefd3207cd45c21ff2 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:06 +0100
Subject: [PATCH 030/890] x86: add <asm/asm.h>

Create <asm/asm.h>, with common definitions suitable for assembly
unification.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/asm.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 include/asm-x86/asm.h

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
new file mode 100644
index 000000000000..b5006eb9acba
--- /dev/null
+++ b/include/asm-x86/asm.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_X86_ASM_H
+#define _ASM_X86_ASM_H
+
+#ifdef CONFIG_X86_32
+/* 32 bits */
+
+# define _ASM_PTR	" .long "
+# define _ASM_ALIGN	" .balign 4 "
+
+#else
+/* 64 bits */
+
+# define _ASM_PTR	" .quad "
+# define _ASM_ALIGN	" .balign 8 "
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* _ASM_X86_ASM_H */

From 7b11fb51567dedeaf6dc03f0135c0a8bb2399818 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:07 +0100
Subject: [PATCH 031/890] x86: unify asm/cpufeature.h

asm/cpufeature.h was already almost unified; this completes the job.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/cpufeature.h    | 191 +++++++++++++++++++++++++++++++-
 include/asm-x86/cpufeature_32.h | 176 -----------------------------
 include/asm-x86/cpufeature_64.h |  30 -----
 3 files changed, 187 insertions(+), 210 deletions(-)
 delete mode 100644 include/asm-x86/cpufeature_32.h
 delete mode 100644 include/asm-x86/cpufeature_64.h

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index b7160a4598d7..4c7875554d01 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -1,5 +1,188 @@
-#ifdef CONFIG_X86_32
-# include "cpufeature_32.h"
-#else
-# include "cpufeature_64.h"
+/*
+ * Defines x86 CPU feature bits
+ */
+#ifndef _ASM_X86_CPUFEATURE_H
+#define _ASM_X86_CPUFEATURE_H
+
+#ifndef __ASSEMBLY__
+#include <linux/bitops.h>
 #endif
+#include <asm/required-features.h>
+
+#define NCAPINTS	8	/* N 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU		(0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME		(0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE		(0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE		(0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR	(0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE		(0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA		(0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV	(0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT		(0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
+#define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+					  /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT		(0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64	(0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL	(1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP		(1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX		(1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_RDTSCP	(1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW	(1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY	(2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN	(2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI	(2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX	(3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR	(3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
+#define X86_FEATURE_P3		(3*32+ 6) /* P3 */
+#define X86_FEATURE_P4		(3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
+/* 14 free */
+#define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
+#define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL	(4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_CID		(4*32+10) /* Context ID */
+#define X86_FEATURE_CX16	(4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
+#define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
+#define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY	(6*32+ 1) /* If yes HyperThreading not valid */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc
+ */
+#define X86_FEATURE_IDA		(7*32+ 0) /* Intel Dynamic Acceleration */
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) &&					\
+	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
+	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
+	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
+	   (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) ||	\
+	   (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) ||	\
+	   (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) ||	\
+	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
+	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) )	\
+	  ? 1 :								\
+	  test_bit(bit, (c)->x86_capability))
+#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
+
+#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
+#define cpu_has_vme		boot_cpu_has(X86_FEATURE_VME)
+#define cpu_has_de		boot_cpu_has(X86_FEATURE_DE)
+#define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
+#define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
+#define cpu_has_pae		boot_cpu_has(X86_FEATURE_PAE)
+#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
+#define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
+#define cpu_has_mtrr		boot_cpu_has(X86_FEATURE_MTRR)
+#define cpu_has_mmx		boot_cpu_has(X86_FEATURE_MMX)
+#define cpu_has_fxsr		boot_cpu_has(X86_FEATURE_FXSR)
+#define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
+#define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
+#define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
+#define cpu_has_mp		boot_cpu_has(X86_FEATURE_MP)
+#define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
+#define cpu_has_k6_mtrr		boot_cpu_has(X86_FEATURE_K6_MTRR)
+#define cpu_has_cyrix_arr	boot_cpu_has(X86_FEATURE_CYRIX_ARR)
+#define cpu_has_centaur_mcr	boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
+#define cpu_has_xstore		boot_cpu_has(X86_FEATURE_XSTORE)
+#define cpu_has_xstore_enabled	boot_cpu_has(X86_FEATURE_XSTORE_EN)
+#define cpu_has_xcrypt		boot_cpu_has(X86_FEATURE_XCRYPT)
+#define cpu_has_xcrypt_enabled	boot_cpu_has(X86_FEATURE_XCRYPT_EN)
+#define cpu_has_ace2		boot_cpu_has(X86_FEATURE_ACE2)
+#define cpu_has_ace2_enabled	boot_cpu_has(X86_FEATURE_ACE2_EN)
+#define cpu_has_phe		boot_cpu_has(X86_FEATURE_PHE)
+#define cpu_has_phe_enabled	boot_cpu_has(X86_FEATURE_PHE_EN)
+#define cpu_has_pmm		boot_cpu_has(X86_FEATURE_PMM)
+#define cpu_has_pmm_enabled	boot_cpu_has(X86_FEATURE_PMM_EN)
+#define cpu_has_ds		boot_cpu_has(X86_FEATURE_DS)
+#define cpu_has_pebs		boot_cpu_has(X86_FEATURE_PEBS)
+#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
+#define cpu_has_bts		boot_cpu_has(X86_FEATURE_BTS)
+
+#ifdef CONFIG_X86_64
+
+#undef  cpu_has_vme
+#define cpu_has_vme		0
+
+#undef  cpu_has_pae
+#define cpu_has_pae		___BUG___
+
+#undef  cpu_has_mp
+#define cpu_has_mp		1
+
+#undef  cpu_has_k6_mtrr
+#define cpu_has_k6_mtrr		0
+
+#undef  cpu_has_cyrix_arr
+#define cpu_has_cyrix_arr	0
+
+#undef  cpu_has_centaur_mcr
+#define cpu_has_centaur_mcr	0
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/include/asm-x86/cpufeature_32.h b/include/asm-x86/cpufeature_32.h
deleted file mode 100644
index f17e688dfb05..000000000000
--- a/include/asm-x86/cpufeature_32.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * cpufeature.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_I386_CPUFEATURE_H
-#define __ASM_I386_CPUFEATURE_H
-
-#ifndef __ASSEMBLY__
-#include <linux/bitops.h>
-#endif
-#include <asm/required-features.h>
-
-#define NCAPINTS	8	/* N 32-bit words worth of info */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		(0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		(0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		(0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE 	(0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
-#define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	(0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		(0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		(0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	(0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT		(0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
-#define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
-				          /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
-#define X86_FEATURE_HT		(0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64	(0*32+30) /* IA-64 processor */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	(1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		(1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		(1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_RDTSCP	(1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	(1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	(2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	(2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	(2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	(3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	(3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3		(3*32+ 6) /* P3 */
-#define X86_FEATURE_P4		(3*32+ 7) /* P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
-/* 14 free */
-#define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
-#define X86_FEATURE_REP_GOOD   (3*32+16) /* rep microcode works well on this CPU */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
-#define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	(4*32+ 4) /* CPL Qualified Debug Store */
-#define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_CID		(4*32+10) /* Context ID */
-#define X86_FEATURE_CX16        (4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
-#define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
-#define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
-#define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
-#define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	(6*32+ 1) /* If yes HyperThreading not valid */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc
- */
-#define X86_FEATURE_IDA		(7*32+ 0) /* Intel Dynamic Acceleration */
-
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) &&					\
-	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
-	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
-	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
-	   (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) ||	\
-	   (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) ||	\
-	   (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) ||	\
-	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
-	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) )	\
-	  ? 1 :								\
-	  test_bit(bit, (c)->x86_capability))
-#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
-
-#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_vme		boot_cpu_has(X86_FEATURE_VME)
-#define cpu_has_de		boot_cpu_has(X86_FEATURE_DE)
-#define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pae		boot_cpu_has(X86_FEATURE_PAE)
-#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
-#define cpu_has_mtrr		boot_cpu_has(X86_FEATURE_MTRR)
-#define cpu_has_mmx		boot_cpu_has(X86_FEATURE_MMX)
-#define cpu_has_fxsr		boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
-#define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_mp		boot_cpu_has(X86_FEATURE_MP)
-#define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_k6_mtrr		boot_cpu_has(X86_FEATURE_K6_MTRR)
-#define cpu_has_cyrix_arr	boot_cpu_has(X86_FEATURE_CYRIX_ARR)
-#define cpu_has_centaur_mcr	boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
-#define cpu_has_xstore		boot_cpu_has(X86_FEATURE_XSTORE)
-#define cpu_has_xstore_enabled	boot_cpu_has(X86_FEATURE_XSTORE_EN)
-#define cpu_has_xcrypt		boot_cpu_has(X86_FEATURE_XCRYPT)
-#define cpu_has_xcrypt_enabled	boot_cpu_has(X86_FEATURE_XCRYPT_EN)
-#define cpu_has_ace2		boot_cpu_has(X86_FEATURE_ACE2)
-#define cpu_has_ace2_enabled	boot_cpu_has(X86_FEATURE_ACE2_EN)
-#define cpu_has_phe		boot_cpu_has(X86_FEATURE_PHE)
-#define cpu_has_phe_enabled	boot_cpu_has(X86_FEATURE_PHE_EN)
-#define cpu_has_pmm		boot_cpu_has(X86_FEATURE_PMM)
-#define cpu_has_pmm_enabled	boot_cpu_has(X86_FEATURE_PMM_EN)
-#define cpu_has_ds		boot_cpu_has(X86_FEATURE_DS)
-#define cpu_has_pebs 		boot_cpu_has(X86_FEATURE_PEBS)
-#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
-#define cpu_has_bts 		boot_cpu_has(X86_FEATURE_BTS)
-
-#endif /* __ASM_I386_CPUFEATURE_H */
-
-/* 
- * Local Variables:
- * mode:c
- * comment-column:42
- * End:
- */
diff --git a/include/asm-x86/cpufeature_64.h b/include/asm-x86/cpufeature_64.h
deleted file mode 100644
index e18496b7b850..000000000000
--- a/include/asm-x86/cpufeature_64.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * cpufeature_32.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_X8664_CPUFEATURE_H
-#define __ASM_X8664_CPUFEATURE_H
-
-#include "cpufeature_32.h"
-
-#undef  cpu_has_vme
-#define cpu_has_vme            0
-
-#undef  cpu_has_pae
-#define cpu_has_pae            ___BUG___
-
-#undef  cpu_has_mp
-#define cpu_has_mp             1 /* XXX */
-
-#undef  cpu_has_k6_mtrr
-#define cpu_has_k6_mtrr        0
-
-#undef  cpu_has_cyrix_arr
-#define cpu_has_cyrix_arr      0
-
-#undef  cpu_has_centaur_mcr
-#define cpu_has_centaur_mcr    0
-
-#endif /* __ASM_X8664_CPUFEATURE_H */

From d94448b1fdc91ee1d235607f6bbe595464e4fbb9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:07 +0100
Subject: [PATCH 032/890] x86: clean up arch/x86/ia32/fpu32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/fpu32.c | 134 +++++++++++++++++++++---------------------
 1 file changed, 67 insertions(+), 67 deletions(-)

diff --git a/arch/x86/ia32/fpu32.c b/arch/x86/ia32/fpu32.c
index 2c8209a3605a..459bf743128f 100644
--- a/arch/x86/ia32/fpu32.c
+++ b/arch/x86/ia32/fpu32.c
@@ -1,8 +1,8 @@
-/* 
+/*
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
  * This is used for ptrace, signals and coredumps in 32bit emulation.
- */ 
+ */
 
 #include <linux/sched.h>
 #include <asm/sigcontext32.h>
@@ -13,96 +13,97 @@
 static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
 {
 	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
- 
+
 	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
-        tmp = ~twd;
-        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-        /* and move the valid bits to the lower byte. */
-        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-        return tmp;
+	tmp = ~twd;
+	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+	/* and move the valid bits to the lower byte. */
+	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+	return tmp;
 }
 
+#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
+#define FP_EXP_TAG_VALID	0
+#define FP_EXP_TAG_ZERO		1
+#define FP_EXP_TAG_SPECIAL	2
+#define FP_EXP_TAG_EMPTY	3
+
 static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 {
-	struct _fpxreg *st = NULL;
+	struct _fpxreg *st;
 	unsigned long tos = (fxsave->swd >> 11) & 7;
 	unsigned long twd = (unsigned long) fxsave->twd;
 	unsigned long tag;
 	unsigned long ret = 0xffff0000;
 	int i;
 
-#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
-
-	for (i = 0 ; i < 8 ; i++) {
+	for (i = 0; i < 8; i++, twd >>= 1) {
 		if (twd & 0x1) {
-			st = FPREG_ADDR( fxsave, (i - tos) & 7 );
+			st = FPREG_ADDR(fxsave, (i - tos) & 7);
 
 			switch (st->exponent & 0x7fff) {
 			case 0x7fff:
-				tag = 2;		/* Special */
+				tag = FP_EXP_TAG_SPECIAL;
 				break;
 			case 0x0000:
-				if ( !st->significand[0] &&
-				     !st->significand[1] &&
-				     !st->significand[2] &&
-				     !st->significand[3] ) {
-					tag = 1;	/* Zero */
-				} else {
-					tag = 2;	/* Special */
-				}
+				if (!st->significand[0] &&
+				    !st->significand[1] &&
+				    !st->significand[2] &&
+				    !st->significand[3])
+					tag = FP_EXP_TAG_ZERO;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
 				break;
 			default:
-				if (st->significand[3] & 0x8000) {
-					tag = 0;	/* Valid */
-				} else {
-					tag = 2;	/* Special */
-				}
+				if (st->significand[3] & 0x8000)
+					tag = FP_EXP_TAG_VALID;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
 				break;
 			}
 		} else {
-			tag = 3;			/* Empty */
+			tag = FP_EXP_TAG_EMPTY;
 		}
-		ret |= (tag << (2 * i));
-		twd = twd >> 1;
+		ret |= tag << (2 * i);
 	}
 	return ret;
 }
 
+#define G(num, val) err |= __get_user(val, num + (u32 __user *)buf)
 
 static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
 					 struct _fpstate_ia32 __user *buf)
 {
 	struct _fpxreg *to;
 	struct _fpreg __user *from;
-	int i;
+	int i, err = 0;
 	u32 v;
-	int err = 0;
 
-#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf)
 	G(0, fxsave->cwd);
 	G(1, fxsave->swd);
 	G(2, fxsave->twd);
 	fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
 	G(3, fxsave->rip);
 	G(4, v);
-	fxsave->fop = v>>16;	/* cs ignored */
+	/* cs ignored */
+	fxsave->fop = v>>16;
 	G(5, fxsave->rdp);
 	/* 6: ds ignored */
-#undef G
-	if (err) 
-		return -1; 
+	if (err)
+		return -1;
 
 	to = (struct _fpxreg *)&fxsave->st_space[0];
 	from = &buf->_st[0];
-	for (i = 0 ; i < 8 ; i++, to++, from++) {
+	for (i = 0; i < 8; i++, to++, from++) {
 		if (__copy_from_user(to, from, sizeof(*from)))
 			return -1;
 	}
 	return 0;
 }
 
+#define P(num, val) err |= __put_user(val, num + (u32 __user *)buf)
 
 static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
 				       struct i387_fxsave_struct *fxsave,
@@ -111,60 +112,59 @@ static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
 {
 	struct _fpreg __user *to;
 	struct _fpxreg *from;
-	int i;
-	u16 cs,ds; 
-	int err = 0; 
+	int i, err = 0;
+	u16 cs, ds;
 
 	if (tsk == current) {
-		/* should be actually ds/cs at fpu exception time,
-		   but that information is not available in 64bit mode. */
-		asm("movw %%ds,%0 " : "=r" (ds)); 
-		asm("movw %%cs,%0 " : "=r" (cs)); 		
-	} else { /* ptrace. task has stopped. */
+		/*
+		 * should be actually ds/cs at fpu exception time, but
+		 * that information is not available in 64bit mode.
+		 */
+		asm("movw %%ds,%0 " : "=r" (ds));
+		asm("movw %%cs,%0 " : "=r" (cs));
+	} else {
+		 /* ptrace. task has stopped. */
 		ds = tsk->thread.ds;
 		cs = regs->cs;
-	} 
+	}
 
-#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf)
 	P(0, (u32)fxsave->cwd | 0xffff0000);
 	P(1, (u32)fxsave->swd | 0xffff0000);
 	P(2, twd_fxsr_to_i387(fxsave));
 	P(3, (u32)fxsave->rip);
-	P(4,  cs | ((u32)fxsave->fop) << 16); 
+	P(4,  cs | ((u32)fxsave->fop) << 16);
 	P(5, fxsave->rdp);
 	P(6, 0xffff0000 | ds);
-#undef P
 
-	if (err) 
-		return -1; 
+	if (err)
+		return -1;
 
 	to = &buf->_st[0];
 	from = (struct _fpxreg *) &fxsave->st_space[0];
-	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+	for (i = 0; i < 8; i++, to++, from++) {
 		if (__copy_to_user(to, from, sizeof(*to)))
 			return -1;
 	}
 	return 0;
 }
 
-int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave) 
-{ 
+int restore_i387_ia32(struct task_struct *tsk,
+		      struct _fpstate_ia32 __user *buf, int fsave)
+{
 	clear_fpu(tsk);
-	if (!fsave) { 
-		if (__copy_from_user(&tsk->thread.i387.fxsave, 
+	if (!fsave) {
+		if (__copy_from_user(&tsk->thread.i387.fxsave,
 				     &buf->_fxsr_env[0],
 				     sizeof(struct i387_fxsave_struct)))
 			return -1;
 		tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
 		set_stopped_child_used_math(tsk);
-	} 
+	}
 	return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
-}  
+}
 
-int save_i387_ia32(struct task_struct *tsk, 
-		   struct _fpstate_ia32 __user *buf, 
-		   struct pt_regs *regs,
-		   int fsave)
+int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf,
+		   struct pt_regs *regs, int fsave)
 {
 	int err = 0;
 
@@ -174,8 +174,8 @@ int save_i387_ia32(struct task_struct *tsk,
 	if (fsave)
 		return 0;
 	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
-	if (fsave) 
-		return err ? -1 : 1; 	
+	if (fsave)
+		return err ? -1 : 1;
 	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
 	err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
 			      sizeof(struct i387_fxsave_struct));

From 8edf8bee889a3d13ae2db0f7992821fb5dc78bfa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:07 +0100
Subject: [PATCH 033/890] x86: clean up arch/x86/ia32/aout32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_aout.c | 210 +++++++++++++++++++++-----------------
 1 file changed, 114 insertions(+), 96 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f82e1a94fcb7..a764e4e95314 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -36,38 +36,44 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* probably broken */
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
-static int load_aout_library(struct file*);
+static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
+static int load_aout_library(struct file *);
 
 #ifdef CORE_DUMP
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
+			  unsigned long limit);
 
 /*
  * fill in the user structure for a core dump..
  */
-static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
+static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 {
-	u32 fs,gs;
+	u32 fs, gs;
 
 /* changed the size calculations - should hopefully work better. lbt */
 	dump->magic = CMAGIC;
 	dump->start_code = 0;
 	dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
 	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
-	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+	dump->u_dsize = ((unsigned long)
+			 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;  
-	dump->u_debugreg[1] = current->thread.debugreg1;  
-	dump->u_debugreg[2] = current->thread.debugreg2;  
-	dump->u_debugreg[3] = current->thread.debugreg3;  
-	dump->u_debugreg[4] = 0;  
-	dump->u_debugreg[5] = 0;  
-	dump->u_debugreg[6] = current->thread.debugreg6;  
-	dump->u_debugreg[7] = current->thread.debugreg7;  
+	dump->u_debugreg[0] = current->thread.debugreg0;
+	dump->u_debugreg[1] = current->thread.debugreg1;
+	dump->u_debugreg[2] = current->thread.debugreg2;
+	dump->u_debugreg[3] = current->thread.debugreg3;
+	dump->u_debugreg[4] = 0;
+	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[6] = current->thread.debugreg6;
+	dump->u_debugreg[7] = current->thread.debugreg7;
 
-	if (dump->start_stack < 0xc0000000)
-		dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT;
+	if (dump->start_stack < 0xc0000000) {
+		unsigned long tmp;
+
+		tmp = (unsigned long) (0xc0000000 - dump->start_stack);
+		dump->u_ssize = tmp >> PAGE_SHIFT;
+	}
 
 	dump->regs.ebx = regs->rbx;
 	dump->regs.ecx = regs->rcx;
@@ -79,7 +85,7 @@ static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
 	asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
-	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; 
+	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
 	dump->regs.orig_eax = regs->orig_rax;
 	dump->regs.eip = regs->rip;
 	dump->regs.cs = regs->cs;
@@ -90,7 +96,7 @@ static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
 #if 1 /* FIXME */
 	dump->u_fpvalid = 0;
 #else
-	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+	dump->u_fpvalid = dump_fpu(regs, &dump->i387);
 #endif
 }
 
@@ -128,15 +134,19 @@ static int dump_write(struct file *file, const void *addr, int nr)
 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
 }
 
-#define DUMP_WRITE(addr, nr)	\
+#define DUMP_WRITE(addr, nr)			     \
 	if (!dump_write(file, (void *)(addr), (nr))) \
 		goto end_coredump;
 
-#define DUMP_SEEK(offset) \
-if (file->f_op->llseek) { \
-	if (file->f_op->llseek(file,(offset),0) != (offset)) \
- 		goto end_coredump; \
-} else file->f_pos = (offset)
+#define DUMP_SEEK(offset)						\
+	if (file->f_op->llseek) {					\
+		if (file->f_op->llseek(file, (offset), 0) != (offset))	\
+			goto end_coredump;				\
+	} else								\
+		file->f_pos = (offset)
+
+#define START_DATA()	(u.u_tsize << PAGE_SHIFT)
+#define START_STACK(u)	(u.start_stack)
 
 /*
  * Routine writes a core dump image in the current directory.
@@ -148,62 +158,70 @@ if (file->f_op->llseek) { \
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
+			  unsigned long limit)
 {
 	mm_segment_t fs;
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
 	struct user32 dump;
-#       define START_DATA(u)	(u.u_tsize << PAGE_SHIFT)
-#       define START_STACK(u)   (u.start_stack)
 
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 	has_dumped = 1;
 	current->flags |= PF_DUMPCORE;
-       	strncpy(dump.u_comm, current->comm, sizeof(current->comm));
-	dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
+	strncpy(dump.u_comm, current->comm, sizeof(current->comm));
+	dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) -
+			   ((unsigned long)(&dump)));
 	dump.signal = signr;
 	dump_thread32(regs, &dump);
 
-/* If the size of the dump file exceeds the rlimit, then see what would happen
-   if we wrote the stack, but not the data area.  */
+	/*
+	 * If the size of the dump file exceeds the rlimit, then see
+	 * what would happen if we wrote the stack, but not the data
+	 * area.
+	 */
 	if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit)
 		dump.u_dsize = 0;
 
-/* Make sure we have enough room to write the stack and data areas. */
+	/* Make sure we have enough room to write the stack and data areas. */
 	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
 		dump.u_ssize = 0;
 
-/* make sure we actually have a data and stack area to dump */
+	/* make sure we actually have a data and stack area to dump */
 	set_fs(USER_DS);
-	if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+	if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump),
+		       dump.u_dsize << PAGE_SHIFT))
 		dump.u_dsize = 0;
-	if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+	if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump),
+		       dump.u_ssize << PAGE_SHIFT))
 		dump.u_ssize = 0;
 
 	set_fs(KERNEL_DS);
-/* struct user */
-	DUMP_WRITE(&dump,sizeof(dump));
-/* Now dump all of the user data.  Include malloced stuff as well */
+	/* struct user */
+	DUMP_WRITE(&dump, sizeof(dump));
+	/* Now dump all of the user data.  Include malloced stuff as well */
 	DUMP_SEEK(PAGE_SIZE);
-/* now we start writing out the user space info */
+	/* now we start writing out the user space info */
 	set_fs(USER_DS);
-/* Dump the data area */
+	/* Dump the data area */
 	if (dump.u_dsize != 0) {
 		dump_start = START_DATA(dump);
 		dump_size = dump.u_dsize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start,dump_size);
+		DUMP_WRITE(dump_start, dump_size);
 	}
-/* Now prepare to dump the stack area */
+	/* Now prepare to dump the stack area */
 	if (dump.u_ssize != 0) {
 		dump_start = START_STACK(dump);
 		dump_size = dump.u_ssize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start,dump_size);
+		DUMP_WRITE(dump_start, dump_size);
 	}
-/* Finally dump the task struct.  Not be used by gdb, but could be useful */
+	/*
+	 * Finally dump the task struct.  Not be used by gdb, but
+	 * could be useful
+	 */
 	set_fs(KERNEL_DS);
-	DUMP_WRITE(current,sizeof(*current));
+	DUMP_WRITE(current, sizeof(*current));
 end_coredump:
 	set_fs(fs);
 	return has_dumped;
@@ -217,35 +235,34 @@ end_coredump:
  */
 static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
 {
-	u32 __user *argv;
-	u32 __user *envp;
-	u32 __user *sp;
-	int argc = bprm->argc;
-	int envc = bprm->envc;
+	u32 __user *argv, *envp, *sp;
+	int argc = bprm->argc, envc = bprm->envc;
 
 	sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
 	sp -= envc+1;
 	envp = sp;
 	sp -= argc+1;
 	argv = sp;
-	put_user((unsigned long) envp,--sp);
-	put_user((unsigned long) argv,--sp);
-	put_user(argc,--sp);
+	put_user((unsigned long) envp, --sp);
+	put_user((unsigned long) argv, --sp);
+	put_user(argc, --sp);
 	current->mm->arg_start = (unsigned long) p;
-	while (argc-->0) {
+	while (argc-- > 0) {
 		char c;
-		put_user((u32)(unsigned long)p,argv++);
+
+		put_user((u32)(unsigned long)p, argv++);
 		do {
-			get_user(c,p++);
+			get_user(c, p++);
 		} while (c);
 	}
 	put_user(0, argv);
 	current->mm->arg_end = current->mm->env_start = (unsigned long) p;
-	while (envc-->0) {
+	while (envc-- > 0) {
 		char c;
-		put_user((u32)(unsigned long)p,envp++);
+
+		put_user((u32)(unsigned long)p, envp++);
 		do {
-			get_user(c,p++);
+			get_user(c, p++);
 		} while (c);
 	}
 	put_user(0, envp);
@@ -257,20 +274,18 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
  * These are the functions used to load a.out style executables and shared
  * libraries.  There is no binary dependent code anywhere else.
  */
-
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 {
+	unsigned long error, fd_offset, rlim;
 	struct exec ex;
-	unsigned long error;
-	unsigned long fd_offset;
-	unsigned long rlim;
 	int retval;
 
 	ex = *((struct exec *) bprm->buf);		/* exec-header */
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
 	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
 	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(bprm->file->f_path.dentry->d_inode) <
+	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		return -ENOEXEC;
 	}
 
@@ -291,13 +306,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS; 
+	regs->cs = __USER32_CS;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
 		regs->r13 = regs->r14 = regs->r15 = 0;
 
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
-	set_thread_flag(TIF_IA32); 
+	set_thread_flag(TIF_IA32);
 	clear_thread_flag(TIF_ABI_PENDING);
 
 	current->mm->end_code = ex.a_text +
@@ -311,7 +326,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
- 	current->flags &= ~PF_FORKNOEXEC;
+	current->flags &= ~PF_FORKNOEXEC;
 
 	if (N_MAGIC(ex) == OMAGIC) {
 		unsigned long text_addr, map_size;
@@ -338,30 +353,30 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			send_sig(SIGKILL, current, 0);
 			return error;
 		}
-			 
+
 		flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
 	} else {
 #ifdef WARN_OLD
 		static unsigned long error_time, error_time2;
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
-		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
-		{
+		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) {
 			printk(KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
-		    (jiffies-error_time) > 5*HZ)
-		{
-			printk(KERN_WARNING 
-			       "fd_offset is not page aligned. Please convert program: %s\n",
+		    (jiffies - error_time) > 5*HZ) {
+			printk(KERN_WARNING
+			       "fd_offset is not page aligned. Please convert "
+			       "program: %s\n",
 			       bprm->file->f_path.dentry->d_name.name);
 			error_time = jiffies;
 		}
 #endif
 
-		if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+		if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
 			loff_t pos = fd_offset;
+
 			down_write(&current->mm->mmap_sem);
 			do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
 			up_write(&current->mm->mmap_sem);
@@ -376,9 +391,10 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 		down_write(&current->mm->mmap_sem);
 		error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-			PROT_READ | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
-			fd_offset);
+				PROT_READ | PROT_EXEC,
+				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
+				MAP_EXECUTABLE | MAP_32BIT,
+				fd_offset);
 		up_write(&current->mm->mmap_sem);
 
 		if (error != N_TXTADDR(ex)) {
@@ -387,9 +403,10 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		}
 
 		down_write(&current->mm->mmap_sem);
- 		error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+		error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
+				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
+				MAP_EXECUTABLE | MAP_32BIT,
 				fd_offset + ex.a_text);
 		up_write(&current->mm->mmap_sem);
 		if (error != N_DATADDR(ex)) {
@@ -403,9 +420,9 @@ beyond_if:
 	set_brk(current->mm->start_brk, current->mm->brk);
 
 	retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0) { 
-		/* Someone check-me: is this error path enough? */ 
-		send_sig(SIGKILL, current, 0); 
+	if (retval < 0) {
+		/* Someone check-me: is this error path enough? */
+		send_sig(SIGKILL, current, 0);
 		return retval;
 	}
 
@@ -414,7 +431,7 @@ beyond_if:
 	/* start thread */
 	asm volatile("movl %0,%%fs" :: "r" (0)); \
 	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
-	load_gs_index(0); 
+	load_gs_index(0);
 	(regs)->rip = ex.a_entry;
 	(regs)->rsp = current->mm->start_stack;
 	(regs)->eflags = 0x200;
@@ -425,7 +442,7 @@ beyond_if:
 	set_fs(USER_DS);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
 		if (current->ptrace & PT_TRACE_EXEC)
-			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
 		else
 			send_sig(SIGTRAP, current, 0);
 	}
@@ -434,9 +451,8 @@ beyond_if:
 
 static int load_aout_library(struct file *file)
 {
-	struct inode * inode;
-	unsigned long bss, start_addr, len;
-	unsigned long error;
+	struct inode *inode;
+	unsigned long bss, start_addr, len, error;
 	int retval;
 	struct exec ex;
 
@@ -450,7 +466,8 @@ static int load_aout_library(struct file *file)
 	/* We come in here for the regular a.out style of shared libraries */
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
 	    N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-	    i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(inode) <
+	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		goto out;
 	}
 
@@ -467,10 +484,10 @@ static int load_aout_library(struct file *file)
 
 #ifdef WARN_OLD
 		static unsigned long error_time;
-		if ((jiffies-error_time) > 5*HZ)
-		{
-			printk(KERN_WARNING 
-			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
+		if ((jiffies-error_time) > 5*HZ) {
+			printk(KERN_WARNING
+			       "N_TXTOFF is not page aligned. Please convert "
+			       "library: %s\n",
 			       file->f_path.dentry->d_name.name);
 			error_time = jiffies;
 		}
@@ -478,11 +495,12 @@ static int load_aout_library(struct file *file)
 		down_write(&current->mm->mmap_sem);
 		do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
 		up_write(&current->mm->mmap_sem);
-		
+
 		file->f_op->read(file, (char __user *)start_addr,
 			ex.a_text + ex.a_data, &pos);
 		flush_icache_range((unsigned long) start_addr,
-				   (unsigned long) start_addr + ex.a_text + ex.a_data);
+				   (unsigned long) start_addr + ex.a_text +
+				   ex.a_data);
 
 		retval = 0;
 		goto out;

From 99b9cdf758af7004a716cf304dd5cd41af7db227 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:07 +0100
Subject: [PATCH 034/890] x86: clean up arch/x86/ia32/ia32_signal.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 367 ++++++++++++++++++------------------
 1 file changed, 181 insertions(+), 186 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 6ea19c25f90d..0fc5d8563e19 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -43,7 +43,8 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
 	int err;
-	if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+
+	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
 		return -EFAULT;
 
 	/* If you change siginfo_t structure, please make sure that
@@ -53,16 +54,19 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 	   3 ints plus the relevant union member.  */
 	err = __put_user(from->si_signo, &to->si_signo);
 	err |= __put_user(from->si_errno, &to->si_errno);
- 	err |= __put_user((short)from->si_code, &to->si_code);
+	err |= __put_user((short)from->si_code, &to->si_code);
 
 	if (from->si_code < 0) {
 		err |= __put_user(from->si_pid, &to->si_pid);
- 		err |= __put_user(from->si_uid, &to->si_uid);
- 		err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
 	} else {
- 		/* First 32bits of unions are always present:
- 		 * si_pid === si_band === si_tid === si_addr(LS half) */
-		err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]);
+		/*
+		 * First 32bits of unions are always present:
+		 * si_pid === si_band === si_tid === si_addr(LS half)
+		 */
+		err |= __put_user(from->_sifields._pad[0],
+				  &to->_sifields._pad[0]);
 		switch (from->si_code >> 16) {
 		case __SI_FAULT >> 16:
 			break;
@@ -76,14 +80,15 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 			err |= __put_user(from->si_uid, &to->si_uid);
 			break;
 		case __SI_POLL >> 16:
-			err |= __put_user(from->si_fd, &to->si_fd); 
+			err |= __put_user(from->si_fd, &to->si_fd);
 			break;
 		case __SI_TIMER >> 16:
-			err |= __put_user(from->si_overrun, &to->si_overrun); 
+			err |= __put_user(from->si_overrun, &to->si_overrun);
 			err |= __put_user(ptr_to_compat(from->si_ptr),
-					&to->si_ptr);
+					  &to->si_ptr);
 			break;
-		case __SI_RT >> 16: /* This is not generated by the kernel as of now.  */
+			 /* This is not generated by the kernel as of now.  */
+		case __SI_RT >> 16:
 		case __SI_MESGQ >> 16:
 			err |= __put_user(from->si_uid, &to->si_uid);
 			err |= __put_user(from->si_int, &to->si_int);
@@ -97,7 +102,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
 	int err;
 	u32 ptr32;
-	if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+
+	if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
 		return -EFAULT;
 
 	err = __get_user(to->si_signo, &from->si_signo);
@@ -112,8 +118,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 	return err;
 }
 
-asmlinkage long
-sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
+asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
 {
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
@@ -128,36 +133,37 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
 	return -ERESTARTNOHAND;
 }
 
-asmlinkage long
-sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
-		  stack_ia32_t __user *uoss_ptr, 
-		  struct pt_regs *regs)
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
+				  stack_ia32_t __user *uoss_ptr,
+				  struct pt_regs *regs)
 {
-	stack_t uss,uoss; 
+	stack_t uss, uoss;
 	int ret;
-	mm_segment_t seg; 
-	if (uss_ptr) { 
+	mm_segment_t seg;
+
+	if (uss_ptr) {
 		u32 ptr;
-		memset(&uss,0,sizeof(stack_t));
-		if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
+
+		memset(&uss, 0, sizeof(stack_t));
+		if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) ||
 			    __get_user(ptr, &uss_ptr->ss_sp) ||
 			    __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
 			    __get_user(uss.ss_size, &uss_ptr->ss_size))
 			return -EFAULT;
 		uss.ss_sp = compat_ptr(ptr);
 	}
-	seg = get_fs(); 
-	set_fs(KERNEL_DS); 
+	seg = get_fs();
+	set_fs(KERNEL_DS);
 	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp);
-	set_fs(seg); 
+	set_fs(seg);
 	if (ret >= 0 && uoss_ptr)  {
-		if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) ||
+		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) ||
 		    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
 		    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
 		    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
 			ret = -EFAULT;
-	} 	
-	return ret;	
+	}
+	return ret;
 }
 
 /*
@@ -186,18 +192,6 @@ struct rt_sigframe
 	char retcode[8];
 };
 
-static int
-ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax)
-{
-	unsigned int err = 0;
-	
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#if DEBUG_SIG
-	printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
-		sc, sc->err, sc->eip, sc->cs, sc->eflags);
-#endif
 #define COPY(x)		{ \
 	unsigned int reg;			\
 	err |= __get_user(reg, &sc->e ##x);	\
@@ -205,68 +199,78 @@ ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc,
 }
 
 #define RELOAD_SEG(seg,mask)						\
-	{ unsigned int cur; 						\
+	{ unsigned int cur;						\
 	  unsigned short pre;						\
 	  err |= __get_user(pre, &sc->seg);				\
-    	  asm volatile("movl %%" #seg ",%0" : "=r" (cur));		\
-	  pre |= mask; 							\
-	  if (pre != cur) loadsegment(seg,pre); }
+	  asm volatile("movl %%" #seg ",%0" : "=r" (cur));		\
+	  pre |= mask;							\
+	  if (pre != cur) loadsegment(seg, pre); }
 
-	/* Reload fs and gs if they have changed in the signal handler.
-	   This does not handle long fs/gs base changes in the handler, but 
-	   does not clobber them at least in the normal case. */ 
-	
-	{
-		unsigned gs, oldgs; 
-		err |= __get_user(gs, &sc->gs);
-		gs |= 3; 
-		asm("movl %%gs,%0" : "=r" (oldgs));
-		if (gs != oldgs)
-		load_gs_index(gs); 
-	} 
-	RELOAD_SEG(fs,3);
-	RELOAD_SEG(ds,3);
-	RELOAD_SEG(es,3);
+static int ia32_restore_sigcontext(struct pt_regs *regs,
+				   struct sigcontext_ia32 __user *sc,
+				   unsigned int *peax)
+{
+	unsigned int tmpflags, gs, oldgs, err = 0;
+	struct _fpstate_ia32 __user *buf;
+	u32 tmp;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#if DEBUG_SIG
+	printk(KERN_DEBUG "SIG restore_sigcontext: "
+	       "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
+	       sc, sc->err, sc->eip, sc->cs, sc->eflags);
+#endif
+
+	/*
+	 * Reload fs and gs if they have changed in the signal
+	 * handler.  This does not handle long fs/gs base changes in
+	 * the handler, but does not clobber them at least in the
+	 * normal case.
+	 */
+	err |= __get_user(gs, &sc->gs);
+	gs |= 3;
+	asm("movl %%gs,%0" : "=r" (oldgs));
+	if (gs != oldgs)
+		load_gs_index(gs);
+
+	RELOAD_SEG(fs, 3);
+	RELOAD_SEG(ds, 3);
+	RELOAD_SEG(es, 3);
 
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
-	/* Don't touch extended registers */ 
-	
-	err |= __get_user(regs->cs, &sc->cs); 
-	regs->cs |= 3;  
-	err |= __get_user(regs->ss, &sc->ss); 
-	regs->ss |= 3; 
+	/* Don't touch extended registers */
 
-	{
-		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->eflags);
-		regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
-		regs->orig_rax = -1;		/* disable syscall checks */
-	}
+	err |= __get_user(regs->cs, &sc->cs);
+	regs->cs |= 3;
+	err |= __get_user(regs->ss, &sc->ss);
+	regs->ss |= 3;
 
-	{
-		u32 tmp;
-		struct _fpstate_ia32 __user * buf;
-		err |= __get_user(tmp, &sc->fpstate);
-		buf = compat_ptr(tmp);
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387_ia32(current, buf, 0);
-		} else {
-			struct task_struct *me = current;
-			if (used_math()) {
-				clear_fpu(me);
-				clear_used_math();
-			}
+	err |= __get_user(tmpflags, &sc->eflags);
+	regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+	/* disable syscall checks */
+	regs->orig_rax = -1;
+
+	err |= __get_user(tmp, &sc->fpstate);
+	buf = compat_ptr(tmp);
+	if (buf) {
+		if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
+			goto badframe;
+		err |= restore_i387_ia32(current, buf, 0);
+	} else {
+		struct task_struct *me = current;
+
+		if (used_math()) {
+			clear_fpu(me);
+			clear_used_math();
 		}
 	}
 
-	{ 
-		u32 tmp;
-		err |= __get_user(tmp, &sc->eax);
-		*peax = tmp;
-	}
+	err |= __get_user(tmp, &sc->eax);
+	*peax = tmp;
+
 	return err;
 
 badframe:
@@ -283,7 +287,8 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 		goto badframe;
 	if (__get_user(set.sig[0], &frame->sc.oldmask)
 	    || (_COMPAT_NSIG_WORDS > 1
-		&& __copy_from_user((((char *) &set.sig) + 4), &frame->extramask,
+		&& __copy_from_user((((char *) &set.sig) + 4),
+				    &frame->extramask,
 				    sizeof(frame->extramask))))
 		goto badframe;
 
@@ -292,7 +297,7 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 	current->blocked = set;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
-	
+
 	if (ia32_restore_sigcontext(regs, &frame->sc, &eax))
 		goto badframe;
 	return eax;
@@ -300,7 +305,7 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 badframe:
 	signal_fault(regs, frame, "32bit sigreturn");
 	return 0;
-}	
+}
 
 asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 {
@@ -321,7 +326,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	current->blocked = set;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
-	
+
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
 		goto badframe;
 
@@ -332,17 +337,17 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	return eax;
 
 badframe:
-	signal_fault(regs,frame,"32bit rt sigreturn");
+	signal_fault(regs, frame, "32bit rt sigreturn");
 	return 0;
-}	
+}
 
 /*
  * Set up a signal frame.
  */
 
-static int
-ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
-		 struct pt_regs *regs, unsigned int mask)
+static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
+				 struct _fpstate_ia32 __user *fpstate,
+				 struct pt_regs *regs, unsigned int mask)
 {
 	int tmp, err = 0;
 
@@ -375,7 +380,7 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __
 	tmp = save_i387_ia32(current, fpstate, regs, 0);
 	if (tmp < 0)
 		err = -EFAULT;
-	else { 
+	else {
 		clear_used_math();
 		stts();
 		err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
@@ -392,8 +397,8 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __
 /*
  * Determine which stack to use..
  */
-static void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+				 size_t frame_size)
 {
 	unsigned long rsp;
 
@@ -409,9 +414,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	/* This is the legacy signal stack switching. */
 	else if ((regs->ss & 0xffff) != __USER_DS &&
 		!(ka->sa.sa_flags & SA_RESTORER) &&
-		 ka->sa.sa_restorer) {
+		 ka->sa.sa_restorer)
 		rsp = (unsigned long) ka->sa.sa_restorer;
-	}
 
 	rsp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
@@ -421,11 +425,25 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 }
 
 int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		     compat_sigset_t *set, struct pt_regs * regs)
+		     compat_sigset_t *set, struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
+	void __user *restorer;
 	int err = 0;
 
+	/* copy_to_user optimizes that into a single 8 byte store */
+	static const struct {
+		u16 poplmovl;
+		u32 val;
+		u16 int80;
+		u16 pad;
+	} __attribute__((packed)) code = {
+		0xb858,		 /* popl %eax ; movl $...,%eax */
+		__NR_ia32_sigreturn,
+		0x80cd,		/* int $0x80 */
+		0,
+	};
+
 	frame = get_sigframe(ka, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
@@ -443,38 +461,24 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	if (_COMPAT_NSIG_WORDS > 1) {
 		err |= __copy_to_user(frame->extramask, &set->sig[1],
 				      sizeof(frame->extramask));
+		if (err)
+			goto give_sigsegv;
 	}
-	if (err)
-		goto give_sigsegv;
 
 	/* Return stub is in 32bit vsyscall page */
-	{ 
-		void __user *restorer;
-		if (current->binfmt->hasvdso)
-			restorer = VSYSCALL32_SIGRETURN;
-		else
-			restorer = (void *)&frame->retcode;
-		if (ka->sa.sa_flags & SA_RESTORER)
-			restorer = ka->sa.sa_restorer;       
-		err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
-	}
-	/* These are actually not used anymore, but left because some 
-	   gdb versions depend on them as a marker. */
-	{ 
-		/* copy_to_user optimizes that into a single 8 byte store */
-		static const struct { 
-			u16 poplmovl;
-			u32 val;
-			u16 int80;    
-			u16 pad; 
-		} __attribute__((packed)) code = { 
-			0xb858,		 /* popl %eax ; movl $...,%eax */
-			__NR_ia32_sigreturn,   
-			0x80cd,		/* int $0x80 */
-			0,
-		}; 
-		err |= __copy_to_user(frame->retcode, &code, 8); 
-	}
+	if (current->binfmt->hasvdso)
+		restorer = VSYSCALL32_SIGRETURN;
+	else
+		restorer = (void *)&frame->retcode;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+	err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+
+	/*
+	 * These are actually not used anymore, but left because some
+	 * gdb versions depend on them as a marker.
+	 */
+	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
 		goto give_sigsegv;
 
@@ -487,11 +491,11 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->rdx = 0;
 	regs->rcx = 0;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 
+	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
 
-	regs->cs = __USER32_CS; 
-	regs->ss = __USER32_DS; 
+	regs->cs = __USER32_CS;
+	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
 	regs->eflags &= ~TF_MASK;
@@ -499,8 +503,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
-		current->comm, current->pid, frame, regs->rip, frame->pretcode);
+	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
+	       current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
 	return 0;
@@ -511,25 +515,34 @@ give_sigsegv:
 }
 
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			compat_sigset_t *set, struct pt_regs * regs)
+			compat_sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
+	struct exec_domain *ed = current_thread_info()->exec_domain;
+	void __user *restorer = VSYSCALL32_RTSIGRETURN;
 	int err = 0;
 
+	/* __copy_to_user optimizes that into a single 8 byte store */
+	static const struct {
+		u8 movl;
+		u32 val;
+		u16 int80;
+		u16 pad;
+		u8  pad2;
+	} __attribute__((packed)) code = {
+		0xb8,
+		__NR_ia32_rt_sigreturn,
+		0x80cd,
+		0,
+	};
+
 	frame = get_sigframe(ka, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	{
-		struct exec_domain *ed = current_thread_info()->exec_domain;
-		err |= __put_user((ed
-		    	   && ed->signal_invmap
-		    	   && sig < 32
-		    	   ? ed->signal_invmap[sig]
-			   : sig),
-			  &frame->sig);
-	}
+	err |= __put_user((ed && ed->signal_invmap && sig < 32
+			   ? ed->signal_invmap[sig] : sig), &frame->sig);
 	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
 	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
 	err |= copy_siginfo_to_user32(&frame->info, info);
@@ -544,38 +557,20 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
-			        regs, set->sig[0]);
+				     regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
 		goto give_sigsegv;
 
-	
-	{ 
-		void __user *restorer = VSYSCALL32_RTSIGRETURN; 
-		if (ka->sa.sa_flags & SA_RESTORER)
-			restorer = ka->sa.sa_restorer;       
-		err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
-	}
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+	err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
 
-	/* This is movl $,%eax ; int $0x80 */
-	/* Not actually used anymore, but left because some gdb versions
-	   need it. */ 
-	{ 
-		/* __copy_to_user optimizes that into a single 8 byte store */
-		static const struct { 
-			u8 movl; 
-			u32 val; 
-			u16 int80; 
-			u16 pad;
-			u8  pad2;				
-		} __attribute__((packed)) code = { 
-			0xb8,
-			__NR_ia32_rt_sigreturn,
-			0x80cd,
-			0,
-		}; 
-		err |= __copy_to_user(frame->retcode, &code, 8); 
-	} 
+	/*
+	 * Not actually used anymore, but left because some gdb
+	 * versions need it.
+	 */
+	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
 		goto give_sigsegv;
 
@@ -593,11 +588,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->rdx = (unsigned long) &frame->info;
 	regs->rcx = (unsigned long) &frame->uc;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 
-	
-	regs->cs = __USER32_CS; 
-	regs->ss = __USER32_DS; 
+	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+
+	regs->cs = __USER32_CS;
+	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
 	regs->eflags &= ~TF_MASK;
@@ -605,8 +600,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
-		current->comm, current->pid, frame, regs->rip, frame->pretcode);
+	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
+	       current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
 	return 0;

From 2da06b4e5da96fff4f07cc35af1db407e1e21aa7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:08 +0100
Subject: [PATCH 035/890] x86: clean up arch/x86/ia32/ipc32.c

White space and coding style cleanup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ipc32.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index 7b3342e5aab5..d21991ce606c 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -9,9 +9,8 @@
 #include <linux/ipc.h>
 #include <linux/compat.h>
 
-asmlinkage long
-sys32_ipc(u32 call, int first, int second, int third,
-		compat_uptr_t ptr, u32 fifth)
+asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
+			  compat_uptr_t ptr, u32 fifth)
 {
 	int version;
 
@@ -19,36 +18,35 @@ sys32_ipc(u32 call, int first, int second, int third,
 	call &= 0xffff;
 
 	switch (call) {
-	      case SEMOP:
+	case SEMOP:
 		/* struct sembuf is the same on 32 and 64bit :)) */
 		return sys_semtimedop(first, compat_ptr(ptr), second, NULL);
-	      case SEMTIMEDOP:
+	case SEMTIMEDOP:
 		return compat_sys_semtimedop(first, compat_ptr(ptr), second,
 						compat_ptr(fifth));
-	      case SEMGET:
+	case SEMGET:
 		return sys_semget(first, second, third);
-	      case SEMCTL:
+	case SEMCTL:
 		return compat_sys_semctl(first, second, third, compat_ptr(ptr));
 
-	      case MSGSND:
+	case MSGSND:
 		return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
-	      case MSGRCV:
+	case MSGRCV:
 		return compat_sys_msgrcv(first, second, fifth, third,
 					 version, compat_ptr(ptr));
-	      case MSGGET:
+	case MSGGET:
 		return sys_msgget((key_t) first, second);
-	      case MSGCTL:
+	case MSGCTL:
 		return compat_sys_msgctl(first, second, compat_ptr(ptr));
 
-	      case SHMAT:
+	case SHMAT:
 		return compat_sys_shmat(first, second, third, version,
 					compat_ptr(ptr));
-		break;
-	      case SHMDT:
+	case SHMDT:
 		return sys_shmdt(compat_ptr(ptr));
-	      case SHMGET:
+	case SHMGET:
 		return sys_shmget(first, (unsigned)second, third);
-	      case SHMCTL:
+	case SHMCTL:
 		return compat_sys_shmctl(first, second, compat_ptr(ptr));
 	}
 	return -ENOSYS;

From 5de15d42e4326b11ff9f3e733343fe7d4ece700b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:08 +0100
Subject: [PATCH 036/890] x86: clean up arch/x86/ia32/ptrace32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ptrace32.c | 190 ++++++++++++++++++++++-----------------
 1 file changed, 107 insertions(+), 83 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 4a233ad6269c..9d754b640205 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -1,13 +1,13 @@
-/* 
+/*
  * 32bit ptrace for x86-64.
  *
  * Copyright 2001,2002 Andi Kleen, SuSE Labs.
- * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier 
+ * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier
  * copyright.
- * 
- * This allows to access 64bit processes too; but there is no way to see the extended 
- * register contents.
- */ 
+ *
+ * This allows to access 64bit processes too; but there is no way to
+ * see the extended register contents.
+ */
 
 #include <linux/kernel.h>
 #include <linux/stddef.h>
@@ -35,8 +35,9 @@
  */
 #define FLAG_MASK 0x54dd5UL
 
-#define R32(l,q) \
-	case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
+#define R32(l,q)							\
+	case offsetof(struct user32, regs.l):				\
+		stack[offsetof(struct pt_regs, q) / 8] = val; break
 
 static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 {
@@ -45,30 +46,35 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
-		if (val && (val & 3) != 3) return -EIO; 
+		if (val && (val & 3) != 3)
+			return -EIO;
 		child->thread.fsindex = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.gs):
-		if (val && (val & 3) != 3) return -EIO; 
+		if (val && (val & 3) != 3)
+			return -EIO;
 		child->thread.gsindex = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.ds):
-		if (val && (val & 3) != 3) return -EIO; 
+		if (val && (val & 3) != 3)
+			return -EIO;
 		child->thread.ds = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.es):
 		child->thread.es = val & 0xffff;
 		break;
-	case offsetof(struct user32, regs.ss): 
-		if ((val & 3) != 3) return -EIO;
-        	stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
+	case offsetof(struct user32, regs.ss):
+		if ((val & 3) != 3)
+			return -EIO;
+		stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
 		break;
-	case offsetof(struct user32, regs.cs): 
-		if ((val & 3) != 3) return -EIO;
+	case offsetof(struct user32, regs.cs):
+		if ((val & 3) != 3)
+			return -EIO;
 		stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
 		break;
 
-	R32(ebx, rbx); 
+	R32(ebx, rbx);
 	R32(ecx, rcx);
 	R32(edx, rdx);
 	R32(edi, rdi);
@@ -81,12 +87,13 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 
 	case offsetof(struct user32, regs.eflags): {
 		__u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
+
 		val &= FLAG_MASK;
 		*flags = val | (*flags & ~FLAG_MASK);
 		break;
 	}
 
-	case offsetof(struct user32, u_debugreg[4]): 
+	case offsetof(struct user32, u_debugreg[4]):
 	case offsetof(struct user32, u_debugreg[5]):
 		return -EIO;
 
@@ -108,36 +115,40 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 
 	case offsetof(struct user32, u_debugreg[6]):
 		child->thread.debugreg6 = val;
-		break; 
+		break;
 
 	case offsetof(struct user32, u_debugreg[7]):
 		val &= ~DR_CONTROL_RESERVED;
 		/* See arch/i386/kernel/ptrace.c for an explanation of
 		 * this awkward check.*/
-		for(i=0; i<4; i++)
+		for (i = 0; i < 4; i++)
 			if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
 			       return -EIO;
-		child->thread.debugreg7 = val; 
+		child->thread.debugreg7 = val;
 		if (val)
 			set_tsk_thread_flag(child, TIF_DEBUG);
 		else
 			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break; 
-		    
+		break;
+
 	default:
 		if (regno > sizeof(struct user32) || (regno & 3))
 			return -EIO;
-	       
-		/* Other dummy fields in the virtual user structure are ignored */ 
-		break; 		
+
+		/*
+		 * Other dummy fields in the virtual user structure
+		 * are ignored
+		 */
+		break;
 	}
 	return 0;
 }
 
 #undef R32
 
-#define R32(l,q) \
-	case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break
+#define R32(l,q)							\
+	case offsetof(struct user32, regs.l):				\
+		*val = stack[offsetof(struct pt_regs, q)/8]; break
 
 static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 {
@@ -145,7 +156,7 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
-	        *val = child->thread.fsindex;
+		*val = child->thread.fsindex;
 		break;
 	case offsetof(struct user32, regs.gs):
 		*val = child->thread.gsindex;
@@ -159,7 +170,7 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 	R32(cs, cs);
 	R32(ss, ss);
-	R32(ebx, rbx); 
+	R32(ebx, rbx);
 	R32(ecx, rcx);
 	R32(edx, rdx);
 	R32(edi, rdi);
@@ -171,32 +182,35 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 	R32(eflags, eflags);
 	R32(esp, rsp);
 
-	case offsetof(struct user32, u_debugreg[0]): 
-		*val = child->thread.debugreg0; 
-		break; 
-	case offsetof(struct user32, u_debugreg[1]): 
-		*val = child->thread.debugreg1; 
-		break; 
-	case offsetof(struct user32, u_debugreg[2]): 
-		*val = child->thread.debugreg2; 
-		break; 
-	case offsetof(struct user32, u_debugreg[3]): 
-		*val = child->thread.debugreg3; 
-		break; 
-	case offsetof(struct user32, u_debugreg[6]): 
-		*val = child->thread.debugreg6; 
-		break; 
-	case offsetof(struct user32, u_debugreg[7]): 
-		*val = child->thread.debugreg7; 
-		break; 
-		    
+	case offsetof(struct user32, u_debugreg[0]):
+		*val = child->thread.debugreg0;
+		break;
+	case offsetof(struct user32, u_debugreg[1]):
+		*val = child->thread.debugreg1;
+		break;
+	case offsetof(struct user32, u_debugreg[2]):
+		*val = child->thread.debugreg2;
+		break;
+	case offsetof(struct user32, u_debugreg[3]):
+		*val = child->thread.debugreg3;
+		break;
+	case offsetof(struct user32, u_debugreg[6]):
+		*val = child->thread.debugreg6;
+		break;
+	case offsetof(struct user32, u_debugreg[7]):
+		*val = child->thread.debugreg7;
+		break;
+
 	default:
 		if (regno > sizeof(struct user32) || (regno & 3))
 			return -EIO;
 
-		/* Other dummy fields in the virtual user structure are ignored */ 
+		/*
+		 * Other dummy fields in the virtual user structure
+		 * are ignored
+		 */
 		*val = 0;
-		break; 		
+		break;
 	}
 	return 0;
 }
@@ -205,10 +219,11 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
 {
-	int ret;
-	compat_siginfo_t __user *si32 = compat_ptr(data);
-	siginfo_t ssi; 
 	siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
+	compat_siginfo_t __user *si32 = compat_ptr(data);
+	siginfo_t ssi;
+	int ret;
+
 	if (request == PTRACE_SETSIGINFO) {
 		memset(&ssi, 0, sizeof(siginfo_t));
 		ret = copy_siginfo_from_user32(&ssi, si32);
@@ -231,12 +246,12 @@ static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
 asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 {
 	struct task_struct *child;
-	struct pt_regs *childregs; 
+	struct pt_regs *childregs;
 	void __user *datap = compat_ptr(data);
 	int ret;
 	__u32 val;
 
-	switch (request) { 
+	switch (request) {
 	case PTRACE_TRACEME:
 	case PTRACE_ATTACH:
 	case PTRACE_KILL:
@@ -248,7 +263,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_SETOPTIONS:
 	case PTRACE_SET_THREAD_AREA:
 	case PTRACE_GET_THREAD_AREA:
-		return sys_ptrace(request, pid, addr, data); 
+		return sys_ptrace(request, pid, addr, data);
 
 	default:
 		return -EINVAL;
@@ -257,7 +272,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_PEEKDATA:
 	case PTRACE_POKEDATA:
 	case PTRACE_POKETEXT:
-	case PTRACE_POKEUSR:       
+	case PTRACE_POKEUSR:
 	case PTRACE_PEEKUSR:
 	case PTRACE_GETREGS:
 	case PTRACE_SETREGS:
@@ -287,17 +302,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_PEEKDATA:
 	case PTRACE_PEEKTEXT:
 		ret = 0;
-		if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32))
+		if (access_process_vm(child, addr, &val, sizeof(u32), 0) !=
+		    sizeof(u32))
 			ret = -EIO;
 		else
-			ret = put_user(val, (unsigned int __user *)datap); 
-		break; 
+			ret = put_user(val, (unsigned int __user *)datap);
+		break;
 
 	case PTRACE_POKEDATA:
 	case PTRACE_POKETEXT:
 		ret = 0;
-		if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32))
-			ret = -EIO; 
+		if (access_process_vm(child, addr, &data, sizeof(u32), 1) !=
+		    sizeof(u32))
+			ret = -EIO;
 		break;
 
 	case PTRACE_PEEKUSR:
@@ -312,14 +329,15 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 		int i;
-	  	if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
+
+		if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
 			ret = -EIO;
 			break;
 		}
 		ret = 0;
-		for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) {
+		for (i = 0; i <= 16*4; i += sizeof(__u32)) {
 			getreg32(child, i, &val);
-			ret |= __put_user(val,(u32 __user *)datap);
+			ret |= __put_user(val, (u32 __user *)datap);
 			datap += sizeof(u32);
 		}
 		break;
@@ -328,12 +346,13 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
 		unsigned long tmp;
 		int i;
-	  	if (!access_ok(VERIFY_READ, datap, 16*4)) {
+
+		if (!access_ok(VERIFY_READ, datap, 16*4)) {
 			ret = -EIO;
 			break;
 		}
-		ret = 0; 
-		for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
+		ret = 0;
+		for (i = 0; i <= 16*4; i += sizeof(u32)) {
 			ret |= __get_user(tmp, (u32 __user *)datap);
 			putreg32(child, i, tmp);
 			datap += sizeof(u32);
@@ -342,17 +361,17 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	}
 
 	case PTRACE_GETFPREGS:
-		ret = -EIO; 
-		if (!access_ok(VERIFY_READ, compat_ptr(data), 
+		ret = -EIO;
+		if (!access_ok(VERIFY_READ, compat_ptr(data),
 			       sizeof(struct user_i387_struct)))
 			break;
 		save_i387_ia32(child, datap, childregs, 1);
-		ret = 0; 
+		ret = 0;
 			break;
 
 	case PTRACE_SETFPREGS:
 		ret = -EIO;
-		if (!access_ok(VERIFY_WRITE, datap, 
+		if (!access_ok(VERIFY_WRITE, datap,
 			       sizeof(struct user_i387_struct)))
 			break;
 		ret = 0;
@@ -360,9 +379,10 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 		restore_i387_ia32(child, datap, 1);
 		break;
 
-	case PTRACE_GETFPXREGS: { 
+	case PTRACE_GETFPXREGS: {
 		struct user32_fxsr_struct __user *u = datap;
-		init_fpu(child); 
+
+		init_fpu(child);
 		ret = -EIO;
 		if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
 			break;
@@ -370,27 +390,31 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 		if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
 			break;
 		ret = __put_user(childregs->cs, &u->fcs);
-		ret |= __put_user(child->thread.ds, &u->fos); 
-		break; 
-	} 
-	case PTRACE_SETFPXREGS: { 
+		ret |= __put_user(child->thread.ds, &u->fos);
+		break;
+	}
+	case PTRACE_SETFPXREGS: {
 		struct user32_fxsr_struct __user *u = datap;
+
 		unlazy_fpu(child);
 		ret = -EIO;
 		if (!access_ok(VERIFY_READ, u, sizeof(*u)))
 			break;
-		/* no checking to be bug-to-bug compatible with i386. */
-		/* but silence warning */
+		/*
+		 * no checking to be bug-to-bug compatible with i386.
+		 * but silence warning
+		 */
 		if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
 			;
 		set_stopped_child_used_math(child);
 		child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-		ret = 0; 
+		ret = 0;
 		break;
 	}
 
 	case PTRACE_GETEVENTMSG:
-		ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data));
+		ret = put_user(child->ptrace_message,
+			       (unsigned int __user *)compat_ptr(data));
 		break;
 
 	default:

From c202f298de59c17c0a9799dc0e1b9e0629347935 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:08 +0100
Subject: [PATCH 037/890] x86: clean up arch/x86/ia32/sys_ia32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/sys_ia32.c | 496 ++++++++++++++++++++-------------------
 include/linux/compat.h   |   4 +
 2 files changed, 256 insertions(+), 244 deletions(-)

diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index bee96d614432..58991abc5b59 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -1,29 +1,29 @@
 /*
  * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
- *             sys_sparc32 
+ *             sys_sparc32
  *
  * Copyright (C) 2000		VA Linux Co
  * Copyright (C) 2000		Don Dugger <n0ano@valinux.com>
- * Copyright (C) 1999 		Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 1997,1998 	Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997 		David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1999		Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998	Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997		David S. Miller (davem@caip.rutgers.edu)
  * Copyright (C) 2000		Hewlett-Packard Co.
  * Copyright (C) 2000		David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2000,2001,2002	Andi Kleen, SuSE Labs (x86-64 port) 
+ * Copyright (C) 2000,2001,2002	Andi Kleen, SuSE Labs (x86-64 port)
  *
  * These routines maintain argument size conversion between 32bit and 64bit
- * environment. In 2.5 most of this should be moved to a generic directory. 
+ * environment. In 2.5 most of this should be moved to a generic directory.
  *
  * This file assumes that there is a hole at the end of user address space.
- * 
- * Some of the functions are LE specific currently. These are hopefully all marked.
- * This should be fixed.
+ *
+ * Some of the functions are LE specific currently. These are
+ * hopefully all marked.  This should be fixed.
  */
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/fs.h> 
-#include <linux/file.h> 
+#include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/signal.h>
 #include <linux/syscalls.h>
 #include <linux/resource.h>
@@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf)
 	if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
 		return -EOVERFLOW;
 	if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
-	    __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
-	    __put_user (ino, &ubuf->st_ino) ||
-	    __put_user (kbuf->mode, &ubuf->st_mode) ||
-	    __put_user (kbuf->nlink, &ubuf->st_nlink) ||
-	    __put_user (uid, &ubuf->st_uid) ||
-	    __put_user (gid, &ubuf->st_gid) ||
-	    __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
-	    __put_user (kbuf->size, &ubuf->st_size) ||
-	    __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) ||
-	    __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
-	    __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
-	    __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
-	    __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
-	    __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
-	    __put_user (kbuf->blksize, &ubuf->st_blksize) ||
-	    __put_user (kbuf->blocks, &ubuf->st_blocks))
+	    __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
+	    __put_user(ino, &ubuf->st_ino) ||
+	    __put_user(kbuf->mode, &ubuf->st_mode) ||
+	    __put_user(kbuf->nlink, &ubuf->st_nlink) ||
+	    __put_user(uid, &ubuf->st_uid) ||
+	    __put_user(gid, &ubuf->st_gid) ||
+	    __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
+	    __put_user(kbuf->size, &ubuf->st_size) ||
+	    __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) ||
+	    __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+	    __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
+	    __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+	    __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
+	    __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+	    __put_user(kbuf->blksize, &ubuf->st_blksize) ||
+	    __put_user(kbuf->blocks, &ubuf->st_blocks))
 		return -EFAULT;
 	return 0;
 }
 
-asmlinkage long
-sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high)
+asmlinkage long sys32_truncate64(char __user *filename,
+				 unsigned long offset_low,
+				 unsigned long offset_high)
 {
        return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
 }
 
-asmlinkage long
-sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high)
+asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
+				  unsigned long offset_high)
 {
        return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
 }
 
-/* Another set for IA32/LFS -- x86_64 struct stat is different due to 
-   support for 64bit inode numbers. */
-
-static int
-cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
+/*
+ * Another set for IA32/LFS -- x86_64 struct stat is different due to
+ * support for 64bit inode numbers.
+ */
+static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
 {
 	typeof(ubuf->st_uid) uid = 0;
 	typeof(ubuf->st_gid) gid = 0;
@@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
 	SET_GID(gid, stat->gid);
 	if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
 	    __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
-	    __put_user (stat->ino, &ubuf->__st_ino) ||
-	    __put_user (stat->ino, &ubuf->st_ino) ||
-	    __put_user (stat->mode, &ubuf->st_mode) ||
-	    __put_user (stat->nlink, &ubuf->st_nlink) ||
-	    __put_user (uid, &ubuf->st_uid) ||
-	    __put_user (gid, &ubuf->st_gid) ||
-	    __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
-	    __put_user (stat->size, &ubuf->st_size) ||
-	    __put_user (stat->atime.tv_sec, &ubuf->st_atime) ||
-	    __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
-	    __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) ||
-	    __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
-	    __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) ||
-	    __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
-	    __put_user (stat->blksize, &ubuf->st_blksize) ||
-	    __put_user (stat->blocks, &ubuf->st_blocks))
+	    __put_user(stat->ino, &ubuf->__st_ino) ||
+	    __put_user(stat->ino, &ubuf->st_ino) ||
+	    __put_user(stat->mode, &ubuf->st_mode) ||
+	    __put_user(stat->nlink, &ubuf->st_nlink) ||
+	    __put_user(uid, &ubuf->st_uid) ||
+	    __put_user(gid, &ubuf->st_gid) ||
+	    __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
+	    __put_user(stat->size, &ubuf->st_size) ||
+	    __put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
+	    __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+	    __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
+	    __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+	    __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
+	    __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+	    __put_user(stat->blksize, &ubuf->st_blksize) ||
+	    __put_user(stat->blocks, &ubuf->st_blocks))
 		return -EFAULT;
 	return 0;
 }
 
-asmlinkage long
-sys32_stat64(char __user * filename, struct stat64 __user *statbuf)
+asmlinkage long sys32_stat64(char __user *filename,
+			     struct stat64 __user *statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_stat(filename, &stat);
+
 	if (!ret)
 		ret = cp_stat64(statbuf, &stat);
 	return ret;
 }
 
-asmlinkage long
-sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
+asmlinkage long sys32_lstat64(char __user *filename,
+			      struct stat64 __user *statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_lstat(filename, &stat);
@@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
 	return ret;
 }
 
-asmlinkage long
-sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
 {
 	struct kstat stat;
 	int ret = vfs_fstat(fd, &stat);
@@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
 	return ret;
 }
 
-asmlinkage long
-sys32_fstatat(unsigned int dfd, char __user *filename,
-	      struct stat64 __user* statbuf, int flag)
+asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
+			      struct stat64 __user *statbuf, int flag)
 {
 	struct kstat stat;
 	int error = -EINVAL;
@@ -221,8 +221,7 @@ struct mmap_arg_struct {
 	unsigned int offset;
 };
 
-asmlinkage long
-sys32_mmap(struct mmap_arg_struct __user *arg)
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
 {
 	struct mmap_arg_struct a;
 	struct file *file = NULL;
@@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user *arg)
 		return -EFAULT;
 
 	if (a.offset & ~PAGE_MASK)
-		return -EINVAL; 
+		return -EINVAL;
 
 	if (!(a.flags & MAP_ANONYMOUS)) {
 		file = fget(a.fd);
 		if (!file)
 			return -EBADF;
 	}
-	
-	mm = current->mm; 
-	down_write(&mm->mmap_sem); 
-	retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT);
+
+	mm = current->mm;
+	down_write(&mm->mmap_sem);
+	retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags,
+			       a.offset>>PAGE_SHIFT);
 	if (file)
 		fput(file);
 
-	up_write(&mm->mmap_sem); 
+	up_write(&mm->mmap_sem);
 
 	return retval;
 }
 
-asmlinkage long 
-sys32_mprotect(unsigned long start, size_t len, unsigned long prot)
+asmlinkage long sys32_mprotect(unsigned long start, size_t len,
+			       unsigned long prot)
 {
-	return sys_mprotect(start,len,prot); 
+	return sys_mprotect(start, len, prot);
 }
 
-asmlinkage long
-sys32_pipe(int __user *fd)
+asmlinkage long sys32_pipe(int __user *fd)
 {
 	int retval;
 	int fds[2];
@@ -269,13 +268,13 @@ sys32_pipe(int __user *fd)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
 		retval = -EFAULT;
-  out:
+out:
 	return retval;
 }
 
-asmlinkage long
-sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
-		   struct sigaction32 __user *oact,  unsigned int sigsetsize)
+asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
+				   struct sigaction32 __user *oact,
+				   unsigned int sigsetsize)
 {
 	struct k_sigaction new_ka, old_ka;
 	int ret;
@@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
 		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
 		    __get_user(handler, &act->sa_handler) ||
 		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
-		    __get_user(restorer, &act->sa_restorer)||
-		    __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)))
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __copy_from_user(&set32, &act->sa_mask,
+				     sizeof(compat_sigset_t)))
 			return -EFAULT;
 		new_ka.sa.sa_handler = compat_ptr(handler);
 		new_ka.sa.sa_restorer = compat_ptr(restorer);
-		/* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+
+		/*
+		 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
+		 * than _NSIG_WORDS << 1
+		 */
 		switch (_NSIG_WORDS) {
 		case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
 				| (((long)set32.sig[7]) << 32);
@@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
 	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 
 	if (!ret && oact) {
-		/* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+		/*
+		 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
+		 * than _NSIG_WORDS << 1
+		 */
 		switch (_NSIG_WORDS) {
 		case 4:
 			set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
@@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
 			set32.sig[0] = old_ka.sa.sa_mask.sig[0];
 		}
 		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
-		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+			       &oact->sa_restorer) ||
 		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
-		    __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)))
+		    __copy_to_user(&oact->sa_mask, &set32,
+				   sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
 
 	return ret;
 }
 
-asmlinkage long
-sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
+asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
+				struct old_sigaction32 __user *oact)
 {
-        struct k_sigaction new_ka, old_ka;
-        int ret;
+	struct k_sigaction new_ka, old_ka;
+	int ret;
 
-        if (act) {
+	if (act) {
 		compat_old_sigset_t mask;
 		compat_uptr_t handler, restorer;
 
@@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigacti
 		new_ka.sa.sa_restorer = compat_ptr(restorer);
 
 		siginitset(&new_ka.sa.sa_mask, mask);
-        }
+	}
 
-        ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 
 	if (!ret && oact) {
 		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
-		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+			       &oact->sa_restorer) ||
 		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
 		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
 			return -EFAULT;
-        }
+	}
 
 	return ret;
 }
 
-asmlinkage long
-sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
-			compat_sigset_t __user *oset, unsigned int sigsetsize)
+asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+				     compat_sigset_t __user *oset,
+				     unsigned int sigsetsize)
 {
 	sigset_t s;
 	compat_sigset_t s32;
 	int ret;
 	mm_segment_t old_fs = get_fs();
-	
+
 	if (set) {
-		if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
+		if (copy_from_user(&s32, set, sizeof(compat_sigset_t)))
 			return -EFAULT;
 		switch (_NSIG_WORDS) {
 		case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
@@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 		case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
 		}
 	}
-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	ret = sys_rt_sigprocmask(how,
 				 set ? (sigset_t __user *)&s : NULL,
 				 oset ? (sigset_t __user *)&s : NULL,
-				 sigsetsize); 
-	set_fs (old_fs);
-	if (ret) return ret;
+				 sigsetsize);
+	set_fs(old_fs);
+	if (ret)
+		return ret;
 	if (oset) {
 		switch (_NSIG_WORDS) {
 		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
@@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
 		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
 		}
-		if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
+		if (copy_to_user(oset, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
 	return 0;
 }
 
-static inline long
-get_tv32(struct timeval *o, struct compat_timeval __user *i)
+static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i)
 {
-	int err = -EFAULT; 
-	if (access_ok(VERIFY_READ, i, sizeof(*i))) { 
+	int err = -EFAULT;
+
+	if (access_ok(VERIFY_READ, i, sizeof(*i))) {
 		err = __get_user(o->tv_sec, &i->tv_sec);
 		err |= __get_user(o->tv_usec, &i->tv_usec);
 	}
-	return err; 
+	return err;
 }
 
-static inline long
-put_tv32(struct compat_timeval __user *o, struct timeval *i)
+static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
 {
 	int err = -EFAULT;
-	if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { 
+
+	if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
 		err = __put_user(i->tv_sec, &o->tv_sec);
 		err |= __put_user(i->tv_usec, &o->tv_usec);
-	} 
-	return err; 
+	}
+	return err;
 }
 
-extern unsigned int alarm_setitimer(unsigned int seconds);
-
-asmlinkage long
-sys32_alarm(unsigned int seconds)
+asmlinkage long sys32_alarm(unsigned int seconds)
 {
 	return alarm_setitimer(seconds);
 }
 
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-
-extern struct timezone sys_tz;
-
-asmlinkage long
-sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+/*
+ * Translations due to time_t size differences. Which affects all
+ * sorts of things, like timeval and itimerval.
+ */
+asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv,
+				   struct timezone __user *tz)
 {
 	if (tv) {
 		struct timeval ktv;
+
 		do_gettimeofday(&ktv);
 		if (put_tv32(tv, &ktv))
 			return -EFAULT;
@@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
 	return 0;
 }
 
-asmlinkage long
-sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
+				   struct timezone __user *tz)
 {
 	struct timeval ktv;
 	struct timespec kts;
 	struct timezone ktz;
 
- 	if (tv) {
+	if (tv) {
 		if (get_tv32(&ktv, tv))
 			return -EFAULT;
 		kts.tv_sec = ktv.tv_sec;
@@ -494,8 +504,7 @@ struct sel_arg_struct {
 	unsigned int tvp;
 };
 
-asmlinkage long
-sys32_old_select(struct sel_arg_struct __user *arg)
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg)
 {
 	struct sel_arg_struct a;
 
@@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct __user *arg)
 				 compat_ptr(a.exp), compat_ptr(a.tvp));
 }
 
-extern asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options,
-		 struct compat_rusage *ru);
-
-asmlinkage long
-sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+			      int options)
 {
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 
 /* 32-bit timeval and related flotsam.  */
 
-asmlinkage long
-sys32_sysfs(int option, u32 arg1, u32 arg2)
+asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
 {
 	return sys_sysfs(option, arg1, arg2);
 }
 
-asmlinkage long
-sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
+				    struct compat_timespec __user *interval)
 {
 	struct timespec t;
 	int ret;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs (KERNEL_DS);
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
 	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
-	set_fs (old_fs);
+	set_fs(old_fs);
 	if (put_compat_timespec(&t, interval))
 		return -EFAULT;
 	return ret;
 }
 
-asmlinkage long
-sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
+				    compat_size_t sigsetsize)
 {
 	sigset_t s;
 	compat_sigset_t s32;
 	int ret;
 	mm_segment_t old_fs = get_fs();
-		
-	set_fs (KERNEL_DS);
+
+	set_fs(KERNEL_DS);
 	ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
-	set_fs (old_fs);
+	set_fs(old_fs);
 	if (!ret) {
 		switch (_NSIG_WORDS) {
 		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
@@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
 		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
 		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
 		}
-		if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
+		if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
 	return ret;
 }
 
-asmlinkage long
-sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
+asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
+				      compat_siginfo_t __user *uinfo)
 {
 	siginfo_t info;
 	int ret;
 	mm_segment_t old_fs = get_fs();
-	
+
 	if (copy_siginfo_from_user32(&info, uinfo))
 		return -EFAULT;
-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
-	set_fs (old_fs);
+	set_fs(old_fs);
 	return ret;
 }
 
 /* These are here just in case some old ia32 binary calls it. */
-asmlinkage long
-sys32_pause(void)
+asmlinkage long sys32_pause(void)
 {
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
@@ -599,25 +602,25 @@ struct sysctl_ia32 {
 };
 
 
-asmlinkage long
-sys32_sysctl(struct sysctl_ia32 __user *args32)
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32)
 {
 	struct sysctl_ia32 a32;
-	mm_segment_t old_fs = get_fs ();
+	mm_segment_t old_fs = get_fs();
 	void __user *oldvalp, *newvalp;
 	size_t oldlen;
 	int __user *namep;
 	long ret;
 
-	if (copy_from_user(&a32, args32, sizeof (a32)))
+	if (copy_from_user(&a32, args32, sizeof(a32)))
 		return -EFAULT;
 
 	/*
-	 * We need to pre-validate these because we have to disable address checking
-	 * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
-	 * user specifying bad addresses here.  Well, since we're dealing with 32 bit
-	 * addresses, we KNOW that access_ok() will always succeed, so this is an
-	 * expensive NOP, but so what...
+	 * We need to pre-validate these because we have to disable
+	 * address checking before calling do_sysctl() because of
+	 * OLDLEN but we can't run the risk of the user specifying bad
+	 * addresses here.  Well, since we're dealing with 32 bit
+	 * addresses, we KNOW that access_ok() will always succeed, so
+	 * this is an expensive NOP, but so what...
 	 */
 	namep = compat_ptr(a32.name);
 	oldvalp = compat_ptr(a32.oldval);
@@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user *args32)
 	unlock_kernel();
 	set_fs(old_fs);
 
-	if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp)))
+	if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
 		return -EFAULT;
 
 	return ret;
 }
 #endif
 
-/* warning: next two assume little endian */ 
-asmlinkage long
-sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+/* warning: next two assume little endian */
+asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
+			    u32 poslo, u32 poshi)
 {
 	return sys_pread64(fd, ubuf, count,
 			 ((loff_t)AA(poshi) << 32) | AA(poslo));
 }
 
-asmlinkage long
-sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count,
+			     u32 poslo, u32 poshi)
 {
 	return sys_pwrite64(fd, ubuf, count,
 			  ((loff_t)AA(poshi) << 32) | AA(poslo));
 }
 
 
-asmlinkage long
-sys32_personality(unsigned long personality)
+asmlinkage long sys32_personality(unsigned long personality)
 {
 	int ret;
-	if (personality(current->personality) == PER_LINUX32 && 
+
+	if (personality(current->personality) == PER_LINUX32 &&
 		personality == PER_LINUX)
 		personality = PER_LINUX32;
 	ret = sys_personality(personality);
@@ -672,34 +675,33 @@ sys32_personality(unsigned long personality)
 	return ret;
 }
 
-asmlinkage long
-sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
+asmlinkage long sys32_sendfile(int out_fd, int in_fd,
+			       compat_off_t __user *offset, s32 count)
 {
 	mm_segment_t old_fs = get_fs();
 	int ret;
 	off_t of;
-	
+
 	if (offset && get_user(of, offset))
 		return -EFAULT;
-		
+
 	set_fs(KERNEL_DS);
 	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
 			   count);
 	set_fs(old_fs);
-	
+
 	if (offset && put_user(of, offset))
 		return -EFAULT;
-		
 	return ret;
 }
 
 asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
+			    unsigned long prot, unsigned long flags,
+			    unsigned long fd, unsigned long pgoff)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long error;
-	struct file * file = NULL;
+	struct file *file = NULL;
 
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 	if (!(flags & MAP_ANONYMOUS)) {
@@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
 	return error;
 }
 
-asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
+asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
 {
+	char *arch = "x86_64";
 	int err;
 
 	if (!name)
 		return -EFAULT;
 	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
 		return -EFAULT;
-  
-  	down_read(&uts_sem);
 
-	err = __copy_to_user(&name->sysname,&utsname()->sysname,
-				__OLD_UTS_LEN);
-	err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->nodename,&utsname()->nodename,
-				__OLD_UTS_LEN);
-	err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->release,&utsname()->release,
-				__OLD_UTS_LEN);
-	err |= __put_user(0,name->release+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->version,&utsname()->version,
-				__OLD_UTS_LEN);
-	err |= __put_user(0,name->version+__OLD_UTS_LEN);
-	{
-		char *arch = "x86_64";
-		if (personality(current->personality) == PER_LINUX32)
-			arch = "i686";
-		 
-		err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
-	}
+	down_read(&uts_sem);
+
+	err = __copy_to_user(&name->sysname, &utsname()->sysname,
+			     __OLD_UTS_LEN);
+	err |= __put_user(0, name->sysname+__OLD_UTS_LEN);
+	err |= __copy_to_user(&name->nodename, &utsname()->nodename,
+			      __OLD_UTS_LEN);
+	err |= __put_user(0, name->nodename+__OLD_UTS_LEN);
+	err |= __copy_to_user(&name->release, &utsname()->release,
+			      __OLD_UTS_LEN);
+	err |= __put_user(0, name->release+__OLD_UTS_LEN);
+	err |= __copy_to_user(&name->version, &utsname()->version,
+			      __OLD_UTS_LEN);
+	err |= __put_user(0, name->version+__OLD_UTS_LEN);
+
+	if (personality(current->personality) == PER_LINUX32)
+		arch = "i686";
+
+	err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1);
 
 	up_read(&uts_sem);
 
@@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
 	return err;
 }
 
-long sys32_uname(struct old_utsname __user * name)
+long sys32_uname(struct old_utsname __user *name)
 {
 	int err;
+
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32) 
+	if (personality(current->personality) == PER_LINUX32)
 		err |= copy_to_user(&name->machine, "i686", 5);
-	return err?-EFAULT:0;
+
+	return err ? -EFAULT : 0;
 }
 
 long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
@@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
 	struct ustat u;
 	mm_segment_t seg;
 	int ret;
-	
-	seg = get_fs(); 
-	set_fs(KERNEL_DS); 
+
+	seg = get_fs();
+	set_fs(KERNEL_DS);
 	ret = sys_ustat(dev, (struct ustat __user *)&u);
 	set_fs(seg);
-	if (ret >= 0) { 
-		if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || 
-		    __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
-		    __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
-		    __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
-		    __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
-			ret = -EFAULT;
-	}
+	if (ret < 0)
+		return ret;
+
+	if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
+	    __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
+	    __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
+	    __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
+	    __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
+		ret = -EFAULT;
 	return ret;
-} 
+}
 
 asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 			     compat_uptr_t __user *envp, struct pt_regs *regs)
 {
 	long error;
-	char * filename;
+	char *filename;
 
 	filename = getname(name);
 	error = PTR_ERR(filename);
@@ -814,16 +818,17 @@ asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
 {
 	void __user *parent_tid = (void __user *)regs->rdx;
 	void __user *child_tid = (void __user *)regs->rdi;
+
 	if (!newsp)
 		newsp = regs->rsp;
-        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
 /*
- * Some system calls that need sign extended arguments. This could be done by a generic wrapper.
- */ 
-
-long sys32_lseek (unsigned int fd, int offset, unsigned int whence)
+ * Some system calls that need sign extended arguments. This could be
+ * done by a generic wrapper.
+ */
+long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
 {
 	return sys_lseek(fd, offset, whence);
 }
@@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig)
 {
 	return sys_kill(pid, sig);
 }
- 
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 
+
+long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
 			__u32 len_low, __u32 len_high, int advice)
-{ 
+{
 	return sys_fadvise64_64(fd,
 			       (((u64)offset_high)<<32) | offset_low,
 			       (((u64)len_high)<<32) | len_low,
-			       advice); 
-} 
+				advice);
+}
 
 long sys32_vm86_warning(void)
-{ 
+{
 	struct task_struct *me = current;
 	static char lastcomm[sizeof(me->comm)];
+
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
-		       me->comm);
+		compat_printk(KERN_INFO
+			      "%s: vm86 mode not supported on 64 bit kernel\n",
+			      me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	} 
+	}
 	return -ENOSYS;
-} 
+}
 
 long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
-			  char __user * buf, size_t len)
+			  char __user *buf, size_t len)
 {
 	return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
 }
 
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count)
+asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
+				   size_t count)
 {
 	return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
 }
 
 asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
-			   unsigned n_low, unsigned n_hi,  int flags)
+				      unsigned n_low, unsigned n_hi,  int flags)
 {
 	return sys_sync_file_range(fd,
 				   ((u64)off_hi << 32) | off_low,
 				   ((u64)n_hi << 32) | n_low, flags);
 }
 
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len,
-		     int advice)
+asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
+				size_t len, int advice)
 {
 	return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
 				len, advice);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0e69d2cf14aa..ba29d4c59643 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -191,6 +191,10 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
 		struct compat_timeval __user *tvp);
 
+asmlinkage long compat_sys_wait4(compat_pid_t pid,
+				 compat_uint_t *stat_addr, int options,
+				 struct compat_rusage *ru);
+
 #define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
 
 #define BITS_TO_COMPAT_LONGS(bits) \

From 6ec875666d826dc6ea228afc9f6b644165081ab5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:08 +0100
Subject: [PATCH 038/890] x86: clean up arch/x86/ia32/syscall32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/syscall32.c | 45 +++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c
index 15013bac181c..d751d96c2ef2 100644
--- a/arch/x86/ia32/syscall32.c
+++ b/arch/x86/ia32/syscall32.c
@@ -1,8 +1,9 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
-
-/* vsyscall handling for 32bit processes. Map a stub page into it 
-   on demand because 32bit cannot reach the kernel's fixmaps */
-
+/*
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs
+ *
+ * vsyscall handling for 32bit processes. Map a stub page into it on
+ * demand because 32bit cannot reach the kernel's fixmaps
+ */
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -50,31 +51,33 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 }
 
 static int __init init_syscall32(void)
-{ 
+{
 	char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!syscall32_page) 
-		panic("Cannot allocate syscall32 page"); 
+
+	if (!syscall32_page)
+		panic("Cannot allocate syscall32 page");
 	syscall32_pages[0] = virt_to_page(syscall32_page);
- 	if (use_sysenter > 0) {
- 		memcpy(syscall32_page, syscall32_sysenter,
- 		       syscall32_sysenter_end - syscall32_sysenter);
- 	} else {
-  		memcpy(syscall32_page, syscall32_syscall,
-  		       syscall32_syscall_end - syscall32_syscall);
-  	}	
+	if (use_sysenter > 0) {
+		memcpy(syscall32_page, syscall32_sysenter,
+		       syscall32_sysenter_end - syscall32_sysenter);
+	} else {
+		memcpy(syscall32_page, syscall32_syscall,
+		       syscall32_syscall_end - syscall32_syscall);
+	}
 	return 0;
-} 
-	
-__initcall(init_syscall32); 
+}
+__initcall(init_syscall32);
 
 /* May not be __init: called during resume */
 void syscall32_cpu_init(void)
 {
 	if (use_sysenter < 0)
- 		use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+		use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
 
-	/* Load these always in case some future AMD CPU supports
-	   SYSENTER from compat mode too. */
+	/*
+	 * Load these always in case some future AMD CPU supports
+	 * SYSENTER from compat mode too.
+	 */
 	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
 	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
 	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);

From 5bafb671e20d2a3721589378681326197fc37a5f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:09 +0100
Subject: [PATCH 039/890] x86: clean up arch/x86/ia32/mmap32.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/mmap32.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/ia32/mmap32.c b/arch/x86/ia32/mmap32.c
index e4b84b4a417a..7649370a3144 100644
--- a/arch/x86/ia32/mmap32.c
+++ b/arch/x86/ia32/mmap32.c
@@ -36,8 +36,8 @@
  *
  * Leave an at least ~128 MB hole.
  */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
+#define MIN_GAP (128 * 1024 * 1024)
+#define MAX_GAP (TASK_SIZE / 6 * 5)
 
 static inline unsigned long mmap_base(struct mm_struct *mm)
 {
@@ -45,7 +45,7 @@ static inline unsigned long mmap_base(struct mm_struct *mm)
 	unsigned long random_factor = 0;
 
 	if (current->flags & PF_RANDOMIZE)
-		random_factor = get_random_int() % (1024*1024);
+		random_factor = get_random_int() % (1024 * 1024);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -66,8 +66,8 @@ void ia32_pick_mmap_layout(struct mm_struct *mm)
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
 	if (sysctl_legacy_va_layout ||
-			(current->personality & ADDR_COMPAT_LAYOUT) ||
-			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+	    (current->personality & ADDR_COMPAT_LAYOUT) ||
+	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;

From c140df973c07ac328aafd19d4f4c413f2f8902df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:09 +0100
Subject: [PATCH 040/890] x86: clean up arch/x86/kernel/aperture_64.c

whitespace cleanup. No code changed:

   text    data     bss     dec     hex filename
   2080      76       4    2160     870 aperture_64.o.before
   2080      76       4    2160     870 aperture_64.o.after

                                       errors   lines of code   errors/KLOC
 arch/x86/kernel/aperture_64.c            114             299         381.2
 arch/x86/kernel/aperture_64.c              0             315             0

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/aperture_64.c | 242 ++++++++++++++++++----------------
 1 file changed, 129 insertions(+), 113 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5b6992799c9d..250db0527f5d 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,12 +1,12 @@
-/* 
+/*
  * Firmware replacement code.
- * 
+ *
  * Work around broken BIOSes that don't set an aperture or only set the
- * aperture in the AGP bridge. 
- * If all fails map the aperture over some low memory.  This is cheaper than 
- * doing bounce buffering. The memory is lost. This is done at early boot 
- * because only the bootmem allocator can allocate 32+MB. 
- * 
+ * aperture in the AGP bridge.
+ * If all fails map the aperture over some low memory.  This is cheaper than
+ * doing bounce buffering. The memory is lost. This is done at early boot
+ * because only the bootmem allocator can allocate 32+MB.
+ *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
 #include <linux/kernel.h>
@@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initdata = 0;
 int gart_iommu_aperture_allowed __initdata = 0;
 
 int fallback_aper_order __initdata = 1; /* 64MB */
-int fallback_aper_force __initdata = 0; 
+int fallback_aper_force __initdata = 0;
 
 int fix_aperture __initdata = 1;
 
@@ -49,20 +49,20 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
 /* This code runs before the PCI subsystem is initialized, so just
    access the northbridge directly. */
 
-static u32 __init allocate_aperture(void) 
+static u32 __init allocate_aperture(void)
 {
 	u32 aper_size;
-	void *p; 
+	void *p;
 
-	if (fallback_aper_order > 7) 
-		fallback_aper_order = 7; 
-	aper_size = (32 * 1024 * 1024) << fallback_aper_order; 
+	if (fallback_aper_order > 7)
+		fallback_aper_order = 7;
+	aper_size = (32 * 1024 * 1024) << fallback_aper_order;
 
-	/* 
-	 * Aperture has to be naturally aligned. This means an 2GB aperture won't
-	 * have much chance of finding a place in the lower 4GB of memory.
-	 * Unfortunately we cannot move it up because that would make the
-	 * IOMMU useless.
+	/*
+	 * Aperture has to be naturally aligned. This means a 2GB aperture
+	 * won't have much chance of finding a place in the lower 4GB of
+	 * memory. Unfortunately we cannot move it up because that would
+	 * make the IOMMU useless.
 	 */
 	p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
 	if (!p || __pa(p)+aper_size > 0xffffffff) {
@@ -73,54 +73,60 @@ static u32 __init allocate_aperture(void)
 		return 0;
 	}
 	printk("Mapping aperture over %d KB of RAM @ %lx\n",
-	       aper_size >> 10, __pa(p)); 
+	       aper_size >> 10, __pa(p));
 	insert_aperture_resource((u32)__pa(p), aper_size);
-	return (u32)__pa(p); 
+
+	return (u32)__pa(p);
 }
 
 static int __init aperture_valid(u64 aper_base, u32 aper_size)
-{ 
-	if (!aper_base) 
+{
+	if (!aper_base)
 		return 0;
-	if (aper_size < 64*1024*1024) { 
+	if (aper_size < 64*1024*1024) {
 		printk("Aperture too small (%d MB)\n", aper_size>>20);
 		return 0;
 	}
 	if (aper_base + aper_size > 0x100000000UL) {
 		printk("Aperture beyond 4GB. Ignoring.\n");
-		return 0; 
+		return 0;
 	}
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
 		printk("Aperture pointing to e820 RAM. Ignoring.\n");
-		return 0; 
-	} 
+		return 0;
+	}
 	return 1;
-} 
+}
 
 /* Find a PCI capability */
-static __u32 __init find_cap(int num, int slot, int func, int cap) 
-{ 
-	u8 pos;
+static __u32 __init find_cap(int num, int slot, int func, int cap)
+{
 	int bytes;
-	if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST))
+	u8 pos;
+
+	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+						PCI_STATUS_CAP_LIST))
 		return 0;
-	pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST);
-	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { 
+
+	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
 		u8 id;
-		pos &= ~3; 
-		id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID);
+
+		pos &= ~3;
+		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
 		if (id == 0xff)
 			break;
-		if (id == cap) 
-			return pos; 
-		pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); 
-	} 
+		if (id == cap)
+			return pos;
+		pos = read_pci_config_byte(num, slot, func,
+						pos+PCI_CAP_LIST_NEXT);
+	}
 	return 0;
-} 
+}
 
 /* Read a standard AGPv3 bridge header */
 static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
-{ 
+{
 	u32 apsize;
 	u32 apsizereg;
 	int nbits;
@@ -128,7 +134,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 	u64 aper;
 
 	printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);
-	apsizereg = read_pci_config_16(num,slot,func, cap + 0x14);
+	apsizereg = read_pci_config_16(num, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
 		printk("APSIZE in AGP bridge unreadable\n");
 		return 0;
@@ -136,80 +142,84 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 
 	apsize = apsizereg & 0xfff;
 	/* Some BIOS use weird encodings not in the AGPv3 table. */
-	if (apsize & 0xff) 
-		apsize |= 0xf00; 
+	if (apsize & 0xff)
+		apsize |= 0xf00;
 	nbits = hweight16(apsize);
 	*order = 7 - nbits;
 	if ((int)*order < 0) /* < 32MB */
 		*order = 0;
-	
-	aper_low = read_pci_config(num,slot,func, 0x10);
-	aper_hi = read_pci_config(num,slot,func,0x14);
+
+	aper_low = read_pci_config(num, slot, func, 0x10);
+	aper_hi = read_pci_config(num, slot, func, 0x14);
 	aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
 
-	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 
+	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
 	       aper, 32 << *order, apsizereg);
 
 	if (!aperture_valid(aper, (32*1024*1024) << *order))
-	    return 0;
-	return (u32)aper; 
-} 
+		return 0;
+	return (u32)aper;
+}
 
-/* Look for an AGP bridge. Windows only expects the aperture in the
-   AGP bridge and some BIOS forget to initialize the Northbridge too.
-   Work around this here. 
-
-   Do an PCI bus scan by hand because we're running before the PCI
-   subsystem. 
-
-   All K8 AGP bridges are AGPv3 compliant, so we can do this scan
-   generically. It's probably overkill to always scan all slots because
-   the AGP bridges should be always an own bus on the HT hierarchy, 
-   but do it here for future safety. */
+/*
+ * Look for an AGP bridge. Windows only expects the aperture in the
+ * AGP bridge and some BIOS forget to initialize the Northbridge too.
+ * Work around this here.
+ *
+ * Do an PCI bus scan by hand because we're running before the PCI
+ * subsystem.
+ *
+ * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * generically. It's probably overkill to always scan all slots because
+ * the AGP bridges should be always an own bus on the HT hierarchy,
+ * but do it here for future safety.
+ */
 static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 {
 	int num, slot, func;
 
 	/* Poor man's PCI discovery */
-	for (num = 0; num < 256; num++) { 
-		for (slot = 0; slot < 32; slot++) { 
-			for (func = 0; func < 8; func++) { 
+	for (num = 0; num < 256; num++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
 				u32 class, cap;
 				u8 type;
-				class = read_pci_config(num,slot,func,
+				class = read_pci_config(num, slot, func,
 							PCI_CLASS_REVISION);
 				if (class == 0xffffffff)
-					break; 
-				
-				switch (class >> 16) { 
+					break;
+
+				switch (class >> 16) {
 				case PCI_CLASS_BRIDGE_HOST:
 				case PCI_CLASS_BRIDGE_OTHER: /* needed? */
 					/* AGP bridge? */
-					cap = find_cap(num,slot,func,PCI_CAP_ID_AGP);
+					cap = find_cap(num, slot, func,
+							PCI_CAP_ID_AGP);
 					if (!cap)
 						break;
-					*valid_agp = 1; 
-					return read_agp(num,slot,func,cap,order);
-				} 
-				
+					*valid_agp = 1;
+					return read_agp(num, slot, func, cap,
+							order);
+				}
+
 				/* No multi-function device? */
-				type = read_pci_config_byte(num,slot,func,
+				type = read_pci_config_byte(num, slot, func,
 							       PCI_HEADER_TYPE);
 				if (!(type & 0x80))
 					break;
-			} 
-		} 
+			}
+		}
 	}
-	printk("No AGP bridge found\n"); 
+	printk("No AGP bridge found\n");
+
 	return 0;
 }
 
 void __init gart_iommu_hole_init(void)
-{ 
-	int fix, num; 
+{
 	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
-	int valid_agp = 0;
+	int fix, num, valid_agp = 0;
 
 	if (gart_iommu_aperture_disabled || !fix_aperture ||
 	    !early_pci_allowed())
@@ -218,24 +228,24 @@ void __init gart_iommu_hole_init(void)
 	printk(KERN_INFO  "Checking aperture...\n");
 
 	fix = 0;
-	for (num = 24; num < 32; num++) {		
+	for (num = 24; num < 32; num++) {
 		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
 			continue;
 
 		iommu_detected = 1;
 		gart_iommu_aperture = 1;
 
-		aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 
-		aper_size = (32 * 1024 * 1024) << aper_order; 
+		aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
+		aper_size = (32 * 1024 * 1024) << aper_order;
 		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
-		aper_base <<= 25; 
+		aper_base <<= 25;
 
-		printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 
+		printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
 		       aper_base, aper_size>>20);
-		
+
 		if (!aperture_valid(aper_base, aper_size)) {
-			fix = 1; 
-			break; 
+			fix = 1;
+			break;
 		}
 
 		if ((last_aper_order && aper_order != last_aper_order) ||
@@ -245,27 +255,28 @@ void __init gart_iommu_hole_init(void)
 		}
 		last_aper_order = aper_order;
 		last_aper_base = aper_base;
-	} 
+	}
 
 	if (!fix && !fallback_aper_force) {
 		if (last_aper_base) {
 			unsigned long n = (32 * 1024 * 1024) << last_aper_order;
+
 			insert_aperture_resource((u32)last_aper_base, n);
 		}
-		return; 
+		return;
 	}
 
 	if (!fallback_aper_force)
-		aper_alloc = search_agp_bridge(&aper_order, &valid_agp); 
-		
-	if (aper_alloc) { 
+		aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
+
+	if (aper_alloc) {
 		/* Got the aperture from the AGP bridge */
 	} else if (swiotlb && !valid_agp) {
 		/* Do nothing */
 	} else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
 		   force_iommu ||
 		   valid_agp ||
-		   fallback_aper_force) { 
+		   fallback_aper_force) {
 		printk("Your BIOS doesn't leave a aperture memory hole\n");
 		printk("Please enable the IOMMU option in the BIOS setup\n");
 		printk("This costs you %d MB of RAM\n",
@@ -273,27 +284,32 @@ void __init gart_iommu_hole_init(void)
 
 		aper_order = fallback_aper_order;
 		aper_alloc = allocate_aperture();
-		if (!aper_alloc) { 
-			/* Could disable AGP and IOMMU here, but it's probably
-			   not worth it. But the later users cannot deal with
-			   bad apertures and turning on the aperture over memory
-			   causes very strange problems, so it's better to 
-			   panic early. */
+		if (!aper_alloc) {
+			/*
+			 * Could disable AGP and IOMMU here, but it's
+			 * probably not worth it. But the later users
+			 * cannot deal with bad apertures and turning
+			 * on the aperture over memory causes very
+			 * strange problems, so it's better to panic
+			 * early.
+			 */
 			panic("Not enough memory for aperture");
 		}
-	} else { 
-		return; 
-	} 
+	} else {
+		return;
+	}
 
 	/* Fix up the north bridges */
-	for (num = 24; num < 32; num++) { 		
+	for (num = 24; num < 32; num++) {
 		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
-			continue;	
+			continue;
 
-		/* Don't enable translation yet. That is done later. 
-		   Assume this BIOS didn't initialise the GART so 
-		   just overwrite all previous bits */ 
-		write_pci_config(0, num, 3, 0x90, aper_order<<1); 
-		write_pci_config(0, num, 3, 0x94, aper_alloc>>25); 
-	} 
-} 
+		/*
+		 * Don't enable translation yet. That is done later.
+		 * Assume this BIOS didn't initialise the GART so
+		 * just overwrite all previous bits
+		 */
+		write_pci_config(0, num, 3, 0x90, aper_order<<1);
+		write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
+	}
+}

From 31183ba8fd05b6ddc67ab4d726167cbc52e1b346 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:10 +0100
Subject: [PATCH 041/890] x86: clean up arch/x86/kernel/aperture_64.c printk()s

clean up arch/x86/kernel/aperture_64.c printk()s.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/aperture_64.c | 42 ++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 250db0527f5d..52d2beac4556 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -66,14 +66,15 @@ static u32 __init allocate_aperture(void)
 	 */
 	p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
 	if (!p || __pa(p)+aper_size > 0xffffffff) {
-		printk("Cannot allocate aperture memory hole (%p,%uK)\n",
-		       p, aper_size>>10);
+		printk(KERN_ERR
+			"Cannot allocate aperture memory hole (%p,%uK)\n",
+				p, aper_size>>10);
 		if (p)
 			free_bootmem(__pa(p), aper_size);
 		return 0;
 	}
-	printk("Mapping aperture over %d KB of RAM @ %lx\n",
-	       aper_size >> 10, __pa(p));
+	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
+			aper_size >> 10, __pa(p));
 	insert_aperture_resource((u32)__pa(p), aper_size);
 
 	return (u32)__pa(p);
@@ -83,18 +84,20 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size)
 {
 	if (!aper_base)
 		return 0;
+
 	if (aper_size < 64*1024*1024) {
-		printk("Aperture too small (%d MB)\n", aper_size>>20);
+		printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
 		return 0;
 	}
 	if (aper_base + aper_size > 0x100000000UL) {
-		printk("Aperture beyond 4GB. Ignoring.\n");
+		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
 		return 0;
 	}
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk("Aperture pointing to e820 RAM. Ignoring.\n");
+		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
 		return 0;
 	}
+
 	return 1;
 }
 
@@ -133,10 +136,10 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 	u32 aper_low, aper_hi;
 	u64 aper;
 
-	printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);
+	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func);
 	apsizereg = read_pci_config_16(num, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
-		printk("APSIZE in AGP bridge unreadable\n");
+		printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
 		return 0;
 	}
 
@@ -153,8 +156,8 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 	aper_hi = read_pci_config(num, slot, func, 0x14);
 	aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
 
-	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
-	       aper, 32 << *order, apsizereg);
+	printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
+			aper, 32 << *order, apsizereg);
 
 	if (!aperture_valid(aper, (32*1024*1024) << *order))
 		return 0;
@@ -210,7 +213,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 			}
 		}
 	}
-	printk("No AGP bridge found\n");
+	printk(KERN_INFO "No AGP bridge found\n");
 
 	return 0;
 }
@@ -240,8 +243,8 @@ void __init gart_iommu_hole_init(void)
 		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
 		aper_base <<= 25;
 
-		printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
-		       aper_base, aper_size>>20);
+		printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n",
+				num-24, aper_base, aper_size>>20);
 
 		if (!aperture_valid(aper_base, aper_size)) {
 			fix = 1;
@@ -277,10 +280,13 @@ void __init gart_iommu_hole_init(void)
 		   force_iommu ||
 		   valid_agp ||
 		   fallback_aper_force) {
-		printk("Your BIOS doesn't leave a aperture memory hole\n");
-		printk("Please enable the IOMMU option in the BIOS setup\n");
-		printk("This costs you %d MB of RAM\n",
-		       32 << fallback_aper_order);
+		printk(KERN_ERR
+			"Your BIOS doesn't leave a aperture memory hole\n");
+		printk(KERN_ERR
+			"Please enable the IOMMU option in the BIOS setup\n");
+		printk(KERN_ERR
+			"This costs you %d MB of RAM\n",
+				32 << fallback_aper_order);
 
 		aper_order = fallback_aper_order;
 		aper_alloc = allocate_aperture();

From a4ec1effce83796209a0258602b0cf50026d86f2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:10 +0100
Subject: [PATCH 042/890] x86: mach-voyager, lindent

lindent the mach-voyager files to get rid of more than 300 style errors:

                                       errors   lines of code   errors/KLOC
 arch/x86/mach-voyager/   [old]           409            3729         109.6
 arch/x86/mach-voyager/   [new]            71            3678          19.3

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mach-voyager/setup.c          |  32 +-
 arch/x86/mach-voyager/voyager_basic.c  | 132 +++--
 arch/x86/mach-voyager/voyager_cat.c    | 601 +++++++++++-----------
 arch/x86/mach-voyager/voyager_smp.c    | 672 +++++++++++--------------
 arch/x86/mach-voyager/voyager_thread.c |  52 +-
 5 files changed, 718 insertions(+), 771 deletions(-)

diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 3bef977cb29b..81257a861984 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -44,7 +44,7 @@ void __init trap_init_hook(void)
 {
 }
 
-static struct irqaction irq0  = {
+static struct irqaction irq0 = {
 	.handler = timer_interrupt,
 	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL,
 	.mask = CPU_MASK_NONE,
@@ -59,44 +59,47 @@ void __init time_init_hook(void)
 
 /* Hook for machine specific memory setup. */
 
-char * __init machine_specific_memory_setup(void)
+char *__init machine_specific_memory_setup(void)
 {
 	char *who;
 
 	who = "NOT VOYAGER";
 
-	if(voyager_level == 5) {
+	if (voyager_level == 5) {
 		__u32 addr, length;
 		int i;
 
 		who = "Voyager-SUS";
 
 		e820.nr_map = 0;
-		for(i=0; voyager_memory_detect(i, &addr, &length); i++) {
+		for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
 			add_memory_region(addr, length, E820_RAM);
 		}
 		return who;
-	} else if(voyager_level == 4) {
+	} else if (voyager_level == 4) {
 		__u32 tom;
-		__u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+		__u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
 		/* select the DINO config space */
 		outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
 		/* Read DINO top of memory register */
 		tom = ((inb(catbase + 0x4) & 0xf0) << 16)
-			+ ((inb(catbase + 0x5) & 0x7f) << 24);
+		    + ((inb(catbase + 0x5) & 0x7f) << 24);
 
-		if(inb(catbase) != VOYAGER_DINO) {
-			printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
-			tom = (boot_params.screen_info.ext_mem_k)<<10;
+		if (inb(catbase) != VOYAGER_DINO) {
+			printk(KERN_ERR
+			       "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
+			tom = (boot_params.screen_info.ext_mem_k) << 10;
 		}
 		who = "Voyager-TOM";
 		add_memory_region(0, 0x9f000, E820_RAM);
 		/* map from 1M to top of memory */
-		add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM);
+		add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
+				  E820_RAM);
 		/* FIXME: Should check the ASICs to see if I need to
 		 * take out the 8M window.  Just do it at the moment
 		 * */
-		add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED);
+		add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024,
+				  E820_RESERVED);
 		return who;
 	}
 
@@ -114,8 +117,7 @@ char * __init machine_specific_memory_setup(void)
 		unsigned long mem_size;
 
 		/* compare results from other methods and take the greater */
-		if (boot_params.alt_mem_k
-		    < boot_params.screen_info.ext_mem_k) {
+		if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
 			mem_size = boot_params.screen_info.ext_mem_k;
 			who = "BIOS-88";
 		} else {
@@ -126,6 +128,6 @@ char * __init machine_specific_memory_setup(void)
 		e820.nr_map = 0;
 		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
 		add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-  	}
+	}
 	return who;
 }
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
index 9b77b39b71a6..6a949e4edde8 100644
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ b/arch/x86/mach-voyager/voyager_basic.c
@@ -35,7 +35,7 @@
 /*
  * Power off function, if any
  */
-void (*pm_power_off)(void);
+void (*pm_power_off) (void);
 EXPORT_SYMBOL(pm_power_off);
 
 int voyager_level = 0;
@@ -43,39 +43,38 @@ int voyager_level = 0;
 struct voyager_SUS *voyager_SUS = NULL;
 
 #ifdef CONFIG_SMP
-static void
-voyager_dump(int dummy1, struct tty_struct *dummy3)
+static void voyager_dump(int dummy1, struct tty_struct *dummy3)
 {
 	/* get here via a sysrq */
 	voyager_smp_dump();
 }
 
 static struct sysrq_key_op sysrq_voyager_dump_op = {
-	.handler	= voyager_dump,
-	.help_msg	= "Voyager",
-	.action_msg	= "Dump Voyager Status",
+	.handler = voyager_dump,
+	.help_msg = "Voyager",
+	.action_msg = "Dump Voyager Status",
 };
 #endif
 
-void
-voyager_detect(struct voyager_bios_info *bios)
+void voyager_detect(struct voyager_bios_info *bios)
 {
-	if(bios->len != 0xff) {
-		int class = (bios->class_1 << 8) 
-			| (bios->class_2 & 0xff);
+	if (bios->len != 0xff) {
+		int class = (bios->class_1 << 8)
+		    | (bios->class_2 & 0xff);
 
 		printk("Voyager System detected.\n"
 		       "        Class %x, Revision %d.%d\n",
 		       class, bios->major, bios->minor);
-		if(class == VOYAGER_LEVEL4) 
+		if (class == VOYAGER_LEVEL4)
 			voyager_level = 4;
-		else if(class < VOYAGER_LEVEL5_AND_ABOVE)
+		else if (class < VOYAGER_LEVEL5_AND_ABOVE)
 			voyager_level = 3;
 		else
 			voyager_level = 5;
 		printk("        Architecture Level %d\n", voyager_level);
-		if(voyager_level < 4)
-			printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
+		if (voyager_level < 4)
+			printk
+			    ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
 		/* install the power off handler */
 		pm_power_off = voyager_power_off;
 #ifdef CONFIG_SMP
@@ -86,15 +85,13 @@ voyager_detect(struct voyager_bios_info *bios)
 	}
 }
 
-void
-voyager_system_interrupt(int cpl, void *dev_id)
+void voyager_system_interrupt(int cpl, void *dev_id)
 {
 	printk("Voyager: detected system interrupt\n");
 }
 
 /* Routine to read information from the extended CMOS area */
-__u8
-voyager_extended_cmos_read(__u16 addr)
+__u8 voyager_extended_cmos_read(__u16 addr)
 {
 	outb(addr & 0xff, 0x74);
 	outb((addr >> 8) & 0xff, 0x75);
@@ -108,12 +105,11 @@ voyager_extended_cmos_read(__u16 addr)
 
 typedef struct ClickMap {
 	struct Entry {
-		__u32	Address;
-		__u32	Length;
+		__u32 Address;
+		__u32 Length;
 	} Entry[CLICK_ENTRIES];
 } ClickMap_t;
 
-
 /* This routine is pretty much an awful hack to read the bios clickmap by
  * mapping it into page 0.  There are usually three regions in the map:
  * 	Base Memory
@@ -122,8 +118,7 @@ typedef struct ClickMap {
  *
  * Returns are 0 for failure and 1 for success on extracting region.
  */
-int __init
-voyager_memory_detect(int region, __u32 *start, __u32 *length)
+int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
 {
 	int i;
 	int retval = 0;
@@ -132,13 +127,14 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
 	unsigned long map_addr;
 	unsigned long old;
 
-	if(region >= CLICK_ENTRIES) {
+	if (region >= CLICK_ENTRIES) {
 		printk("Voyager: Illegal ClickMap region %d\n", region);
 		return 0;
 	}
 
-	for(i = 0; i < sizeof(cmos); i++)
-		cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
+	for (i = 0; i < sizeof(cmos); i++)
+		cmos[i] =
+		    voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
 
 	map_addr = *(unsigned long *)cmos;
 
@@ -147,10 +143,10 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
 	pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
 	local_flush_tlb();
 	/* now clear everything out but page 0 */
-	map = (ClickMap_t *)(map_addr & (~PAGE_MASK));
+	map = (ClickMap_t *) (map_addr & (~PAGE_MASK));
 
 	/* zero length is the end of the clickmap */
-	if(map->Entry[region].Length != 0) {
+	if (map->Entry[region].Length != 0) {
 		*length = map->Entry[region].Length * CLICK_SIZE;
 		*start = map->Entry[region].Address;
 		retval = 1;
@@ -165,10 +161,9 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
 /* voyager specific handling code for timer interrupts.  Used to hand
  * off the timer tick to the SMP code, since the VIC doesn't have an
  * internal timer (The QIC does, but that's another story). */
-void
-voyager_timer_interrupt(void)
+void voyager_timer_interrupt(void)
 {
-	if((jiffies & 0x3ff) == 0) {
+	if ((jiffies & 0x3ff) == 0) {
 
 		/* There seems to be something flaky in either
 		 * hardware or software that is resetting the timer 0
@@ -186,18 +181,20 @@ voyager_timer_interrupt(void)
 		__u16 val;
 
 		spin_lock(&i8253_lock);
-		
+
 		outb_p(0x00, 0x43);
 		val = inb_p(0x40);
 		val |= inb(0x40) << 8;
 		spin_unlock(&i8253_lock);
 
-		if(val > LATCH) {
-			printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
+		if (val > LATCH) {
+			printk
+			    ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n",
+			     val);
 			spin_lock(&i8253_lock);
-			outb(0x34,0x43);
-			outb_p(LATCH & 0xff , 0x40);	/* LSB */
-			outb(LATCH >> 8 , 0x40);	/* MSB */
+			outb(0x34, 0x43);
+			outb_p(LATCH & 0xff, 0x40);	/* LSB */
+			outb(LATCH >> 8, 0x40);	/* MSB */
 			spin_unlock(&i8253_lock);
 		}
 	}
@@ -206,14 +203,13 @@ voyager_timer_interrupt(void)
 #endif
 }
 
-void
-voyager_power_off(void)
+void voyager_power_off(void)
 {
 	printk("VOYAGER Power Off\n");
 
-	if(voyager_level == 5) {
+	if (voyager_level == 5) {
 		voyager_cat_power_off();
-	} else if(voyager_level == 4) {
+	} else if (voyager_level == 4) {
 		/* This doesn't apparently work on most L4 machines,
 		 * but the specs say to do this to get automatic power
 		 * off.  Unfortunately, if it doesn't power off the
@@ -222,10 +218,8 @@ voyager_power_off(void)
 #if 0
 		int port;
 
-	  
 		/* enable the voyager Configuration Space */
-		outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, 
-		     VOYAGER_MC_SETUP);
+		outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP);
 		/* the port for the power off flag is an offset from the
 		   floating base */
 		port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
@@ -235,62 +229,57 @@ voyager_power_off(void)
 	}
 	/* and wait for it to happen */
 	local_irq_disable();
-	for(;;)
+	for (;;)
 		halt();
 }
 
 /* copied from process.c */
-static inline void
-kb_wait(void)
+static inline void kb_wait(void)
 {
 	int i;
 
-	for (i=0; i<0x10000; i++)
+	for (i = 0; i < 0x10000; i++)
 		if ((inb_p(0x64) & 0x02) == 0)
 			break;
 }
 
-void
-machine_shutdown(void)
+void machine_shutdown(void)
 {
 	/* Architecture specific shutdown needed before a kexec */
 }
 
-void
-machine_restart(char *cmd)
+void machine_restart(char *cmd)
 {
 	printk("Voyager Warm Restart\n");
 	kb_wait();
 
-	if(voyager_level == 5) {
+	if (voyager_level == 5) {
 		/* write magic values to the RTC to inform system that
 		 * shutdown is beginning */
 		outb(0x8f, 0x70);
-		outb(0x5 , 0x71);
-		
+		outb(0x5, 0x71);
+
 		udelay(50);
-		outb(0xfe,0x64);         /* pull reset low */
-	} else if(voyager_level == 4) {
-		__u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+		outb(0xfe, 0x64);	/* pull reset low */
+	} else if (voyager_level == 4) {
+		__u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
 		__u8 basebd = inb(VOYAGER_MC_SETUP);
-		
+
 		outb(basebd | 0x08, VOYAGER_MC_SETUP);
 		outb(0x02, catbase + 0x21);
 	}
 	local_irq_disable();
-	for(;;)
+	for (;;)
 		halt();
 }
 
-void
-machine_emergency_restart(void)
+void machine_emergency_restart(void)
 {
 	/*for now, just hook this to a warm restart */
 	machine_restart(NULL);
 }
 
-void
-mca_nmi_hook(void)
+void mca_nmi_hook(void)
 {
 	__u8 dumpval __maybe_unused = inb(0xf823);
 	__u8 swnmi __maybe_unused = inb(0xf813);
@@ -301,8 +290,8 @@ mca_nmi_hook(void)
 	/* clear swnmi */
 	outb(0xff, 0xf813);
 	/* tell SUS to ignore dump */
-	if(voyager_level == 5 && voyager_SUS != NULL) {
-		if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
+	if (voyager_level == 5 && voyager_SUS != NULL) {
+		if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
 			voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
 			voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
 			udelay(1000);
@@ -310,15 +299,14 @@ mca_nmi_hook(void)
 			voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
 		}
 	}
-	printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id());
+	printk(KERN_ERR
+	       "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n",
+	       smp_processor_id());
 	show_stack(NULL, NULL);
 	show_state();
 }
 
-
-
-void
-machine_halt(void)
+void machine_halt(void)
 {
 	/* treat a halt like a power off */
 	machine_power_off();
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
index 2132ca652df1..17a7904f75b1 100644
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ b/arch/x86/mach-voyager/voyager_cat.c
@@ -39,34 +39,32 @@
 #define CAT_DATA	(sspb + 0xd)
 
 /* the internal cat functions */
-static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data, 
-		     __u16 num_bits);
-static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data,
+static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits);
+static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data,
 		       __u16 num_bits);
-static void cat_build_header(__u8 *header, const __u16 len, 
+static void cat_build_header(__u8 * header, const __u16 len,
 			     const __u16 smallest_reg_bits,
 			     const __u16 longest_reg_bits);
-static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp,
 			__u8 reg, __u8 op);
-static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp,
-		       __u8 reg, __u8 *value);
-static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes,
+static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp,
+		       __u8 reg, __u8 * value);
+static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes,
 			__u8 pad_bits);
-static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
 		     __u8 value);
-static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
-		    __u8 *value);
-static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+		    __u8 * value);
+static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp,
 		       __u16 offset, __u16 len, void *buf);
-static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
 			__u8 reg, __u8 value);
-static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp);
-static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp);
+static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp);
+static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp);
 
-static inline const char *
-cat_module_name(int module_id)
+static inline const char *cat_module_name(int module_id)
 {
-	switch(module_id) {
+	switch (module_id) {
 	case 0x10:
 		return "Processor Slot 0";
 	case 0x11:
@@ -105,14 +103,14 @@ voyager_module_t *voyager_cat_list;
 
 /* the I/O port assignments for the VIC and QIC */
 static struct resource vic_res = {
-	.name	= "Voyager Interrupt Controller",
-	.start	= 0xFC00,
-	.end	= 0xFC6F
+	.name = "Voyager Interrupt Controller",
+	.start = 0xFC00,
+	.end = 0xFC6F
 };
 static struct resource qic_res = {
-	.name	= "Quad Interrupt Controller",
-	.start	= 0xFC70,
-	.end	= 0xFCFF
+	.name = "Quad Interrupt Controller",
+	.start = 0xFC70,
+	.end = 0xFCFF
 };
 
 /* This function is used to pack a data bit stream inside a message.
@@ -120,7 +118,7 @@ static struct resource qic_res = {
  * Note: This function assumes that any unused bit in the data stream
  * is set to zero so that the ors will work correctly */
 static void
-cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
 {
 	/* compute initial shift needed */
 	const __u16 offset = start_bit % BITS_PER_BYTE;
@@ -130,7 +128,7 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
 	int i;
 
 	/* adjust if we have more than a byte of residue */
-	if(residue >= BITS_PER_BYTE) {
+	if (residue >= BITS_PER_BYTE) {
 		residue -= BITS_PER_BYTE;
 		len++;
 	}
@@ -138,24 +136,25 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
 	/* clear out the bits.  We assume here that if len==0 then
 	 * residue >= offset.  This is always true for the catbus
 	 * operations */
-	msg[byte] &= 0xff << (BITS_PER_BYTE - offset); 
+	msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
 	msg[byte++] |= data[0] >> offset;
-	if(len == 0)
+	if (len == 0)
 		return;
-	for(i = 1; i < len; i++)
-		msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset))
-			| (data[i] >> offset);
-	if(residue != 0) {
+	for (i = 1; i < len; i++)
+		msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset))
+		    | (data[i] >> offset);
+	if (residue != 0) {
 		__u8 mask = 0xff >> residue;
-		__u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset)
-			| (data[i] >> offset);
-		
+		__u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset)
+		    | (data[i] >> offset);
+
 		last_byte &= ~mask;
 		msg[byte] &= mask;
 		msg[byte] |= last_byte;
 	}
 	return;
 }
+
 /* unpack the data again (same arguments as cat_pack()). data buffer
  * must be zero populated.
  *
@@ -163,7 +162,7 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
  * data (starting at bit 0 in data).
  */
 static void
-cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
 {
 	/* compute initial shift needed */
 	const __u16 offset = start_bit % BITS_PER_BYTE;
@@ -172,97 +171,97 @@ cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
 	__u16 byte = start_bit / BITS_PER_BYTE;
 	int i;
 
-	if(last_bits != 0)
+	if (last_bits != 0)
 		len++;
 
 	/* special case: want < 8 bits from msg and we can get it from
 	 * a single byte of the msg */
-	if(len == 0 && BITS_PER_BYTE - offset >= num_bits) {
+	if (len == 0 && BITS_PER_BYTE - offset >= num_bits) {
 		data[0] = msg[byte] << offset;
 		data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
 		return;
 	}
-	for(i = 0; i < len; i++) {
+	for (i = 0; i < len; i++) {
 		/* this annoying if has to be done just in case a read of
 		 * msg one beyond the array causes a panic */
-		if(offset != 0) {
+		if (offset != 0) {
 			data[i] = msg[byte++] << offset;
 			data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
-		}
-		else {
+		} else {
 			data[i] = msg[byte++];
 		}
 	}
 	/* do we need to truncate the final byte */
-	if(last_bits != 0) {
-		data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits);
+	if (last_bits != 0) {
+		data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits);
 	}
 	return;
 }
 
 static void
-cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits,
+cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits,
 		 const __u16 longest_reg_bits)
 {
 	int i;
 	__u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
 	__u8 *last_byte = &header[len - 1];
 
-	if(start_bit == 0)
+	if (start_bit == 0)
 		start_bit = 1;	/* must have at least one bit in the hdr */
-	
-	for(i=0; i < len; i++)
+
+	for (i = 0; i < len; i++)
 		header[i] = 0;
 
-	for(i = start_bit; i > 0; i--)
+	for (i = start_bit; i > 0; i--)
 		*last_byte = ((*last_byte) << 1) + 1;
 
 }
 
 static int
-cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
+cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op)
 {
 	__u8 parity, inst, inst_buf[4] = { 0 };
 	__u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
 	__u16 ibytes, hbytes, padbits;
 	int i;
-	
+
 	/* 
 	 * Parity is the parity of the register number + 1 (READ_REGISTER
 	 * and WRITE_REGISTER always add '1' to the number of bits == 1)
 	 */
-	parity = (__u8)(1 + (reg & 0x01) +
-	         ((__u8)(reg & 0x02) >> 1) +
-	         ((__u8)(reg & 0x04) >> 2) +
-	         ((__u8)(reg & 0x08) >> 3)) % 2;
+	parity = (__u8) (1 + (reg & 0x01) +
+			 ((__u8) (reg & 0x02) >> 1) +
+			 ((__u8) (reg & 0x04) >> 2) +
+			 ((__u8) (reg & 0x08) >> 3)) % 2;
 
 	inst = ((parity << 7) | (reg << 2) | op);
 
 	outb(VOYAGER_CAT_IRCYC, CAT_CMD);
-	if(!modp->scan_path_connected) {
-		if(asicp->asic_id != VOYAGER_CAT_ID) {
-			printk("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
+	if (!modp->scan_path_connected) {
+		if (asicp->asic_id != VOYAGER_CAT_ID) {
+			printk
+			    ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
 			return 1;
 		}
 		outb(VOYAGER_CAT_HEADER, CAT_DATA);
 		outb(inst, CAT_DATA);
-		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+		if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
 			CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
 			return 1;
 		}
 		return 0;
 	}
 	ibytes = modp->inst_bits / BITS_PER_BYTE;
-	if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
+	if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
 		padbits = BITS_PER_BYTE - padbits;
 		ibytes++;
 	}
 	hbytes = modp->largest_reg / BITS_PER_BYTE;
-	if(modp->largest_reg % BITS_PER_BYTE)
+	if (modp->largest_reg % BITS_PER_BYTE)
 		hbytes++;
 	CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
 	/* initialise the instruction sequence to 0xff */
-	for(i=0; i < ibytes + hbytes; i++)
+	for (i = 0; i < ibytes + hbytes; i++)
 		iseq[i] = 0xff;
 	cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
 	cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
@@ -271,11 +270,11 @@ cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
 	cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
 #ifdef VOYAGER_CAT_DEBUG
 	printk("ins = 0x%x, iseq: ", inst);
-	for(i=0; i< ibytes + hbytes; i++)
+	for (i = 0; i < ibytes + hbytes; i++)
 		printk("0x%x ", iseq[i]);
 	printk("\n");
 #endif
-	if(cat_shiftout(iseq, ibytes, hbytes, padbits)) {
+	if (cat_shiftout(iseq, ibytes, hbytes, padbits)) {
 		CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
 		return 1;
 	}
@@ -284,72 +283,74 @@ cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
 }
 
 static int
-cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, 
-	    __u8 *value)
+cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+	    __u8 * value)
 {
-	if(!modp->scan_path_connected) {
-		if(asicp->asic_id != VOYAGER_CAT_ID) {
+	if (!modp->scan_path_connected) {
+		if (asicp->asic_id != VOYAGER_CAT_ID) {
 			CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n"));
 			return 1;
 		}
-		if(reg > VOYAGER_SUBADDRHI) 
+		if (reg > VOYAGER_SUBADDRHI)
 			outb(VOYAGER_CAT_RUN, CAT_CMD);
 		outb(VOYAGER_CAT_DRCYC, CAT_CMD);
 		outb(VOYAGER_CAT_HEADER, CAT_DATA);
 		*value = inb(CAT_DATA);
 		outb(0xAA, CAT_DATA);
-		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+		if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
 			CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
 			return 1;
 		}
 		return 0;
-	}
-	else {
-		__u16 sbits = modp->num_asics -1 + asicp->ireg_length;
+	} else {
+		__u16 sbits = modp->num_asics - 1 + asicp->ireg_length;
 		__u16 sbytes = sbits / BITS_PER_BYTE;
 		__u16 tbytes;
-		__u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE];
+		__u8 string[VOYAGER_MAX_SCAN_PATH],
+		    trailer[VOYAGER_MAX_REG_SIZE];
 		__u8 padbits;
 		int i;
-		
+
 		outb(VOYAGER_CAT_DRCYC, CAT_CMD);
 
-		if((padbits = sbits % BITS_PER_BYTE) != 0) {
+		if ((padbits = sbits % BITS_PER_BYTE) != 0) {
 			padbits = BITS_PER_BYTE - padbits;
 			sbytes++;
 		}
 		tbytes = asicp->ireg_length / BITS_PER_BYTE;
-		if(asicp->ireg_length % BITS_PER_BYTE)
+		if (asicp->ireg_length % BITS_PER_BYTE)
 			tbytes++;
 		CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
-			tbytes,	sbytes, padbits));
+			tbytes, sbytes, padbits));
 		cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
 
-		
-		for(i = tbytes - 1; i >= 0; i--) {
+		for (i = tbytes - 1; i >= 0; i--) {
 			outb(trailer[i], CAT_DATA);
 			string[sbytes + i] = inb(CAT_DATA);
 		}
 
-		for(i = sbytes - 1; i >= 0; i--) {
+		for (i = sbytes - 1; i >= 0; i--) {
 			outb(0xaa, CAT_DATA);
 			string[i] = inb(CAT_DATA);
 		}
 		*value = 0;
-		cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length);
+		cat_unpack(string,
+			   padbits + (tbytes * BITS_PER_BYTE) +
+			   asicp->asic_location, value, asicp->ireg_length);
 #ifdef VOYAGER_CAT_DEBUG
 		printk("value=0x%x, string: ", *value);
-		for(i=0; i< tbytes+sbytes; i++)
+		for (i = 0; i < tbytes + sbytes; i++)
 			printk("0x%x ", string[i]);
 		printk("\n");
 #endif
-		
+
 		/* sanity check the rest of the return */
-		for(i=0; i < tbytes; i++) {
+		for (i = 0; i < tbytes; i++) {
 			__u8 input = 0;
 
-			cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE);
-			if(trailer[i] != input) {
+			cat_unpack(string, padbits + (i * BITS_PER_BYTE),
+				   &input, BITS_PER_BYTE);
+			if (trailer[i] != input) {
 				CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
 				return 1;
 			}
@@ -360,14 +361,14 @@ cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
 }
 
 static int
-cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
+cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
 {
 	int i;
-	
-	for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
+
+	for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
 		outb(data[i], CAT_DATA);
 
-	for(i = header_bytes - 1; i >= 0; i--) {
+	for (i = header_bytes - 1; i >= 0; i--) {
 		__u8 header = 0;
 		__u8 input;
 
@@ -376,7 +377,7 @@ cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
 		CDEBUG(("cat_shiftout: returned 0x%x\n", input));
 		cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
 			   &header, BITS_PER_BYTE);
-		if(input != header) {
+		if (input != header) {
 			CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
 			return 1;
 		}
@@ -385,57 +386,57 @@ cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
 }
 
 static int
-cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, 
+cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
 	     __u8 reg, __u8 value)
 {
 	outb(VOYAGER_CAT_DRCYC, CAT_CMD);
-	if(!modp->scan_path_connected) {
-		if(asicp->asic_id != VOYAGER_CAT_ID) {
+	if (!modp->scan_path_connected) {
+		if (asicp->asic_id != VOYAGER_CAT_ID) {
 			CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
 			return 1;
 		}
 		outb(VOYAGER_CAT_HEADER, CAT_DATA);
 		outb(value, CAT_DATA);
-		if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+		if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
 			CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
 			return 1;
 		}
-		if(reg > VOYAGER_SUBADDRHI) {
+		if (reg > VOYAGER_SUBADDRHI) {
 			outb(VOYAGER_CAT_RUN, CAT_CMD);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			outb(VOYAGER_CAT_RUN, CAT_CMD);
 		}
-		
+
 		return 0;
-	}
-	else {
+	} else {
 		__u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
-		__u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE;
-		__u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], 
-			hseq[VOYAGER_MAX_REG_SIZE];
+		__u16 dbytes =
+		    (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE;
+		__u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
+		    hseq[VOYAGER_MAX_REG_SIZE];
 		int i;
 
-		if((padbits = (modp->num_asics - 1 
-			       + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
+		if ((padbits = (modp->num_asics - 1
+				+ asicp->ireg_length) % BITS_PER_BYTE) != 0) {
 			padbits = BITS_PER_BYTE - padbits;
 			dbytes++;
 		}
-		if(asicp->ireg_length % BITS_PER_BYTE)
+		if (asicp->ireg_length % BITS_PER_BYTE)
 			hbytes++;
-		
+
 		cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
-		
-		for(i = 0; i < dbytes + hbytes; i++)
+
+		for (i = 0; i < dbytes + hbytes; i++)
 			dseq[i] = 0xff;
 		CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
 			dbytes, hbytes, padbits));
 		cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
 			 hseq, hbytes * BITS_PER_BYTE);
-		cat_pack(dseq, asicp->asic_location, &value, 
+		cat_pack(dseq, asicp->asic_location, &value,
 			 asicp->ireg_length);
 #ifdef VOYAGER_CAT_DEBUG
 		printk("dseq ");
-		for(i=0; i<hbytes+dbytes; i++) {
+		for (i = 0; i < hbytes + dbytes; i++) {
 			printk("0x%x ", dseq[i]);
 		}
 		printk("\n");
@@ -445,121 +446,125 @@ cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
 }
 
 static int
-cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
-	 __u8 value)
+cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value)
 {
-	if(cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
+	if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
 		return 1;
 	return cat_senddata(modp, asicp, reg, value);
 }
 
 static int
-cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
-	 __u8 *value)
+cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+	 __u8 * value)
 {
-	if(cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
+	if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
 		return 1;
 	return cat_getdata(modp, asicp, reg, value);
 }
 
 static int
-cat_subaddrsetup(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
 		 __u16 len)
 {
 	__u8 val;
 
-	if(len > 1) {
+	if (len > 1) {
 		/* set auto increment */
 		__u8 newval;
-		
-		if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
+
+		if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
 			CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
 			return 1;
 		}
-		CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val));
+		CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n",
+			val));
 		newval = val | VOYAGER_AUTO_INC;
-		if(newval != val) {
-			if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
+		if (newval != val) {
+			if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
 				CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
 				return 1;
 			}
 		}
 	}
-	if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) {
+	if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) {
 		CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
 		return 1;
 	}
-	if(asicp->subaddr > VOYAGER_SUBADDR_LO) {
-		if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) {
+	if (asicp->subaddr > VOYAGER_SUBADDR_LO) {
+		if (cat_write
+		    (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) {
 			CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
 			return 1;
 		}
 		cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
-		CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val));
+		CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset,
+			val));
 	}
 	cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
 	CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
 	return 0;
 }
-		
+
 static int
-cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
-	    __u16 len, void *buf)
+cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
+	     __u16 len, void *buf)
 {
 	int i, retval;
 
 	/* FIXME: need special actions for VOYAGER_CAT_ID here */
-	if(asicp->asic_id == VOYAGER_CAT_ID) {
+	if (asicp->asic_id == VOYAGER_CAT_ID) {
 		CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
 		/* FIXME -- This is supposed to be handled better
 		 * There is a problem writing to the cat asic in the
 		 * PSI.  The 30us delay seems to work, though */
 		udelay(30);
 	}
-		
-	if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+
+	if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
 		printk("cat_subwrite: cat_subaddrsetup FAILED\n");
 		return retval;
 	}
-	
-	if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
+
+	if (cat_sendinst
+	    (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
 		printk("cat_subwrite: cat_sendinst FAILED\n");
 		return 1;
 	}
-	for(i = 0; i < len; i++) {
-		if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) {
-			printk("cat_subwrite: cat_sendata element at %d FAILED\n", i);
+	for (i = 0; i < len; i++) {
+		if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) {
+			printk
+			    ("cat_subwrite: cat_sendata element at %d FAILED\n",
+			     i);
 			return 1;
 		}
 	}
 	return 0;
 }
 static int
-cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
 	    __u16 len, void *buf)
 {
 	int i, retval;
 
-	if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+	if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
 		CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
 		return retval;
 	}
 
-	if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
+	if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
 		CDEBUG(("cat_subread: cat_sendinst failed\n"));
 		return 1;
 	}
-	for(i = 0; i < len; i++) {
-		if(cat_getdata(modp, asicp, 0xFF,
-			       &((__u8 *)buf)[i])) {
-			CDEBUG(("cat_subread: cat_getdata element %d failed\n", i));
+	for (i = 0; i < len; i++) {
+		if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) {
+			CDEBUG(("cat_subread: cat_getdata element %d failed\n",
+				i));
 			return 1;
 		}
 	}
 	return 0;
 }
 
-
 /* buffer for storing EPROM data read in during initialisation */
 static __initdata __u8 eprom_buf[0xFFFF];
 static voyager_module_t *voyager_initial_module;
@@ -568,8 +573,7 @@ static voyager_module_t *voyager_initial_module;
  * boot cpu *after* all memory initialisation has been done (so we can
  * use kmalloc) but before smp initialisation, so we can probe the SMP
  * configuration and pick up necessary information.  */
-void __init
-voyager_cat_init(void)
+void __init voyager_cat_init(void)
 {
 	voyager_module_t **modpp = &voyager_initial_module;
 	voyager_asic_t **asicpp;
@@ -578,27 +582,29 @@ voyager_cat_init(void)
 	unsigned long qic_addr = 0;
 	__u8 qabc_data[0x20];
 	__u8 num_submodules, val;
-	voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0];
-	
+	voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0];
+
 	__u8 cmos[4];
 	unsigned long addr;
-	
+
 	/* initiallise the SUS mailbox */
-	for(i=0; i<sizeof(cmos); i++)
+	for (i = 0; i < sizeof(cmos); i++)
 		cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
 	addr = *(unsigned long *)cmos;
-	if((addr & 0xff000000) != 0xff000000) {
-		printk(KERN_ERR "Voyager failed to get SUS mailbox (addr = 0x%lx\n", addr);
+	if ((addr & 0xff000000) != 0xff000000) {
+		printk(KERN_ERR
+		       "Voyager failed to get SUS mailbox (addr = 0x%lx\n",
+		       addr);
 	} else {
 		static struct resource res;
-		
+
 		res.name = "voyager SUS";
 		res.start = addr;
-		res.end = addr+0x3ff;
-		
+		res.end = addr + 0x3ff;
+
 		request_resource(&iomem_resource, &res);
 		voyager_SUS = (struct voyager_SUS *)
-			ioremap(addr, 0x400);
+		    ioremap(addr, 0x400);
 		printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
 		       voyager_SUS->SUS_version);
 		voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
@@ -609,8 +615,6 @@ voyager_cat_init(void)
 	voyager_extended_vic_processors = 0;
 	voyager_quad_processors = 0;
 
-
-
 	printk("VOYAGER: beginning CAT bus probe\n");
 	/* set up the SuperSet Port Block which tells us where the
 	 * CAT communication port is */
@@ -618,14 +622,14 @@ voyager_cat_init(void)
 	VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
 
 	/* now find out if were 8 slot or normal */
-	if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
-	   == EIGHT_SLOT_IDENTIFIER) {
+	if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
+	    == EIGHT_SLOT_IDENTIFIER) {
 		voyager_8slot = 1;
-		printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n");
+		printk(KERN_NOTICE
+		       "Voyager: Eight slot 51xx configuration detected\n");
 	}
 
-	for(i = VOYAGER_MIN_MODULE;
-	    i <= VOYAGER_MAX_MODULE; i++) {
+	for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) {
 		__u8 input;
 		int asic;
 		__u16 eprom_size;
@@ -643,21 +647,21 @@ voyager_cat_init(void)
 		outb(0xAA, CAT_DATA);
 		input = inb(CAT_DATA);
 		outb(VOYAGER_CAT_END, CAT_CMD);
-		if(input != VOYAGER_CAT_HEADER) {
+		if (input != VOYAGER_CAT_HEADER) {
 			continue;
 		}
 		CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
 			cat_module_name(i)));
-		*modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/
-		if(*modpp == NULL) {
+		*modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL);	/*&voyager_module_storage[cat_count++]; */
+		if (*modpp == NULL) {
 			printk("**WARNING** kmalloc failure in cat_init\n");
 			continue;
 		}
 		memset(*modpp, 0, sizeof(voyager_module_t));
 		/* need temporary asic for cat_subread.  It will be
 		 * filled in correctly later */
-		(*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/
-		if((*modpp)->asic == NULL) {
+		(*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL);	/*&voyager_asic_storage[asic_count]; */
+		if ((*modpp)->asic == NULL) {
 			printk("**WARNING** kmalloc failure in cat_init\n");
 			continue;
 		}
@@ -666,47 +670,52 @@ voyager_cat_init(void)
 		(*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
 		(*modpp)->module_addr = i;
 		(*modpp)->scan_path_connected = 0;
-		if(i == VOYAGER_PSI) {
+		if (i == VOYAGER_PSI) {
 			/* Exception leg for modules with no EEPROM */
 			printk("Module \"%s\"\n", cat_module_name(i));
 			continue;
 		}
-			       
+
 		CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
 		outb(VOYAGER_CAT_RUN, CAT_CMD);
 		cat_disconnect(*modpp, (*modpp)->asic);
-		if(cat_subread(*modpp, (*modpp)->asic,
-			       VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
-			       &eprom_size)) {
-			printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+		if (cat_subread(*modpp, (*modpp)->asic,
+				VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+				&eprom_size)) {
+			printk
+			    ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
+			     i);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
-		if(eprom_size > sizeof(eprom_buf)) {
-			printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
+		if (eprom_size > sizeof(eprom_buf)) {
+			printk
+			    ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n",
+			     i, eprom_size);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
 		outb(VOYAGER_CAT_END, CAT_CMD);
 		outb(VOYAGER_CAT_RUN, CAT_CMD);
-		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
-		if(cat_subread(*modpp, (*modpp)->asic, 0, 
-			       eprom_size, eprom_buf)) {
+		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
+			eprom_size));
+		if (cat_subread
+		    (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
 		outb(VOYAGER_CAT_END, CAT_CMD);
 		printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
 		       cat_module_name(i), eprom_hdr->version_id,
-		       *((__u32 *)eprom_hdr->tracer),  eprom_hdr->num_asics);
+		       *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics);
 		(*modpp)->ee_size = eprom_hdr->ee_size;
 		(*modpp)->num_asics = eprom_hdr->num_asics;
 		asicpp = &((*modpp)->asic);
 		sp_offset = eprom_hdr->scan_path_offset;
 		/* All we really care about are the Quad cards.  We
-                 * identify them because they are in a processor slot
-                 * and have only four asics */
-		if((i < 0x10 || (i>=0x14 && i < 0x1c) || i>0x1f)) {
+		 * identify them because they are in a processor slot
+		 * and have only four asics */
+		if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) {
 			modpp = &((*modpp)->next);
 			continue;
 		}
@@ -717,16 +726,17 @@ voyager_cat_init(void)
 			 &num_submodules);
 		/* lowest two bits, active low */
 		num_submodules = ~(0xfc | num_submodules);
-		CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules));
-		if(num_submodules == 0) {
+		CDEBUG(("VOYAGER CAT: %d submodules present\n",
+			num_submodules));
+		if (num_submodules == 0) {
 			/* fill in the dyadic extended processors */
 			__u8 cpu = i & 0x07;
 
 			printk("Module \"%s\": Dyadic Processor Card\n",
 			       cat_module_name(i));
-			voyager_extended_vic_processors |= (1<<cpu);
+			voyager_extended_vic_processors |= (1 << cpu);
 			cpu += 4;
-			voyager_extended_vic_processors |= (1<<cpu);
+			voyager_extended_vic_processors |= (1 << cpu);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
@@ -740,28 +750,32 @@ voyager_cat_init(void)
 		cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
 
 		outb(VOYAGER_CAT_END, CAT_CMD);
-			 
 
 		CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
 		outb(VOYAGER_CAT_RUN, CAT_CMD);
 		cat_disconnect(*modpp, (*modpp)->asic);
-		if(cat_subread(*modpp, (*modpp)->asic,
-			       VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
-			       &eprom_size)) {
-			printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+		if (cat_subread(*modpp, (*modpp)->asic,
+				VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+				&eprom_size)) {
+			printk
+			    ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
+			     i);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
-		if(eprom_size > sizeof(eprom_buf)) {
-			printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
+		if (eprom_size > sizeof(eprom_buf)) {
+			printk
+			    ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x.  Need %d\n",
+			     i, eprom_size);
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
 		outb(VOYAGER_CAT_END, CAT_CMD);
 		outb(VOYAGER_CAT_RUN, CAT_CMD);
-		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
-		if(cat_subread(*modpp, (*modpp)->asic, 0, 
-			       eprom_size, eprom_buf)) {
+		CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
+			eprom_size));
+		if (cat_subread
+		    (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
 			outb(VOYAGER_CAT_END, CAT_CMD);
 			continue;
 		}
@@ -773,30 +787,35 @@ voyager_cat_init(void)
 		sp_offset = eprom_hdr->scan_path_offset;
 		/* get rid of the dummy CAT asic and read the real one */
 		kfree((*modpp)->asic);
-		for(asic=0; asic < (*modpp)->num_asics; asic++) {
+		for (asic = 0; asic < (*modpp)->num_asics; asic++) {
 			int j;
-			voyager_asic_t *asicp = *asicpp 
-				= kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
+			voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL);	/*&voyager_asic_storage[asic_count++]; */
 			voyager_sp_table_t *sp_table;
 			voyager_at_t *asic_table;
 			voyager_jtt_t *jtag_table;
 
-			if(asicp == NULL) {
-				printk("**WARNING** kmalloc failure in cat_init\n");
+			if (asicp == NULL) {
+				printk
+				    ("**WARNING** kmalloc failure in cat_init\n");
 				continue;
 			}
 			asicpp = &(asicp->next);
 			asicp->asic_location = asic;
-			sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset);
+			sp_table =
+			    (voyager_sp_table_t *) (eprom_buf + sp_offset);
 			asicp->asic_id = sp_table->asic_id;
-			asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset);
-			for(j=0; j<4; j++)
+			asic_table =
+			    (voyager_at_t *) (eprom_buf +
+					      sp_table->asic_data_offset);
+			for (j = 0; j < 4; j++)
 				asicp->jtag_id[j] = asic_table->jtag_id[j];
-			jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset);
+			jtag_table =
+			    (voyager_jtt_t *) (eprom_buf +
+					       asic_table->jtag_offset);
 			asicp->ireg_length = jtag_table->ireg_len;
 			asicp->bit_location = (*modpp)->inst_bits;
 			(*modpp)->inst_bits += asicp->ireg_length;
-			if(asicp->ireg_length > (*modpp)->largest_reg)
+			if (asicp->ireg_length > (*modpp)->largest_reg)
 				(*modpp)->largest_reg = asicp->ireg_length;
 			if (asicp->ireg_length < (*modpp)->smallest_reg ||
 			    (*modpp)->smallest_reg == 0)
@@ -804,15 +823,13 @@ voyager_cat_init(void)
 			CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
 				asicp->asic_id, asicp->ireg_length,
 				asicp->bit_location));
-			if(asicp->asic_id == VOYAGER_QUAD_QABC) {
+			if (asicp->asic_id == VOYAGER_QUAD_QABC) {
 				CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
 				qabc_asic = asicp;
 			}
 			sp_offset += sizeof(voyager_sp_table_t);
 		}
-		CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n",
-			(*modpp)->inst_bits, (*modpp)->largest_reg,
-			(*modpp)->smallest_reg));
+		CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg));
 		/* OK, now we have the QUAD ASICs set up, use them.
 		 * we need to:
 		 *
@@ -828,10 +845,11 @@ voyager_cat_init(void)
 		qic_addr = qabc_data[5] << 8;
 		qic_addr = (qic_addr | qabc_data[6]) << 8;
 		qic_addr = (qic_addr | qabc_data[7]) << 8;
-		printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
-		       cat_module_name(i), qic_addr, qabc_data[8]);
+		printk
+		    ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
+		     cat_module_name(i), qic_addr, qabc_data[8]);
 #if 0				/* plumbing fails---FIXME */
-		if((qabc_data[8] & 0xf0) == 0) {
+		if ((qabc_data[8] & 0xf0) == 0) {
 			/* FIXME: 32 way 8 CPU slot monster cannot be
 			 * plumbed this way---need to check for it */
 
@@ -842,94 +860,97 @@ voyager_cat_init(void)
 #ifdef VOYAGER_CAT_DEBUG
 			/* verify plumbing */
 			cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
-			if((qabc_data[8] & 0xf0) == 0) {
-				CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8]));
+			if ((qabc_data[8] & 0xf0) == 0) {
+				CDEBUG(("PLUMBING FAILED: 0x%x\n",
+					qabc_data[8]));
 			}
 #endif
 		}
 #endif
 
 		{
-			struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL);
+			struct resource *res =
+			    kzalloc(sizeof(struct resource), GFP_KERNEL);
 			res->name = kmalloc(128, GFP_KERNEL);
-			sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i));
+			sprintf((char *)res->name, "Voyager %s Quad CPI",
+				cat_module_name(i));
 			res->start = qic_addr;
 			res->end = qic_addr + 0x3ff;
 			request_resource(&iomem_resource, res);
 		}
 
 		qic_addr = (unsigned long)ioremap(qic_addr, 0x400);
-				
-		for(j = 0; j < 4; j++) {
+
+		for (j = 0; j < 4; j++) {
 			__u8 cpu;
 
-			if(voyager_8slot) {
+			if (voyager_8slot) {
 				/* 8 slot has a different mapping,
 				 * each slot has only one vic line, so
 				 * 1 cpu in each slot must be < 8 */
-				cpu = (i & 0x07) + j*8;
+				cpu = (i & 0x07) + j * 8;
 			} else {
-				cpu = (i & 0x03) + j*4;
+				cpu = (i & 0x03) + j * 4;
 			}
-			if( (qabc_data[8] & (1<<j))) {
-				voyager_extended_vic_processors |= (1<<cpu);
+			if ((qabc_data[8] & (1 << j))) {
+				voyager_extended_vic_processors |= (1 << cpu);
 			}
-			if(qabc_data[8] & (1<<(j+4)) ) {
+			if (qabc_data[8] & (1 << (j + 4))) {
 				/* Second SET register plumbed: Quad
 				 * card has two VIC connected CPUs.
 				 * Secondary cannot be booted as a VIC
 				 * CPU */
-				voyager_extended_vic_processors |= (1<<cpu);
-				voyager_allowed_boot_processors &= (~(1<<cpu));
+				voyager_extended_vic_processors |= (1 << cpu);
+				voyager_allowed_boot_processors &=
+				    (~(1 << cpu));
 			}
 
-			voyager_quad_processors |= (1<<cpu);
+			voyager_quad_processors |= (1 << cpu);
 			voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
-				(qic_addr+(j<<8));
+			    (qic_addr + (j << 8));
 			CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
 				(unsigned long)voyager_quad_cpi_addr[cpu]));
 		}
 		outb(VOYAGER_CAT_END, CAT_CMD);
 
-		
-		
 		*asicpp = NULL;
 		modpp = &((*modpp)->next);
 	}
 	*modpp = NULL;
-	printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors);
+	printk
+	    ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n",
+	     voyager_extended_vic_processors, voyager_quad_processors,
+	     voyager_allowed_boot_processors);
 	request_resource(&ioport_resource, &vic_res);
-	if(voyager_quad_processors)
+	if (voyager_quad_processors)
 		request_resource(&ioport_resource, &qic_res);
 	/* set up the front power switch */
 }
 
-int
-voyager_cat_readb(__u8 module, __u8 asic, int reg)
+int voyager_cat_readb(__u8 module, __u8 asic, int reg)
 {
 	return 0;
 }
 
-static int
-cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp) 
+static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp)
 {
 	__u8 val;
 	int err = 0;
 
-	if(!modp->scan_path_connected)
+	if (!modp->scan_path_connected)
 		return 0;
-	if(asicp->asic_id != VOYAGER_CAT_ID) {
+	if (asicp->asic_id != VOYAGER_CAT_ID) {
 		CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
 		return 1;
 	}
 	err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
-	if(err) {
+	if (err) {
 		CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
 		return err;
 	}
 	val &= VOYAGER_DISCONNECT_ASIC;
 	err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
-	if(err) {
+	if (err) {
 		CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
 		return err;
 	}
@@ -940,27 +961,26 @@ cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp)
 	return 0;
 }
 
-static int
-cat_connect(voyager_module_t *modp, voyager_asic_t *asicp) 
+static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp)
 {
 	__u8 val;
 	int err = 0;
 
-	if(modp->scan_path_connected)
+	if (modp->scan_path_connected)
 		return 0;
-	if(asicp->asic_id != VOYAGER_CAT_ID) {
+	if (asicp->asic_id != VOYAGER_CAT_ID) {
 		CDEBUG(("cat_connect: ASIC is not CAT\n"));
 		return 1;
 	}
 
 	err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
-	if(err) {
+	if (err) {
 		CDEBUG(("cat_connect: failed to read SCANPATH\n"));
 		return err;
 	}
 	val |= VOYAGER_CONNECT_ASIC;
 	err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
-	if(err) {
+	if (err) {
 		CDEBUG(("cat_connect: failed to write SCANPATH\n"));
 		return err;
 	}
@@ -971,11 +991,10 @@ cat_connect(voyager_module_t *modp, voyager_asic_t *asicp)
 	return 0;
 }
 
-void
-voyager_cat_power_off(void)
+void voyager_cat_power_off(void)
 {
 	/* Power the machine off by writing to the PSI over the CAT
-         * bus */
+	 * bus */
 	__u8 data;
 	voyager_module_t psi = { 0 };
 	voyager_asic_t psi_asic = { 0 };
@@ -1009,8 +1028,7 @@ voyager_cat_power_off(void)
 
 struct voyager_status voyager_status = { 0 };
 
-void
-voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
+void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data)
 {
 	voyager_module_t psi = { 0 };
 	voyager_asic_t psi_asic = { 0 };
@@ -1027,7 +1045,7 @@ voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
 	outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
 	outb(VOYAGER_CAT_RUN, CAT_CMD);
 	cat_disconnect(&psi, &psi_asic);
-	switch(cmd) {
+	switch (cmd) {
 	case VOYAGER_PSI_READ:
 		cat_read(&psi, &psi_asic, reg, data);
 		break;
@@ -1047,8 +1065,7 @@ voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
 	outb(VOYAGER_CAT_END, CAT_CMD);
 }
 
-void
-voyager_cat_do_common_interrupt(void)
+void voyager_cat_do_common_interrupt(void)
 {
 	/* This is caused either by a memory parity error or something
 	 * in the PSI */
@@ -1057,7 +1074,7 @@ voyager_cat_do_common_interrupt(void)
 	voyager_asic_t psi_asic = { 0 };
 	struct voyager_psi psi_reg;
 	int i;
- re_read:
+      re_read:
 	psi.asic = &psi_asic;
 	psi.asic->asic_id = VOYAGER_CAT_ID;
 	psi.asic->subaddr = VOYAGER_SUBADDR_HI;
@@ -1072,43 +1089,45 @@ voyager_cat_do_common_interrupt(void)
 	cat_disconnect(&psi, &psi_asic);
 	/* Read the status.  NOTE: Need to read *all* the PSI regs here
 	 * otherwise the cmn int will be reasserted */
-	for(i = 0; i < sizeof(psi_reg.regs); i++) {
-		cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]);
+	for (i = 0; i < sizeof(psi_reg.regs); i++) {
+		cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]);
 	}
 	outb(VOYAGER_CAT_END, CAT_CMD);
-	if((psi_reg.regs.checkbit & 0x02) == 0) {
+	if ((psi_reg.regs.checkbit & 0x02) == 0) {
 		psi_reg.regs.checkbit |= 0x02;
 		cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
 		printk("VOYAGER RE-READ PSI\n");
 		goto re_read;
 	}
 	outb(VOYAGER_CAT_RUN, CAT_CMD);
-	for(i = 0; i < sizeof(psi_reg.subregs); i++) {
+	for (i = 0; i < sizeof(psi_reg.subregs); i++) {
 		/* This looks strange, but the PSI doesn't do auto increment
 		 * correctly */
-		cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, 
-			    1, &((__u8 *)&psi_reg.subregs)[i]); 
+		cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
+			    1, &((__u8 *) & psi_reg.subregs)[i]);
 	}
 	outb(VOYAGER_CAT_END, CAT_CMD);
 #ifdef VOYAGER_CAT_DEBUG
 	printk("VOYAGER PSI: ");
-	for(i=0; i<sizeof(psi_reg.regs); i++)
-		printk("%02x ", ((__u8 *)&psi_reg.regs)[i]);
+	for (i = 0; i < sizeof(psi_reg.regs); i++)
+		printk("%02x ", ((__u8 *) & psi_reg.regs)[i]);
 	printk("\n           ");
-	for(i=0; i<sizeof(psi_reg.subregs); i++)
-		printk("%02x ", ((__u8 *)&psi_reg.subregs)[i]);
+	for (i = 0; i < sizeof(psi_reg.subregs); i++)
+		printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]);
 	printk("\n");
 #endif
-	if(psi_reg.regs.intstatus & PSI_MON) {
+	if (psi_reg.regs.intstatus & PSI_MON) {
 		/* switch off or power fail */
 
-		if(psi_reg.subregs.supply & PSI_SWITCH_OFF) {
-			if(voyager_status.switch_off) {
-				printk(KERN_ERR "Voyager front panel switch turned off again---Immediate power off!\n");
+		if (psi_reg.subregs.supply & PSI_SWITCH_OFF) {
+			if (voyager_status.switch_off) {
+				printk(KERN_ERR
+				       "Voyager front panel switch turned off again---Immediate power off!\n");
 				voyager_cat_power_off();
 				/* not reached */
 			} else {
-				printk(KERN_ERR "Voyager front panel switch turned off\n");
+				printk(KERN_ERR
+				       "Voyager front panel switch turned off\n");
 				voyager_status.switch_off = 1;
 				voyager_status.request_from_kernel = 1;
 				wake_up_process(voyager_thread);
@@ -1127,7 +1146,7 @@ voyager_cat_do_common_interrupt(void)
 
 			VDEBUG(("Voyager ac fail reg 0x%x\n",
 				psi_reg.subregs.ACfail));
-			if((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
+			if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
 				/* No further update */
 				return;
 			}
@@ -1135,20 +1154,20 @@ voyager_cat_do_common_interrupt(void)
 			/* Don't bother trying to find out who failed.
 			 * FIXME: This probably makes the code incorrect on
 			 * anything other than a 345x */
-			for(i=0; i< 5; i++) {
-				if( psi_reg.subregs.ACfail &(1<<i)) {
+			for (i = 0; i < 5; i++) {
+				if (psi_reg.subregs.ACfail & (1 << i)) {
 					break;
 				}
 			}
 			printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
 #endif
 			/* DON'T do this: it shuts down the AC PSI 
-			outb(VOYAGER_CAT_RUN, CAT_CMD);
-			data = PSI_MASK_MASK | i;
-			cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
-				     1, &data);
-			outb(VOYAGER_CAT_END, CAT_CMD);
-			*/
+			   outb(VOYAGER_CAT_RUN, CAT_CMD);
+			   data = PSI_MASK_MASK | i;
+			   cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
+			   1, &data);
+			   outb(VOYAGER_CAT_END, CAT_CMD);
+			 */
 			printk(KERN_ERR "Voyager AC power failure\n");
 			outb(VOYAGER_CAT_RUN, CAT_CMD);
 			data = PSI_COLD_START;
@@ -1159,16 +1178,16 @@ voyager_cat_do_common_interrupt(void)
 			voyager_status.request_from_kernel = 1;
 			wake_up_process(voyager_thread);
 		}
-		
-		
-	} else if(psi_reg.regs.intstatus & PSI_FAULT) {
+
+	} else if (psi_reg.regs.intstatus & PSI_FAULT) {
 		/* Major fault! */
-		printk(KERN_ERR "Voyager PSI Detected major fault, immediate power off!\n");
+		printk(KERN_ERR
+		       "Voyager PSI Detected major fault, immediate power off!\n");
 		voyager_cat_power_off();
 		/* not reached */
-	} else if(psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
-					    | PSI_CURRENT | PSI_DVM
-					    | PSI_PSCFAULT | PSI_STAT_CHG)) {
+	} else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
+					     | PSI_CURRENT | PSI_DVM
+					     | PSI_PSCFAULT | PSI_STAT_CHG)) {
 		/* other psi fault */
 
 		printk(KERN_WARNING "Voyager PSI status 0x%x\n", data);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 88124dd35406..73c435ce10fd 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -32,7 +32,8 @@
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 };
 
 /* CPU IRQ affinity -- set to all ones initially */
-static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1]  = ~0UL };
+static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned =
+	{[0 ... NR_CPUS-1]  = ~0UL };
 
 /* per CPU data structure (for /proc/cpuinfo et al), visible externally
  * indexed physically */
@@ -76,7 +77,6 @@ EXPORT_SYMBOL(cpu_online_map);
  * by scheduler but indexed physically */
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 
-
 /* The internal functions */
 static void send_CPI(__u32 cpuset, __u8 cpi);
 static void ack_CPI(__u8 cpi);
@@ -101,94 +101,86 @@ int hard_smp_processor_id(void);
 int safe_smp_processor_id(void);
 
 /* Inline functions */
-static inline void
-send_one_QIC_CPI(__u8 cpu, __u8 cpi)
+static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi)
 {
 	voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
-		(smp_processor_id() << 16) + cpi;
+	    (smp_processor_id() << 16) + cpi;
 }
 
-static inline void
-send_QIC_CPI(__u32 cpuset, __u8 cpi)
+static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
 {
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		if(cpuset & (1<<cpu)) {
+		if (cpuset & (1 << cpu)) {
 #ifdef VOYAGER_DEBUG
-			if(!cpu_isset(cpu, cpu_online_map))
-				VDEBUG(("CPU%d sending cpi %d to CPU%d not in cpu_online_map\n", hard_smp_processor_id(), cpi, cpu));
+			if (!cpu_isset(cpu, cpu_online_map))
+				VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
+					"cpu_online_map\n",
+					hard_smp_processor_id(), cpi, cpu));
 #endif
 			send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
 		}
 	}
 }
 
-static inline void
-wrapper_smp_local_timer_interrupt(void)
+static inline void wrapper_smp_local_timer_interrupt(void)
 {
 	irq_enter();
 	smp_local_timer_interrupt();
 	irq_exit();
 }
 
-static inline void
-send_one_CPI(__u8 cpu, __u8 cpi)
+static inline void send_one_CPI(__u8 cpu, __u8 cpi)
 {
-	if(voyager_quad_processors & (1<<cpu))
+	if (voyager_quad_processors & (1 << cpu))
 		send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
 	else
-		send_CPI(1<<cpu, cpi);
+		send_CPI(1 << cpu, cpi);
 }
 
-static inline void
-send_CPI_allbutself(__u8 cpi)
+static inline void send_CPI_allbutself(__u8 cpi)
 {
 	__u8 cpu = smp_processor_id();
 	__u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
 	send_CPI(mask, cpi);
 }
 
-static inline int
-is_cpu_quad(void)
+static inline int is_cpu_quad(void)
 {
 	__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
 	return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
 }
 
-static inline int
-is_cpu_extended(void)
+static inline int is_cpu_extended(void)
 {
 	__u8 cpu = hard_smp_processor_id();
 
-	return(voyager_extended_vic_processors & (1<<cpu));
+	return (voyager_extended_vic_processors & (1 << cpu));
 }
 
-static inline int
-is_cpu_vic_boot(void)
+static inline int is_cpu_vic_boot(void)
 {
 	__u8 cpu = hard_smp_processor_id();
 
-	return(voyager_extended_vic_processors
-	       & voyager_allowed_boot_processors & (1<<cpu));
+	return (voyager_extended_vic_processors
+		& voyager_allowed_boot_processors & (1 << cpu));
 }
 
-
-static inline void
-ack_CPI(__u8 cpi)
+static inline void ack_CPI(__u8 cpi)
 {
-	switch(cpi) {
+	switch (cpi) {
 	case VIC_CPU_BOOT_CPI:
-		if(is_cpu_quad() && !is_cpu_vic_boot())
+		if (is_cpu_quad() && !is_cpu_vic_boot())
 			ack_QIC_CPI(cpi);
 		else
 			ack_VIC_CPI(cpi);
 		break;
 	case VIC_SYS_INT:
-	case VIC_CMN_INT: 
+	case VIC_CMN_INT:
 		/* These are slightly strange.  Even on the Quad card,
 		 * They are vectored as VIC CPIs */
-		if(is_cpu_quad())
+		if (is_cpu_quad())
 			ack_special_QIC_CPI(cpi);
 		else
 			ack_VIC_CPI(cpi);
@@ -205,11 +197,11 @@ ack_CPI(__u8 cpi)
  * 8259 IRQs except that masks and things must be kept per processor
  */
 static struct irq_chip vic_chip = {
-	.name		= "VIC",
-	.startup	= startup_vic_irq,
-	.mask		= mask_vic_irq,
-	.unmask		= unmask_vic_irq,
-	.set_affinity	= set_vic_irq_affinity,
+	.name = "VIC",
+	.startup = startup_vic_irq,
+	.mask = mask_vic_irq,
+	.unmask = unmask_vic_irq,
+	.set_affinity = set_vic_irq_affinity,
 };
 
 /* used to count up as CPUs are brought on line (starts at 0) */
@@ -223,7 +215,7 @@ static __u32 trampoline_base;
 /* The per cpu profile stuff - used in smp_local_timer_interrupt */
 static DEFINE_PER_CPU(int, prof_multiplier) = 1;
 static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) =  1;
+static DEFINE_PER_CPU(int, prof_counter) = 1;
 
 /* the map used to check if a CPU has booted */
 static __u32 cpu_booted_map;
@@ -246,9 +238,9 @@ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
 static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
 
 /* Lock for enable/disable of VIC interrupts */
-static  __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
+static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
 
-/* The boot processor is correctly set up in PC mode when it 
+/* The boot processor is correctly set up in PC mode when it
  * comes up, but the secondaries need their master/slave 8259
  * pairs initializing correctly */
 
@@ -262,8 +254,7 @@ static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
 static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
 
 /* debugging routine to read the isr of the cpu's pic */
-static inline __u16
-vic_read_isr(void)
+static inline __u16 vic_read_isr(void)
 {
 	__u16 isr;
 
@@ -275,17 +266,16 @@ vic_read_isr(void)
 	return isr;
 }
 
-static __init void
-qic_setup(void)
+static __init void qic_setup(void)
 {
-	if(!is_cpu_quad()) {
+	if (!is_cpu_quad()) {
 		/* not a quad, no setup */
 		return;
 	}
 	outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
 	outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
-	
-	if(is_cpu_extended()) {
+
+	if (is_cpu_extended()) {
 		/* the QIC duplicate of the VIC base register */
 		outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
 		outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
@@ -295,8 +285,7 @@ qic_setup(void)
 	}
 }
 
-static __init void
-vic_setup_pic(void)
+static __init void vic_setup_pic(void)
 {
 	outb(1, VIC_REDIRECT_REGISTER_1);
 	/* clear the claim registers for dynamic routing */
@@ -333,7 +322,7 @@ vic_setup_pic(void)
 
 	/* ICW2: slave vector base */
 	outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
-	
+
 	/* ICW3: slave ID */
 	outb(0x02, 0xA1);
 
@@ -341,19 +330,18 @@ vic_setup_pic(void)
 	outb(0x01, 0xA1);
 }
 
-static void
-do_quad_bootstrap(void)
+static void do_quad_bootstrap(void)
 {
-	if(is_cpu_quad() && is_cpu_vic_boot()) {
+	if (is_cpu_quad() && is_cpu_vic_boot()) {
 		int i;
 		unsigned long flags;
 		__u8 cpuid = hard_smp_processor_id();
 
 		local_irq_save(flags);
 
-		for(i = 0; i<4; i++) {
+		for (i = 0; i < 4; i++) {
 			/* FIXME: this would be >>3 &0x7 on the 32 way */
-			if(((cpuid >> 2) & 0x03) == i)
+			if (((cpuid >> 2) & 0x03) == i)
 				/* don't lower our own mask! */
 				continue;
 
@@ -368,12 +356,10 @@ do_quad_bootstrap(void)
 	}
 }
 
-
 /* Set up all the basic stuff: read the SMP config and make all the
  * SMP information reflect only the boot cpu.  All others will be
  * brought on-line later. */
-void __init 
-find_smp_config(void)
+void __init find_smp_config(void)
 {
 	int i;
 
@@ -382,24 +368,31 @@ find_smp_config(void)
 	printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
 
 	/* initialize the CPU structures (moved from smp_boot_cpus) */
-	for(i=0; i<NR_CPUS; i++) {
+	for (i = 0; i < NR_CPUS; i++) {
 		cpu_irq_affinity[i] = ~0;
 	}
 	cpu_online_map = cpumask_of_cpu(boot_cpu_id);
 
 	/* The boot CPU must be extended */
-	voyager_extended_vic_processors = 1<<boot_cpu_id;
+	voyager_extended_vic_processors = 1 << boot_cpu_id;
 	/* initially, all of the first 8 CPUs can boot */
 	voyager_allowed_boot_processors = 0xff;
 	/* set up everything for just this CPU, we can alter
 	 * this as we start the other CPUs later */
 	/* now get the CPU disposition from the extended CMOS */
-	cpus_addr(phys_cpu_present_map)[0] = voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
-	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
-	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16;
-	cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24;
+	cpus_addr(phys_cpu_present_map)[0] =
+	    voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
+	cpus_addr(phys_cpu_present_map)[0] |=
+	    voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
+	cpus_addr(phys_cpu_present_map)[0] |=
+	    voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
+				       2) << 16;
+	cpus_addr(phys_cpu_present_map)[0] |=
+	    voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
+				       3) << 24;
 	cpu_possible_map = phys_cpu_present_map;
-	printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]);
+	printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n",
+	       cpus_addr(phys_cpu_present_map)[0]);
 	/* Here we set up the VIC to enable SMP */
 	/* enable the CPIs by writing the base vector to their register */
 	outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
@@ -427,8 +420,7 @@ find_smp_config(void)
 /*
  *	The bootstrap kernel entry code has set these up. Save them
  *	for a given CPU, id is physical */
-void __init
-smp_store_cpu_info(int id)
+void __init smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
@@ -438,21 +430,19 @@ smp_store_cpu_info(int id)
 }
 
 /* set up the trampoline and return the physical address of the code */
-static __u32 __init
-setup_trampoline(void)
+static __u32 __init setup_trampoline(void)
 {
 	/* these two are global symbols in trampoline.S */
 	extern const __u8 trampoline_end[];
 	extern const __u8 trampoline_data[];
 
-	memcpy((__u8 *)trampoline_base, trampoline_data,
+	memcpy((__u8 *) trampoline_base, trampoline_data,
 	       trampoline_end - trampoline_data);
-	return virt_to_phys((__u8 *)trampoline_base);
+	return virt_to_phys((__u8 *) trampoline_base);
 }
 
 /* Routine initially called when a non-boot CPU is brought online */
-static void __init
-start_secondary(void *unused)
+static void __init start_secondary(void *unused)
 {
 	__u8 cpuid = hard_smp_processor_id();
 	/* external functions not defined in the headers */
@@ -464,17 +454,18 @@ start_secondary(void *unused)
 	ack_CPI(VIC_CPU_BOOT_CPI);
 
 	/* setup the 8259 master slave pair belonging to this CPU ---
-         * we won't actually receive any until the boot CPU
-         * relinquishes it's static routing mask */
+	 * we won't actually receive any until the boot CPU
+	 * relinquishes it's static routing mask */
 	vic_setup_pic();
 
 	qic_setup();
 
-	if(is_cpu_quad() && !is_cpu_vic_boot()) {
+	if (is_cpu_quad() && !is_cpu_vic_boot()) {
 		/* clear the boot CPI */
 		__u8 dummy;
 
-		dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
+		dummy =
+		    voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
 		printk("read dummy %d\n", dummy);
 	}
 
@@ -516,7 +507,6 @@ start_secondary(void *unused)
 	cpu_idle();
 }
 
-
 /* Routine to kick start the given CPU and wait for it to report ready
  * (or timeout in startup).  When this routine returns, the requested
  * CPU is either fully running and configured or known to be dead.
@@ -524,15 +514,14 @@ start_secondary(void *unused)
  * We call this routine sequentially 1 CPU at a time, so no need for
  * locking */
 
-static void __init
-do_boot_cpu(__u8 cpu)
+static void __init do_boot_cpu(__u8 cpu)
 {
 	struct task_struct *idle;
 	int timeout;
 	unsigned long flags;
-	int quad_boot = (1<<cpu) & voyager_quad_processors 
-		& ~( voyager_extended_vic_processors
-		     & voyager_allowed_boot_processors);
+	int quad_boot = (1 << cpu) & voyager_quad_processors
+	    & ~(voyager_extended_vic_processors
+		& voyager_allowed_boot_processors);
 
 	/* This is an area in head.S which was used to set up the
 	 * initial kernel stack.  We need to alter this to give the
@@ -543,10 +532,10 @@ do_boot_cpu(__u8 cpu)
 	} stack_start;
 	/* This is the format of the CPI IDT gate (in real mode) which
 	 * we're hijacking to boot the CPU */
-	union 	IDTFormat {
+	union IDTFormat {
 		struct seg {
-			__u16	Offset;
-			__u16	Segment;
+			__u16 Offset;
+			__u16 Segment;
 		} idt;
 		__u32 val;
 	} hijack_source;
@@ -565,19 +554,19 @@ do_boot_cpu(__u8 cpu)
 	alternatives_smp_switch(1);
 
 	idle = fork_idle(cpu);
-	if(IS_ERR(idle))
+	if (IS_ERR(idle))
 		panic("failed fork for CPU%d", cpu);
-	idle->thread.eip = (unsigned long) start_secondary;
+	idle->thread.eip = (unsigned long)start_secondary;
 	/* init_tasks (in sched.c) is indexed logically */
-	stack_start.esp = (void *) idle->thread.esp;
+	stack_start.esp = (void *)idle->thread.esp;
 
 	init_gdt(cpu);
- 	per_cpu(current_task, cpu) = idle;
+	per_cpu(current_task, cpu) = idle;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	irq_ctx_init(cpu);
 
 	/* Note: Don't modify initial ss override */
-	VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, 
+	VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
 		(unsigned long)hijack_source.val, hijack_source.idt.Segment,
 		hijack_source.idt.Offset, stack_start.esp));
 
@@ -586,16 +575,23 @@ do_boot_cpu(__u8 cpu)
 			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
 	flush_tlb_all();
 
-	if(quad_boot) {
+	if (quad_boot) {
 		printk("CPU %d: non extended Quad boot\n", cpu);
-		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4);
+		hijack_vector =
+		    (__u32 *)
+		    phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4);
 		*hijack_vector = hijack_source.val;
 	} else {
 		printk("CPU%d: extended VIC boot\n", cpu);
-		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4);
+		hijack_vector =
+		    (__u32 *)
+		    phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4);
 		*hijack_vector = hijack_source.val;
 		/* VIC errata, may also receive interrupt at this address */
-		hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4);
+		hijack_vector =
+		    (__u32 *)
+		    phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI +
+				  VIC_DEFAULT_CPI_BASE) * 4);
 		*hijack_vector = hijack_source.val;
 	}
 	/* All non-boot CPUs start with interrupts fully masked.  Need
@@ -603,73 +599,76 @@ do_boot_cpu(__u8 cpu)
 	 * this in the VIC by masquerading as the processor we're
 	 * about to boot and lowering its interrupt mask */
 	local_irq_save(flags);
-	if(quad_boot) {
+	if (quad_boot) {
 		send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
 	} else {
 		outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
 		/* here we're altering registers belonging to `cpu' */
-		
+
 		outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
 		/* now go back to our original identity */
 		outb(boot_cpu_id, VIC_PROCESSOR_ID);
 
 		/* and boot the CPU */
 
-		send_CPI((1<<cpu), VIC_CPU_BOOT_CPI);
+		send_CPI((1 << cpu), VIC_CPU_BOOT_CPI);
 	}
 	cpu_booted_map = 0;
 	local_irq_restore(flags);
 
 	/* now wait for it to become ready (or timeout) */
-	for(timeout = 0; timeout < 50000; timeout++) {
-		if(cpu_booted_map)
+	for (timeout = 0; timeout < 50000; timeout++) {
+		if (cpu_booted_map)
 			break;
 		udelay(100);
 	}
 	/* reset the page table */
 	zap_low_mappings();
-	  
+
 	if (cpu_booted_map) {
 		VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
 			cpu, smp_processor_id()));
-	
+
 		printk("CPU%d: ", cpu);
 		print_cpu_info(&cpu_data(cpu));
 		wmb();
 		cpu_set(cpu, cpu_callout_map);
 		cpu_set(cpu, cpu_present_map);
-	}
-	else {
+	} else {
 		printk("CPU%d FAILED TO BOOT: ", cpu);
-		if (*((volatile unsigned char *)phys_to_virt(start_phys_address))==0xA5)
+		if (*
+		    ((volatile unsigned char *)phys_to_virt(start_phys_address))
+		    == 0xA5)
 			printk("Stuck.\n");
 		else
 			printk("Not responding.\n");
-		
+
 		cpucount--;
 	}
 }
 
-void __init
-smp_boot_cpus(void)
+void __init smp_boot_cpus(void)
 {
 	int i;
 
 	/* CAT BUS initialisation must be done after the memory */
 	/* FIXME: The L4 has a catbus too, it just needs to be
 	 * accessed in a totally different way */
-	if(voyager_level == 5) {
+	if (voyager_level == 5) {
 		voyager_cat_init();
 
 		/* now that the cat has probed the Voyager System Bus, sanity
 		 * check the cpu map */
-		if( ((voyager_quad_processors | voyager_extended_vic_processors)
-		     & cpus_addr(phys_cpu_present_map)[0]) != cpus_addr(phys_cpu_present_map)[0]) {
+		if (((voyager_quad_processors | voyager_extended_vic_processors)
+		     & cpus_addr(phys_cpu_present_map)[0]) !=
+		    cpus_addr(phys_cpu_present_map)[0]) {
 			/* should panic */
-			printk("\n\n***WARNING*** Sanity check of CPU present map FAILED\n");
+			printk("\n\n***WARNING*** "
+			       "Sanity check of CPU present map FAILED\n");
 		}
-	} else if(voyager_level == 4)
-		voyager_extended_vic_processors = cpus_addr(phys_cpu_present_map)[0];
+	} else if (voyager_level == 4)
+		voyager_extended_vic_processors =
+		    cpus_addr(phys_cpu_present_map)[0];
 
 	/* this sets up the idle task to run on the current cpu */
 	voyager_extended_cpus = 1;
@@ -678,14 +677,14 @@ smp_boot_cpus(void)
 	//global_irq_holder = boot_cpu_id;
 
 	/* FIXME: Need to do something about this but currently only works
-	 * on CPUs with a tsc which none of mine have. 
-	smp_tune_scheduling();
+	 * on CPUs with a tsc which none of mine have.
+	 smp_tune_scheduling();
 	 */
 	smp_store_cpu_info(boot_cpu_id);
 	printk("CPU%d: ", boot_cpu_id);
 	print_cpu_info(&cpu_data(boot_cpu_id));
 
-	if(is_cpu_quad()) {
+	if (is_cpu_quad()) {
 		/* booting on a Quad CPU */
 		printk("VOYAGER SMP: Boot CPU is Quad\n");
 		qic_setup();
@@ -697,11 +696,11 @@ smp_boot_cpus(void)
 
 	cpu_set(boot_cpu_id, cpu_online_map);
 	cpu_set(boot_cpu_id, cpu_callout_map);
-	
-	/* loop over all the extended VIC CPUs and boot them.  The 
+
+	/* loop over all the extended VIC CPUs and boot them.  The
 	 * Quad CPUs must be bootstrapped by their extended VIC cpu */
-	for(i = 0; i < NR_CPUS; i++) {
-		if(i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
 			continue;
 		do_boot_cpu(i);
 		/* This udelay seems to be needed for the Quad boots
@@ -715,25 +714,26 @@ smp_boot_cpus(void)
 		for (i = 0; i < NR_CPUS; i++)
 			if (cpu_isset(i, cpu_online_map))
 				bogosum += cpu_data(i).loops_per_jiffy;
-		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-			cpucount+1,
-			bogosum/(500000/HZ),
-			(bogosum/(5000/HZ))%100);
+		printk(KERN_INFO "Total of %d processors activated "
+		       "(%lu.%02lu BogoMIPS).\n",
+		       cpucount + 1, bogosum / (500000 / HZ),
+		       (bogosum / (5000 / HZ)) % 100);
 	}
 	voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
-	printk("VOYAGER: Extended (interrupt handling CPUs): %d, non-extended: %d\n", voyager_extended_cpus, num_booting_cpus() - voyager_extended_cpus);
+	printk("VOYAGER: Extended (interrupt handling CPUs): "
+	       "%d, non-extended: %d\n", voyager_extended_cpus,
+	       num_booting_cpus() - voyager_extended_cpus);
 	/* that's it, switch to symmetric mode */
 	outb(0, VIC_PRIORITY_REGISTER);
 	outb(0, VIC_CLAIM_REGISTER_0);
 	outb(0, VIC_CLAIM_REGISTER_1);
-	
+
 	VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
 }
 
 /* Reload the secondary CPUs task structure (this function does not
  * return ) */
-void __init 
-initialize_secondary(void)
+void __init initialize_secondary(void)
 {
 #if 0
 	// AC kernels only
@@ -745,11 +745,9 @@ initialize_secondary(void)
 	 * basically just the stack pointer and the eip.
 	 */
 
-	asm volatile(
-		"movl %0,%%esp\n\t"
-		"jmp *%1"
-		:
-		:"r" (current->thread.esp),"r" (current->thread.eip));
+	asm volatile ("movl %0,%%esp\n\t"
+		      "jmp *%1"::"r" (current->thread.esp),
+		      "r"(current->thread.eip));
 }
 
 /* handle a Voyager SYS_INT -- If we don't, the base board will
@@ -758,25 +756,23 @@ initialize_secondary(void)
  * System interrupts occur because some problem was detected on the
  * various busses.  To find out what you have to probe all the
  * hardware via the CAT bus.  FIXME: At the moment we do nothing. */
-fastcall void
-smp_vic_sys_interrupt(struct pt_regs *regs)
+fastcall void smp_vic_sys_interrupt(struct pt_regs *regs)
 {
 	ack_CPI(VIC_SYS_INT);
-	printk("Voyager SYSTEM INTERRUPT\n");	
+	printk("Voyager SYSTEM INTERRUPT\n");
 }
 
 /* Handle a voyager CMN_INT; These interrupts occur either because of
  * a system status change or because a single bit memory error
  * occurred.  FIXME: At the moment, ignore all this. */
-fastcall void
-smp_vic_cmn_interrupt(struct pt_regs *regs)
+fastcall void smp_vic_cmn_interrupt(struct pt_regs *regs)
 {
 	static __u8 in_cmn_int = 0;
 	static DEFINE_SPINLOCK(cmn_int_lock);
 
 	/* common ints are broadcast, so make sure we only do this once */
 	_raw_spin_lock(&cmn_int_lock);
-	if(in_cmn_int)
+	if (in_cmn_int)
 		goto unlock_end;
 
 	in_cmn_int++;
@@ -784,12 +780,12 @@ smp_vic_cmn_interrupt(struct pt_regs *regs)
 
 	VDEBUG(("Voyager COMMON INTERRUPT\n"));
 
-	if(voyager_level == 5)
+	if (voyager_level == 5)
 		voyager_cat_do_common_interrupt();
 
 	_raw_spin_lock(&cmn_int_lock);
 	in_cmn_int = 0;
- unlock_end:
+      unlock_end:
 	_raw_spin_unlock(&cmn_int_lock);
 	ack_CPI(VIC_CMN_INT);
 }
@@ -797,26 +793,24 @@ smp_vic_cmn_interrupt(struct pt_regs *regs)
 /*
  * Reschedule call back. Nothing to do, all the work is done
  * automatically when we return from the interrupt.  */
-static void
-smp_reschedule_interrupt(void)
+static void smp_reschedule_interrupt(void)
 {
 	/* do nothing */
 }
 
-static struct mm_struct * flush_mm;
+static struct mm_struct *flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
 #define FLUSH_ALL	0xffffffff
 
 /*
- * We cannot call mmdrop() because we are in interrupt context, 
+ * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  *
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-static inline void
-leave_mm (unsigned long cpu)
+static inline void leave_mm(unsigned long cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
@@ -824,12 +818,10 @@ leave_mm (unsigned long cpu)
 	load_cr3(swapper_pg_dir);
 }
 
-
 /*
  * Invalidate call-back
  */
-static void 
-smp_invalidate_interrupt(void)
+static void smp_invalidate_interrupt(void)
 {
 	__u8 cpu = smp_processor_id();
 
@@ -837,9 +829,9 @@ smp_invalidate_interrupt(void)
 		return;
 	/* This will flood messages.  Don't uncomment unless you see
 	 * Problems with cross cpu invalidation
-	VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
-		smp_processor_id()));
-	*/
+	 VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
+	 smp_processor_id()));
+	 */
 
 	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
@@ -857,11 +849,10 @@ smp_invalidate_interrupt(void)
 
 /* All the new flush operations for 2.4 */
 
-
 /* This routine is called with a physical cpu mask */
 static void
-voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
-			  unsigned long va)
+voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm,
+			 unsigned long va)
 {
 	int stuck = 50000;
 
@@ -875,7 +866,7 @@ voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
 		BUG();
 
 	spin_lock(&tlbstate_lock);
-	
+
 	flush_mm = mm;
 	flush_va = va;
 	atomic_set_mask(cpumask, &smp_invalidate_needed);
@@ -887,23 +878,23 @@ voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
 
 	while (smp_invalidate_needed) {
 		mb();
-		if(--stuck == 0) {
-			printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id());
+		if (--stuck == 0) {
+			printk("***WARNING*** Stuck doing invalidate CPI "
+			       "(CPU%d)\n", smp_processor_id());
 			break;
 		}
 	}
 
 	/* Uncomment only to debug invalidation problems
-	VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
-	*/
+	   VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
+	 */
 
 	flush_mm = NULL;
 	flush_va = 0;
 	spin_unlock(&tlbstate_lock);
 }
 
-void
-flush_tlb_current_task(void)
+void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long cpu_mask;
@@ -918,9 +909,7 @@ flush_tlb_current_task(void)
 	preempt_enable();
 }
 
-
-void
-flush_tlb_mm (struct mm_struct * mm)
+void flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long cpu_mask;
 
@@ -940,7 +929,7 @@ flush_tlb_mm (struct mm_struct * mm)
 	preempt_enable();
 }
 
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long cpu_mask;
@@ -949,10 +938,10 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
 	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
 	if (current->active_mm == mm) {
-		if(current->mm)
+		if (current->mm)
 			__flush_tlb_one(va);
-		 else
-		 	leave_mm(smp_processor_id());
+		else
+			leave_mm(smp_processor_id());
 	}
 
 	if (cpu_mask)
@@ -960,21 +949,21 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
 	preempt_enable();
 }
+
 EXPORT_SYMBOL(flush_tlb_page);
 
 /* enable the requested IRQs */
-static void
-smp_enable_irq_interrupt(void)
+static void smp_enable_irq_interrupt(void)
 {
 	__u8 irq;
 	__u8 cpu = get_cpu();
 
 	VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
-	       vic_irq_enable_mask[cpu]));
+		vic_irq_enable_mask[cpu]));
 
 	spin_lock(&vic_irq_lock);
-	for(irq = 0; irq < 16; irq++) {
-		if(vic_irq_enable_mask[cpu] & (1<<irq))
+	for (irq = 0; irq < 16; irq++) {
+		if (vic_irq_enable_mask[cpu] & (1 << irq))
 			enable_local_vic_irq(irq);
 	}
 	vic_irq_enable_mask[cpu] = 0;
@@ -982,17 +971,16 @@ smp_enable_irq_interrupt(void)
 
 	put_cpu_no_resched();
 }
-	
+
 /*
  *	CPU halt call-back
  */
-static void
-smp_stop_cpu_function(void *dummy)
+static void smp_stop_cpu_function(void *dummy)
 {
 	VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
 	cpu_clear(smp_processor_id(), cpu_online_map);
 	local_irq_disable();
-	for(;;)
+	for (;;)
 		halt();
 }
 
@@ -1006,14 +994,13 @@ struct call_data_struct {
 	int wait;
 };
 
-static struct call_data_struct * call_data;
+static struct call_data_struct *call_data;
 
 /* execute a thread on a new CPU.  The function to be called must be
  * previously set up.  This is used to schedule a function for
  * execution on all CPUs - set up the function then broadcast a
  * function_interrupt CPI to come here on each CPU */
-static void
-smp_call_function_interrupt(void)
+static void smp_call_function_interrupt(void)
 {
 	void (*func) (void *info) = call_data->func;
 	void *info = call_data->info;
@@ -1027,16 +1014,17 @@ smp_call_function_interrupt(void)
 	 * about to execute the function
 	 */
 	mb();
-	if(!test_and_clear_bit(cpu, &call_data->started)) {
+	if (!test_and_clear_bit(cpu, &call_data->started)) {
 		/* If the bit wasn't set, this could be a replay */
-		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion with no call pending\n", cpu);
+		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
+		       " with no call pending\n", cpu);
 		return;
 	}
 	/*
 	 * At this point the info structure may be out of scope unless wait==1
 	 */
 	irq_enter();
-	(*func)(info);
+	(*func) (info);
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 	if (wait) {
@@ -1046,14 +1034,13 @@ smp_call_function_interrupt(void)
 }
 
 static int
-voyager_smp_call_function_mask (cpumask_t cpumask,
-				void (*func) (void *info), void *info,
-				int wait)
+voyager_smp_call_function_mask(cpumask_t cpumask,
+			       void (*func) (void *info), void *info, int wait)
 {
 	struct call_data_struct data;
 	u32 mask = cpus_addr(cpumask)[0];
 
-	mask &= ~(1<<smp_processor_id());
+	mask &= ~(1 << smp_processor_id());
 
 	if (!mask)
 		return 0;
@@ -1093,7 +1080,7 @@ voyager_smp_call_function_mask (cpumask_t cpumask,
  * so we use the system clock to interrupt one processor, which in
  * turn, broadcasts a timer CPI to all the others --- we receive that
  * CPI here.  We don't use this actually for counting so losing
- * ticks doesn't matter 
+ * ticks doesn't matter
  *
  * FIXME: For those CPUs which actually have a local APIC, we could
  * try to use it to trigger this interrupt instead of having to
@@ -1101,8 +1088,7 @@ voyager_smp_call_function_mask (cpumask_t cpumask,
  * no local APIC, so I can't do this
  *
  * This function is currently a placeholder and is unused in the code */
-fastcall void 
-smp_apic_timer_interrupt(struct pt_regs *regs)
+fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	wrapper_smp_local_timer_interrupt();
@@ -1110,8 +1096,7 @@ smp_apic_timer_interrupt(struct pt_regs *regs)
 }
 
 /* All of the QUAD interrupt GATES */
-fastcall void
-smp_qic_timer_interrupt(struct pt_regs *regs)
+fastcall void smp_qic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	ack_QIC_CPI(QIC_TIMER_CPI);
@@ -1119,60 +1104,54 @@ smp_qic_timer_interrupt(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-fastcall void
-smp_qic_invalidate_interrupt(struct pt_regs *regs)
+fastcall void smp_qic_invalidate_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_INVALIDATE_CPI);
 	smp_invalidate_interrupt();
 }
 
-fastcall void
-smp_qic_reschedule_interrupt(struct pt_regs *regs)
+fastcall void smp_qic_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_RESCHEDULE_CPI);
 	smp_reschedule_interrupt();
 }
 
-fastcall void
-smp_qic_enable_irq_interrupt(struct pt_regs *regs)
+fastcall void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
 	smp_enable_irq_interrupt();
 }
 
-fastcall void
-smp_qic_call_function_interrupt(struct pt_regs *regs)
+fastcall void smp_qic_call_function_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
 	smp_call_function_interrupt();
 }
 
-fastcall void
-smp_vic_cpi_interrupt(struct pt_regs *regs)
+fastcall void smp_vic_cpi_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	__u8 cpu = smp_processor_id();
 
-	if(is_cpu_quad())
+	if (is_cpu_quad())
 		ack_QIC_CPI(VIC_CPI_LEVEL0);
 	else
 		ack_VIC_CPI(VIC_CPI_LEVEL0);
 
-	if(test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
+	if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
 		wrapper_smp_local_timer_interrupt();
-	if(test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
+	if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
 		smp_invalidate_interrupt();
-	if(test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
+	if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
 		smp_reschedule_interrupt();
-	if(test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
+	if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
 		smp_enable_irq_interrupt();
-	if(test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
+	if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
 		smp_call_function_interrupt();
 	set_irq_regs(old_regs);
 }
 
-static void
-do_flush_tlb_all(void* info)
+static void do_flush_tlb_all(void *info)
 {
 	unsigned long cpu = smp_processor_id();
 
@@ -1181,65 +1160,56 @@ do_flush_tlb_all(void* info)
 		leave_mm(cpu);
 }
 
-
 /* flush the TLB of every active CPU in the system */
-void
-flush_tlb_all(void)
+void flush_tlb_all(void)
 {
 	on_each_cpu(do_flush_tlb_all, 0, 1, 1);
 }
 
 /* used to set up the trampoline for other CPUs when the memory manager
  * is sorted out */
-void __init
-smp_alloc_memory(void)
+void __init smp_alloc_memory(void)
 {
-	trampoline_base = (__u32)alloc_bootmem_low_pages(PAGE_SIZE);
-	if(__pa(trampoline_base) >= 0x93000)
+	trampoline_base = (__u32) alloc_bootmem_low_pages(PAGE_SIZE);
+	if (__pa(trampoline_base) >= 0x93000)
 		BUG();
 }
 
 /* send a reschedule CPI to one CPU by physical CPU number*/
-static void
-voyager_smp_send_reschedule(int cpu)
+static void voyager_smp_send_reschedule(int cpu)
 {
 	send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
 }
 
-
-int
-hard_smp_processor_id(void)
+int hard_smp_processor_id(void)
 {
 	__u8 i;
 	__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
-	if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
+	if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
 		return cpumask & 0x1F;
 
-	for(i = 0; i < 8; i++) {
-		if(cpumask & (1<<i))
+	for (i = 0; i < 8; i++) {
+		if (cpumask & (1 << i))
 			return i;
 	}
 	printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
 	return 0;
 }
 
-int
-safe_smp_processor_id(void)
+int safe_smp_processor_id(void)
 {
 	return hard_smp_processor_id();
 }
 
 /* broadcast a halt to all other CPUs */
-static void
-voyager_smp_send_stop(void)
+static void voyager_smp_send_stop(void)
 {
 	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
 }
 
 /* this function is triggered in time.c when a clock tick fires
  * we need to re-broadcast the tick to all CPUs */
-void
-smp_vic_timer_interrupt(void)
+void smp_vic_timer_interrupt(void)
 {
 	send_CPI_allbutself(VIC_TIMER_CPI);
 	smp_local_timer_interrupt();
@@ -1253,8 +1223,7 @@ smp_vic_timer_interrupt(void)
  * multiplier is 1 and it can be changed by writing the new multiplier
  * value into /proc/profile.
  */
-void
-smp_local_timer_interrupt(void)
+void smp_local_timer_interrupt(void)
 {
 	int cpu = smp_processor_id();
 	long weight;
@@ -1269,18 +1238,18 @@ smp_local_timer_interrupt(void)
 		 *
 		 * Interrupts are already masked off at this point.
 		 */
-		per_cpu(prof_counter,cpu) = per_cpu(prof_multiplier, cpu);
+		per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
 		if (per_cpu(prof_counter, cpu) !=
-					per_cpu(prof_old_multiplier, cpu)) {
+		    per_cpu(prof_old_multiplier, cpu)) {
 			/* FIXME: need to update the vic timer tick here */
 			per_cpu(prof_old_multiplier, cpu) =
-						per_cpu(prof_counter, cpu);
+			    per_cpu(prof_counter, cpu);
 		}
 
 		update_process_times(user_mode_vm(get_irq_regs()));
 	}
 
-	if( ((1<<cpu) & voyager_extended_vic_processors) == 0)
+	if (((1 << cpu) & voyager_extended_vic_processors) == 0)
 		/* only extended VIC processors participate in
 		 * interrupt distribution */
 		return;
@@ -1296,12 +1265,12 @@ smp_local_timer_interrupt(void)
 	 * we can take more than 100K local irqs per second on a 100 MHz P5.
 	 */
 
-	if((++vic_tick[cpu] & 0x7) != 0)
+	if ((++vic_tick[cpu] & 0x7) != 0)
 		return;
 	/* get here every 16 ticks (about every 1/6 of a second) */
 
 	/* Change our priority to give someone else a chance at getting
-         * the IRQ. The algorithm goes like this:
+	 * the IRQ. The algorithm goes like this:
 	 *
 	 * In the VIC, the dynamically routed interrupt is always
 	 * handled by the lowest priority eligible (i.e. receiving
@@ -1325,18 +1294,18 @@ smp_local_timer_interrupt(void)
 	 * affinity code since we now try to even up the interrupt
 	 * counts when an affinity binding is keeping them on a
 	 * particular CPU*/
-	weight = (vic_intr_count[cpu]*voyager_extended_cpus
+	weight = (vic_intr_count[cpu] * voyager_extended_cpus
 		  - vic_intr_total) >> 4;
 	weight += 4;
-	if(weight > 7)
+	if (weight > 7)
 		weight = 7;
-	if(weight < 0)
+	if (weight < 0)
 		weight = 0;
-	
-	outb((__u8)weight, VIC_PRIORITY_REGISTER);
+
+	outb((__u8) weight, VIC_PRIORITY_REGISTER);
 
 #ifdef VOYAGER_DEBUG
-	if((vic_tick[cpu] & 0xFFF) == 0) {
+	if ((vic_tick[cpu] & 0xFFF) == 0) {
 		/* print this message roughly every 25 secs */
 		printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
 		       cpu, vic_tick[cpu], weight);
@@ -1345,15 +1314,14 @@ smp_local_timer_interrupt(void)
 }
 
 /* setup the profiling timer */
-int 
-setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
 {
 	int i;
 
-	if ( (!multiplier))
+	if ((!multiplier))
 		return -EINVAL;
 
-	/* 
+	/*
 	 * Set the new multiplier for each CPU. CPUs don't start using the
 	 * new values until the next timer interrupt in which they do process
 	 * accounting.
@@ -1367,15 +1335,13 @@ setup_profiling_timer(unsigned int multiplier)
 /* This is a bit of a mess, but forced on us by the genirq changes
  * there's no genirq handler that really does what voyager wants
  * so hack it up with the simple IRQ handler */
-static void fastcall
-handle_vic_irq(unsigned int irq, struct irq_desc *desc)
+static void fastcall handle_vic_irq(unsigned int irq, struct irq_desc *desc)
 {
 	before_handle_vic_irq(irq);
 	handle_simple_irq(irq, desc);
 	after_handle_vic_irq(irq);
 }
 
-
 /*  The CPIs are handled in the per cpu 8259s, so they must be
  *  enabled to be received: FIX: enabling the CPIs in the early
  *  boot sequence interferes with bug checking; enable them later
@@ -1385,13 +1351,12 @@ handle_vic_irq(unsigned int irq, struct irq_desc *desc)
 #define QIC_SET_GATE(cpi, vector) \
 	set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
 
-void __init
-smp_intr_init(void)
+void __init smp_intr_init(void)
 {
 	int i;
 
 	/* initialize the per cpu irq mask to all disabled */
-	for(i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < NR_CPUS; i++)
 		vic_irq_mask[i] = 0xFFFF;
 
 	VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
@@ -1404,42 +1369,40 @@ smp_intr_init(void)
 	QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
 	QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
 	QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
-	
 
-	/* now put the VIC descriptor into the first 48 IRQs 
+	/* now put the VIC descriptor into the first 48 IRQs
 	 *
 	 * This is for later: first 16 correspond to PC IRQs; next 16
 	 * are Primary MC IRQs and final 16 are Secondary MC IRQs */
-	for(i = 0; i < 48; i++)
+	for (i = 0; i < 48; i++)
 		set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq);
 }
 
 /* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
  * processor to receive CPI */
-static void
-send_CPI(__u32 cpuset, __u8 cpi)
+static void send_CPI(__u32 cpuset, __u8 cpi)
 {
 	int cpu;
 	__u32 quad_cpuset = (cpuset & voyager_quad_processors);
 
-	if(cpi < VIC_START_FAKE_CPI) {
-		/* fake CPI are only used for booting, so send to the 
+	if (cpi < VIC_START_FAKE_CPI) {
+		/* fake CPI are only used for booting, so send to the
 		 * extended quads as well---Quads must be VIC booted */
-		outb((__u8)(cpuset), VIC_CPI_Registers[cpi]);
+		outb((__u8) (cpuset), VIC_CPI_Registers[cpi]);
 		return;
 	}
-	if(quad_cpuset)
+	if (quad_cpuset)
 		send_QIC_CPI(quad_cpuset, cpi);
 	cpuset &= ~quad_cpuset;
 	cpuset &= 0xff;		/* only first 8 CPUs vaild for VIC CPI */
-	if(cpuset == 0)
+	if (cpuset == 0)
 		return;
 	for_each_online_cpu(cpu) {
-		if(cpuset & (1<<cpu))
+		if (cpuset & (1 << cpu))
 			set_bit(cpi, &vic_cpi_mailbox[cpu]);
 	}
-	if(cpuset)
-		outb((__u8)cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
+	if (cpuset)
+		outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
 }
 
 /* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
@@ -1448,20 +1411,19 @@ send_CPI(__u32 cpuset, __u8 cpi)
  * DON'T make this inline otherwise the cache line read will be
  * optimised away
  * */
-static int
-ack_QIC_CPI(__u8 cpi) {
+static int ack_QIC_CPI(__u8 cpi)
+{
 	__u8 cpu = hard_smp_processor_id();
 
 	cpi &= 7;
 
-	outb(1<<cpi, QIC_INTERRUPT_CLEAR1);
+	outb(1 << cpi, QIC_INTERRUPT_CLEAR1);
 	return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
 }
 
-static void
-ack_special_QIC_CPI(__u8 cpi)
+static void ack_special_QIC_CPI(__u8 cpi)
 {
-	switch(cpi) {
+	switch (cpi) {
 	case VIC_CMN_INT:
 		outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
 		break;
@@ -1474,8 +1436,7 @@ ack_special_QIC_CPI(__u8 cpi)
 }
 
 /* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
-static void
-ack_VIC_CPI(__u8 cpi)
+static void ack_VIC_CPI(__u8 cpi)
 {
 #ifdef VOYAGER_DEBUG
 	unsigned long flags;
@@ -1484,17 +1445,17 @@ ack_VIC_CPI(__u8 cpi)
 
 	local_irq_save(flags);
 	isr = vic_read_isr();
-	if((isr & (1<<(cpi &7))) == 0) {
+	if ((isr & (1 << (cpi & 7))) == 0) {
 		printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
 	}
 #endif
 	/* send specific EOI; the two system interrupts have
 	 * bit 4 set for a separate vector but behave as the
 	 * corresponding 3 bit intr */
-	outb_p(0x60|(cpi & 7),0x20);
+	outb_p(0x60 | (cpi & 7), 0x20);
 
 #ifdef VOYAGER_DEBUG
-	if((vic_read_isr() & (1<<(cpi &7))) != 0) {
+	if ((vic_read_isr() & (1 << (cpi & 7))) != 0) {
 		printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
 	}
 	local_irq_restore(flags);
@@ -1502,12 +1463,11 @@ ack_VIC_CPI(__u8 cpi)
 }
 
 /* cribbed with thanks from irq.c */
-#define __byte(x,y) 	(((unsigned char *)&(y))[x])
+#define __byte(x,y)	(((unsigned char *)&(y))[x])
 #define cached_21(cpu)	(__byte(0,vic_irq_mask[cpu]))
 #define cached_A1(cpu)	(__byte(1,vic_irq_mask[cpu]))
 
-static unsigned int
-startup_vic_irq(unsigned int irq)
+static unsigned int startup_vic_irq(unsigned int irq)
 {
 	unmask_vic_irq(irq);
 
@@ -1535,13 +1495,12 @@ startup_vic_irq(unsigned int irq)
  *    broadcast an Interrupt enable CPI which causes all other CPUs to
  *    adjust their masks accordingly.  */
 
-static void
-unmask_vic_irq(unsigned int irq)
+static void unmask_vic_irq(unsigned int irq)
 {
 	/* linux doesn't to processor-irq affinity, so enable on
 	 * all CPUs we know about */
 	int cpu = smp_processor_id(), real_cpu;
-	__u16 mask = (1<<irq);
+	__u16 mask = (1 << irq);
 	__u32 processorList = 0;
 	unsigned long flags;
 
@@ -1549,78 +1508,72 @@ unmask_vic_irq(unsigned int irq)
 		irq, cpu, cpu_irq_affinity[cpu]));
 	spin_lock_irqsave(&vic_irq_lock, flags);
 	for_each_online_cpu(real_cpu) {
-		if(!(voyager_extended_vic_processors & (1<<real_cpu)))
+		if (!(voyager_extended_vic_processors & (1 << real_cpu)))
 			continue;
-		if(!(cpu_irq_affinity[real_cpu] & mask)) {
+		if (!(cpu_irq_affinity[real_cpu] & mask)) {
 			/* irq has no affinity for this CPU, ignore */
 			continue;
 		}
-		if(real_cpu == cpu) {
+		if (real_cpu == cpu) {
 			enable_local_vic_irq(irq);
-		}
-		else if(vic_irq_mask[real_cpu] & mask) {
+		} else if (vic_irq_mask[real_cpu] & mask) {
 			vic_irq_enable_mask[real_cpu] |= mask;
-			processorList |= (1<<real_cpu);
+			processorList |= (1 << real_cpu);
 		}
 	}
 	spin_unlock_irqrestore(&vic_irq_lock, flags);
-	if(processorList)
+	if (processorList)
 		send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
 }
 
-static void
-mask_vic_irq(unsigned int irq)
+static void mask_vic_irq(unsigned int irq)
 {
 	/* lazy disable, do nothing */
 }
 
-static void
-enable_local_vic_irq(unsigned int irq)
+static void enable_local_vic_irq(unsigned int irq)
 {
 	__u8 cpu = smp_processor_id();
 	__u16 mask = ~(1 << irq);
 	__u16 old_mask = vic_irq_mask[cpu];
 
 	vic_irq_mask[cpu] &= mask;
-	if(vic_irq_mask[cpu] == old_mask)
+	if (vic_irq_mask[cpu] == old_mask)
 		return;
 
 	VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
 		irq, cpu));
 
 	if (irq & 8) {
-		outb_p(cached_A1(cpu),0xA1);
+		outb_p(cached_A1(cpu), 0xA1);
 		(void)inb_p(0xA1);
-	}
-	else {
-		outb_p(cached_21(cpu),0x21);
+	} else {
+		outb_p(cached_21(cpu), 0x21);
 		(void)inb_p(0x21);
 	}
 }
 
-static void
-disable_local_vic_irq(unsigned int irq)
+static void disable_local_vic_irq(unsigned int irq)
 {
 	__u8 cpu = smp_processor_id();
 	__u16 mask = (1 << irq);
 	__u16 old_mask = vic_irq_mask[cpu];
 
-	if(irq == 7)
+	if (irq == 7)
 		return;
 
 	vic_irq_mask[cpu] |= mask;
-	if(old_mask == vic_irq_mask[cpu])
+	if (old_mask == vic_irq_mask[cpu])
 		return;
 
 	VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
 		irq, cpu));
 
 	if (irq & 8) {
-		outb_p(cached_A1(cpu),0xA1);
+		outb_p(cached_A1(cpu), 0xA1);
 		(void)inb_p(0xA1);
-	}
-	else {
-		outb_p(cached_21(cpu),0x21);
+	} else {
+		outb_p(cached_21(cpu), 0x21);
 		(void)inb_p(0x21);
 	}
 }
@@ -1631,8 +1584,7 @@ disable_local_vic_irq(unsigned int irq)
  * interrupt in the vic, so we merely set a flag (IRQ_DISABLED).  If
  * this interrupt actually comes in, then we mask and ack here to push
  * the interrupt off to another CPU */
-static void
-before_handle_vic_irq(unsigned int irq)
+static void before_handle_vic_irq(unsigned int irq)
 {
 	irq_desc_t *desc = irq_desc + irq;
 	__u8 cpu = smp_processor_id();
@@ -1641,16 +1593,16 @@ before_handle_vic_irq(unsigned int irq)
 	vic_intr_total++;
 	vic_intr_count[cpu]++;
 
-	if(!(cpu_irq_affinity[cpu] & (1<<irq))) {
+	if (!(cpu_irq_affinity[cpu] & (1 << irq))) {
 		/* The irq is not in our affinity mask, push it off
 		 * onto another CPU */
-		VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d on cpu %d\n",
-			irq, cpu));
+		VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d "
+			"on cpu %d\n", irq, cpu));
 		disable_local_vic_irq(irq);
 		/* set IRQ_INPROGRESS to prevent the handler in irq.c from
 		 * actually calling the interrupt routine */
 		desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
-	} else if(desc->status & IRQ_DISABLED) {
+	} else if (desc->status & IRQ_DISABLED) {
 		/* Damn, the interrupt actually arrived, do the lazy
 		 * disable thing. The interrupt routine in irq.c will
 		 * not handle a IRQ_DISABLED interrupt, so nothing more
@@ -1667,8 +1619,7 @@ before_handle_vic_irq(unsigned int irq)
 }
 
 /* Finish the VIC interrupt: basically mask */
-static void
-after_handle_vic_irq(unsigned int irq)
+static void after_handle_vic_irq(unsigned int irq)
 {
 	irq_desc_t *desc = irq_desc + irq;
 
@@ -1685,11 +1636,11 @@ after_handle_vic_irq(unsigned int irq)
 #ifdef VOYAGER_DEBUG
 		/* DEBUG: before we ack, check what's in progress */
 		isr = vic_read_isr();
-		if((isr & (1<<irq) && !(status & IRQ_REPLAY)) == 0) {
+		if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) {
 			int i;
 			__u8 cpu = smp_processor_id();
 			__u8 real_cpu;
-			int mask; /* Um... initialize me??? --RR */
+			int mask;	/* Um... initialize me??? --RR */
 
 			printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
 			       cpu, irq);
@@ -1698,9 +1649,10 @@ after_handle_vic_irq(unsigned int irq)
 				outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
 				     VIC_PROCESSOR_ID);
 				isr = vic_read_isr();
-				if(isr & (1<<irq)) {
-					printk("VOYAGER SMP: CPU%d ack irq %d\n",
-					       real_cpu, irq);
+				if (isr & (1 << irq)) {
+					printk
+					    ("VOYAGER SMP: CPU%d ack irq %d\n",
+					     real_cpu, irq);
 					ack_vic_irq(irq);
 				}
 				outb(cpu, VIC_PROCESSOR_ID);
@@ -1711,7 +1663,7 @@ after_handle_vic_irq(unsigned int irq)
 		 * receipt by another CPU so everything must be in
 		 * order here  */
 		ack_vic_irq(irq);
-		if(status & IRQ_REPLAY) {
+		if (status & IRQ_REPLAY) {
 			/* replay is set if we disable the interrupt
 			 * in the before_handle_vic_irq() routine, so
 			 * clear the in progress bit here to allow the
@@ -1720,9 +1672,9 @@ after_handle_vic_irq(unsigned int irq)
 		}
 #ifdef VOYAGER_DEBUG
 		isr = vic_read_isr();
-		if((isr & (1<<irq)) != 0)
-			printk("VOYAGER SMP: after_handle_vic_irq() after ack irq=%d, isr=0x%x\n",
-			       irq, isr);
+		if ((isr & (1 << irq)) != 0)
+			printk("VOYAGER SMP: after_handle_vic_irq() after "
+			       "ack irq=%d, isr=0x%x\n", irq, isr);
 #endif /* VOYAGER_DEBUG */
 	}
 	_raw_spin_unlock(&vic_irq_lock);
@@ -1731,7 +1683,6 @@ after_handle_vic_irq(unsigned int irq)
 	 * may be intercepted by another CPU if reasserted */
 }
 
-
 /* Linux processor - interrupt affinity manipulations.
  *
  * For each processor, we maintain a 32 bit irq affinity mask.
@@ -1748,8 +1699,7 @@ after_handle_vic_irq(unsigned int irq)
  * change the mask and then do an interrupt enable CPI to re-enable on
  * the selected processors */
 
-void
-set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
+void set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
 {
 	/* Only extended processors handle interrupts */
 	unsigned long real_mask;
@@ -1757,13 +1707,13 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
 	int cpu;
 
 	real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors;
-	
-	if(cpus_addr(mask)[0] == 0)
+
+	if (cpus_addr(mask)[0] == 0)
 		/* can't have no CPUs to accept the interrupt -- extremely
 		 * bad things will happen */
 		return;
 
-	if(irq == 0)
+	if (irq == 0)
 		/* can't change the affinity of the timer IRQ.  This
 		 * is due to the constraint in the voyager
 		 * architecture that the CPI also comes in on and IRQ
@@ -1772,7 +1722,7 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
 		 * will no-longer be able to accept VIC CPIs */
 		return;
 
-	if(irq >= 32) 
+	if (irq >= 32)
 		/* You can only have 32 interrupts in a voyager system
 		 * (and 32 only if you have a secondary microchannel
 		 * bus) */
@@ -1780,8 +1730,8 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
 
 	for_each_online_cpu(cpu) {
 		unsigned long cpu_mask = 1 << cpu;
-		
-		if(cpu_mask & real_mask) {
+
+		if (cpu_mask & real_mask) {
 			/* enable the interrupt for this cpu */
 			cpu_irq_affinity[cpu] |= irq_mask;
 		} else {
@@ -1800,25 +1750,23 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
 	unmask_vic_irq(irq);
 }
 
-static void
-ack_vic_irq(unsigned int irq)
+static void ack_vic_irq(unsigned int irq)
 {
 	if (irq & 8) {
-		outb(0x62,0x20);	/* Specific EOI to cascade */
-		outb(0x60|(irq & 7),0xA0);
+		outb(0x62, 0x20);	/* Specific EOI to cascade */
+		outb(0x60 | (irq & 7), 0xA0);
 	} else {
-		outb(0x60 | (irq & 7),0x20);
+		outb(0x60 | (irq & 7), 0x20);
 	}
 }
 
 /* enable the CPIs.  In the VIC, the CPIs are delivered by the 8259
  * but are not vectored by it.  This means that the 8259 mask must be
  * lowered to receive them */
-static __init void
-vic_enable_cpi(void)
+static __init void vic_enable_cpi(void)
 {
 	__u8 cpu = smp_processor_id();
-	
+
 	/* just take a copy of the current mask (nop for boot cpu) */
 	vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
 
@@ -1827,7 +1775,7 @@ vic_enable_cpi(void)
 	/* for sys int and cmn int */
 	enable_local_vic_irq(7);
 
-	if(is_cpu_quad()) {
+	if (is_cpu_quad()) {
 		outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
 		outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
 		VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
@@ -1838,8 +1786,7 @@ vic_enable_cpi(void)
 		cpu, vic_irq_mask[cpu]));
 }
 
-void
-voyager_smp_dump()
+void voyager_smp_dump()
 {
 	int old_cpu = smp_processor_id(), cpu;
 
@@ -1865,10 +1812,10 @@ voyager_smp_dump()
 		       cpu, vic_irq_mask[cpu], imr, irr, isr);
 #if 0
 		/* These lines are put in to try to unstick an un ack'd irq */
-		if(isr != 0) {
+		if (isr != 0) {
 			int irq;
-			for(irq=0; irq<16; irq++) {
-				if(isr & (1<<irq)) {
+			for (irq = 0; irq < 16; irq++) {
+				if (isr & (1 << irq)) {
 					printk("\tCPU%d: ack irq %d\n",
 					       cpu, irq);
 					local_irq_save(flags);
@@ -1884,17 +1831,15 @@ voyager_smp_dump()
 	}
 }
 
-void
-smp_voyager_power_off(void *dummy)
+void smp_voyager_power_off(void *dummy)
 {
-	if(smp_processor_id() == boot_cpu_id) 
+	if (smp_processor_id() == boot_cpu_id)
 		voyager_power_off();
 	else
 		smp_stop_cpu_function(NULL);
 }
 
-static void __init
-voyager_smp_prepare_cpus(unsigned int max_cpus)
+static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
 {
 	/* FIXME: ignore max_cpus for now */
 	smp_boot_cpus();
@@ -1911,8 +1856,7 @@ static void __cpuinit voyager_smp_prepare_boot_cpu(void)
 	cpu_set(smp_processor_id(), cpu_present_map);
 }
 
-static int __cpuinit
-voyager_cpu_up(unsigned int cpu)
+static int __cpuinit voyager_cpu_up(unsigned int cpu)
 {
 	/* This only works at boot for x86.  See "rewrite" above. */
 	if (cpu_isset(cpu, smp_commenced_mask))
@@ -1928,14 +1872,12 @@ voyager_cpu_up(unsigned int cpu)
 	return 0;
 }
 
-static void __init
-voyager_smp_cpus_done(unsigned int max_cpus)
+static void __init voyager_smp_cpus_done(unsigned int max_cpus)
 {
 	zap_low_mappings();
 }
 
-void __init
-smp_setup_processor_id(void)
+void __init smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
 	x86_write_percpu(cpu_number, hard_smp_processor_id());
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
index 50f9366c411e..c69c931818ed 100644
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ b/arch/x86/mach-voyager/voyager_thread.c
@@ -30,12 +30,10 @@
 #include <asm/mtrr.h>
 #include <asm/msr.h>
 
-
 struct task_struct *voyager_thread;
 static __u8 set_timeout;
 
-static int
-execute(const char *string)
+static int execute(const char *string)
 {
 	int ret;
 
@@ -52,48 +50,48 @@ execute(const char *string)
 		NULL,
 	};
 
-	if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
-		printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
-		       string, ret);
+	if ((ret =
+	     call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
+		printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string,
+		       ret);
 	}
 	return ret;
 }
 
-static void
-check_from_kernel(void)
+static void check_from_kernel(void)
 {
-	if(voyager_status.switch_off) {
-		
+	if (voyager_status.switch_off) {
+
 		/* FIXME: This should be configurable via proc */
 		execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
-	} else if(voyager_status.power_fail) {
+	} else if (voyager_status.power_fail) {
 		VDEBUG(("Voyager daemon detected AC power failure\n"));
-		
+
 		/* FIXME: This should be configureable via proc */
 		execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
 		set_timeout = 1;
 	}
 }
 
-static void
-check_continuing_condition(void)
+static void check_continuing_condition(void)
 {
-	if(voyager_status.power_fail) {
+	if (voyager_status.power_fail) {
 		__u8 data;
-		voyager_cat_psi(VOYAGER_PSI_SUBREAD, 
+		voyager_cat_psi(VOYAGER_PSI_SUBREAD,
 				VOYAGER_PSI_AC_FAIL_REG, &data);
-		if((data & 0x1f) == 0) {
+		if ((data & 0x1f) == 0) {
 			/* all power restored */
-			printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n");
+			printk(KERN_NOTICE
+			       "VOYAGER AC power restored, cancelling shutdown\n");
 			/* FIXME: should be user configureable */
-			execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
+			execute
+			    ("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
 			set_timeout = 0;
 		}
 	}
 }
 
-static int
-thread(void *unused)
+static int thread(void *unused)
 {
 	printk(KERN_NOTICE "Voyager starting monitor thread\n");
 
@@ -102,7 +100,7 @@ thread(void *unused)
 		schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT);
 
 		VDEBUG(("Voyager Daemon awoken\n"));
-		if(voyager_status.request_from_kernel == 0) {
+		if (voyager_status.request_from_kernel == 0) {
 			/* probably awoken from timeout */
 			check_continuing_condition();
 		} else {
@@ -112,20 +110,18 @@ thread(void *unused)
 	}
 }
 
-static int __init
-voyager_thread_start(void)
+static int __init voyager_thread_start(void)
 {
 	voyager_thread = kthread_run(thread, NULL, "kvoyagerd");
 	if (IS_ERR(voyager_thread)) {
-		printk(KERN_ERR "Voyager: Failed to create system monitor thread.\n");
+		printk(KERN_ERR
+		       "Voyager: Failed to create system monitor thread.\n");
 		return PTR_ERR(voyager_thread);
 	}
 	return 0;
 }
 
-
-static void __exit
-voyager_thread_stop(void)
+static void __exit voyager_thread_stop(void)
 {
 	kthread_stop(voyager_thread);
 }

From 3d0d14f983b55a570b976976284df4c434af3223 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:11 +0100
Subject: [PATCH 043/890] x86: lindent arch/i386/math-emu

lindent these files:
                                       errors   lines of code   errors/KLOC
 arch/x86/math-emu/                      2236            9424         237.2
 arch/x86/math-emu/                       128            8706          14.7

no other changes. No code changed:

   text    data     bss     dec     hex filename
   5589802  612739 3833856 10036397         9924ad vmlinux.before
   5589802  612739 3833856 10036397         9924ad vmlinux.after

the intent of this patch is to ease the automated tracking of kernel
code quality - it's just much easier for us to maintain it if every file
in arch/x86 is supposed to be clean.

NOTE: it is a known problem of lindent that it causes some style damage
of its own, but it's a safe tool (well, except for the gcc array range
initializers extension), so we did the bulk of the changes via lindent,
and did the manual fixups in a followup patch.

the resulting math-emu code has been tested by Thomas Gleixner on a real
386 DX CPU as well, and it works fine.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/math-emu/errors.c       |  866 +++++----
 arch/x86/math-emu/exception.h    |    9 +-
 arch/x86/math-emu/fpu_arith.c    |  150 +-
 arch/x86/math-emu/fpu_asm.h      |    1 -
 arch/x86/math-emu/fpu_aux.c      |  203 +--
 arch/x86/math-emu/fpu_emu.h      |   77 +-
 arch/x86/math-emu/fpu_entry.c    | 1178 +++++++------
 arch/x86/math-emu/fpu_etc.c      |  193 +-
 arch/x86/math-emu/fpu_proto.h    |   80 +-
 arch/x86/math-emu/fpu_tags.c     |   92 +-
 arch/x86/math-emu/fpu_trig.c     | 2814 ++++++++++++++----------------
 arch/x86/math-emu/get_address.c  |  626 ++++---
 arch/x86/math-emu/load_store.c   |  448 ++---
 arch/x86/math-emu/poly.h         |   79 +-
 arch/x86/math-emu/poly_2xm1.c    |  195 +--
 arch/x86/math-emu/poly_atan.c    |  331 ++--
 arch/x86/math-emu/poly_l2.c      |  344 ++--
 arch/x86/math-emu/poly_sin.c     |  615 ++++---
 arch/x86/math-emu/poly_tan.c     |  320 ++--
 arch/x86/math-emu/reg_add_sub.c  |  545 +++---
 arch/x86/math-emu/reg_compare.c  |  547 +++---
 arch/x86/math-emu/reg_constant.c |   71 +-
 arch/x86/math-emu/reg_convert.c  |   51 +-
 arch/x86/math-emu/reg_divide.c   |  299 ++--
 arch/x86/math-emu/reg_ld_str.c   | 2083 ++++++++++------------
 arch/x86/math-emu/reg_mul.c      |  163 +-
 arch/x86/math-emu/status_w.h     |    8 +-
 27 files changed, 5835 insertions(+), 6553 deletions(-)

diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index a1b0d22f6978..7cb5bf3495b2 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -33,45 +33,41 @@
 #undef PRINT_MESSAGES
 /* */
 
-
 #if 0
 void Un_impl(void)
 {
-  u_char byte1, FPU_modrm;
-  unsigned long address = FPU_ORIG_EIP;
+	u_char byte1, FPU_modrm;
+	unsigned long address = FPU_ORIG_EIP;
 
-  RE_ENTRANT_CHECK_OFF;
-  /* No need to check access_ok(), we have previously fetched these bytes. */
-  printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
-  if ( FPU_CS == __USER_CS )
-    {
-      while ( 1 )
-	{
-	  FPU_get_user(byte1, (u_char __user *) address);
-	  if ( (byte1 & 0xf8) == 0xd8 ) break;
-	  printk("[%02x]", byte1);
-	  address++;
+	RE_ENTRANT_CHECK_OFF;
+	/* No need to check access_ok(), we have previously fetched these bytes. */
+	printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *)address);
+	if (FPU_CS == __USER_CS) {
+		while (1) {
+			FPU_get_user(byte1, (u_char __user *) address);
+			if ((byte1 & 0xf8) == 0xd8)
+				break;
+			printk("[%02x]", byte1);
+			address++;
+		}
+		printk("%02x ", byte1);
+		FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
+
+		if (FPU_modrm >= 0300)
+			printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8,
+			       FPU_modrm & 7);
+		else
+			printk("/%d\n", (FPU_modrm >> 3) & 7);
+	} else {
+		printk("cs selector = %04x\n", FPU_CS);
 	}
-      printk("%02x ", byte1);
-      FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
-      
-      if (FPU_modrm >= 0300)
-	printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
-      else
-	printk("/%d\n", (FPU_modrm >> 3) & 7);
-    }
-  else
-    {
-      printk("cs selector = %04x\n", FPU_CS);
-    }
 
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_ON;
 
-  EXCEPTION(EX_Invalid);
+	EXCEPTION(EX_Invalid);
 
 }
-#endif  /*  0  */
-
+#endif /*  0  */
 
 /*
    Called for opcodes which are illegal and which are known to result in a
@@ -79,139 +75,152 @@ void Un_impl(void)
    */
 void FPU_illegal(void)
 {
-  math_abort(FPU_info,SIGILL);
+	math_abort(FPU_info, SIGILL);
 }
 
-
-
 void FPU_printall(void)
 {
-  int i;
-  static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
-                              "DeNorm", "Inf", "NaN" };
-  u_char byte1, FPU_modrm;
-  unsigned long address = FPU_ORIG_EIP;
+	int i;
+	static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
+		"DeNorm", "Inf", "NaN"
+	};
+	u_char byte1, FPU_modrm;
+	unsigned long address = FPU_ORIG_EIP;
 
-  RE_ENTRANT_CHECK_OFF;
-  /* No need to check access_ok(), we have previously fetched these bytes. */
-  printk("At %p:", (void *) address);
-  if ( FPU_CS == __USER_CS )
-    {
+	RE_ENTRANT_CHECK_OFF;
+	/* No need to check access_ok(), we have previously fetched these bytes. */
+	printk("At %p:", (void *)address);
+	if (FPU_CS == __USER_CS) {
 #define MAX_PRINTED_BYTES 20
-      for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
-	{
-	  FPU_get_user(byte1, (u_char __user *) address);
-	  if ( (byte1 & 0xf8) == 0xd8 )
-	    {
-	      printk(" %02x", byte1);
-	      break;
-	    }
-	  printk(" [%02x]", byte1);
-	  address++;
-	}
-      if ( i == MAX_PRINTED_BYTES )
-	printk(" [more..]\n");
-      else
-	{
-	  FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
-	  
-	  if (FPU_modrm >= 0300)
-	    printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
-	  else
-	    printk(" /%d, mod=%d rm=%d\n",
-		   (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
-	}
-    }
-  else
-    {
-      printk("%04x\n", FPU_CS);
-    }
+		for (i = 0; i < MAX_PRINTED_BYTES; i++) {
+			FPU_get_user(byte1, (u_char __user *) address);
+			if ((byte1 & 0xf8) == 0xd8) {
+				printk(" %02x", byte1);
+				break;
+			}
+			printk(" [%02x]", byte1);
+			address++;
+		}
+		if (i == MAX_PRINTED_BYTES)
+			printk(" [more..]\n");
+		else {
+			FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
 
-  partial_status = status_word();
+			if (FPU_modrm >= 0300)
+				printk(" %02x (%02x+%d)\n", FPU_modrm,
+				       FPU_modrm & 0xf8, FPU_modrm & 7);
+			else
+				printk(" /%d, mod=%d rm=%d\n",
+				       (FPU_modrm >> 3) & 7,
+				       (FPU_modrm >> 6) & 3, FPU_modrm & 7);
+		}
+	} else {
+		printk("%04x\n", FPU_CS);
+	}
+
+	partial_status = status_word();
 
 #ifdef DEBUGGING
-if ( partial_status & SW_Backward )    printk("SW: backward compatibility\n");
-if ( partial_status & SW_C3 )          printk("SW: condition bit 3\n");
-if ( partial_status & SW_C2 )          printk("SW: condition bit 2\n");
-if ( partial_status & SW_C1 )          printk("SW: condition bit 1\n");
-if ( partial_status & SW_C0 )          printk("SW: condition bit 0\n");
-if ( partial_status & SW_Summary )     printk("SW: exception summary\n");
-if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
-if ( partial_status & SW_Precision )   printk("SW: loss of precision\n");
-if ( partial_status & SW_Underflow )   printk("SW: underflow\n");
-if ( partial_status & SW_Overflow )    printk("SW: overflow\n");
-if ( partial_status & SW_Zero_Div )    printk("SW: divide by zero\n");
-if ( partial_status & SW_Denorm_Op )   printk("SW: denormalized operand\n");
-if ( partial_status & SW_Invalid )     printk("SW: invalid operation\n");
+	if (partial_status & SW_Backward)
+		printk("SW: backward compatibility\n");
+	if (partial_status & SW_C3)
+		printk("SW: condition bit 3\n");
+	if (partial_status & SW_C2)
+		printk("SW: condition bit 2\n");
+	if (partial_status & SW_C1)
+		printk("SW: condition bit 1\n");
+	if (partial_status & SW_C0)
+		printk("SW: condition bit 0\n");
+	if (partial_status & SW_Summary)
+		printk("SW: exception summary\n");
+	if (partial_status & SW_Stack_Fault)
+		printk("SW: stack fault\n");
+	if (partial_status & SW_Precision)
+		printk("SW: loss of precision\n");
+	if (partial_status & SW_Underflow)
+		printk("SW: underflow\n");
+	if (partial_status & SW_Overflow)
+		printk("SW: overflow\n");
+	if (partial_status & SW_Zero_Div)
+		printk("SW: divide by zero\n");
+	if (partial_status & SW_Denorm_Op)
+		printk("SW: denormalized operand\n");
+	if (partial_status & SW_Invalid)
+		printk("SW: invalid operation\n");
 #endif /* DEBUGGING */
 
-  printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
-	 partial_status & 0x8000 ? 1 : 0,   /* busy */
-	 (partial_status & 0x3800) >> 11,   /* stack top pointer */
-	 partial_status & 0x80 ? 1 : 0,     /* Error summary status */
-	 partial_status & 0x40 ? 1 : 0,     /* Stack flag */
-	 partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
-	 partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
-	 partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
-	 partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
-	 partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
-  
-printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d     ef=%d%d%d%d%d%d\n",
-	 control_word & 0x1000 ? 1 : 0,
-	 (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
-	 (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
-	 control_word & 0x80 ? 1 : 0,
-	 control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
-	 control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
-	 control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
+	printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", partial_status & 0x8000 ? 1 : 0,	/* busy */
+	       (partial_status & 0x3800) >> 11,	/* stack top pointer */
+	       partial_status & 0x80 ? 1 : 0,	/* Error summary status */
+	       partial_status & 0x40 ? 1 : 0,	/* Stack flag */
+	       partial_status & SW_C3 ? 1 : 0, partial_status & SW_C2 ? 1 : 0,	/* cc */
+	       partial_status & SW_C1 ? 1 : 0, partial_status & SW_C0 ? 1 : 0,	/* cc */
+	       partial_status & SW_Precision ? 1 : 0,
+	       partial_status & SW_Underflow ? 1 : 0,
+	       partial_status & SW_Overflow ? 1 : 0,
+	       partial_status & SW_Zero_Div ? 1 : 0,
+	       partial_status & SW_Denorm_Op ? 1 : 0,
+	       partial_status & SW_Invalid ? 1 : 0);
 
-  for ( i = 0; i < 8; i++ )
-    {
-      FPU_REG *r = &st(i);
-      u_char tagi = FPU_gettagi(i);
-      switch (tagi)
-	{
-	case TAG_Empty:
-	  continue;
-	  break;
-	case TAG_Zero:
-	case TAG_Special:
-	  tagi = FPU_Special(r);
-	case TAG_Valid:
-	  printk("st(%d)  %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
-		 getsign(r) ? '-' : '+',
-		 (long)(r->sigh >> 16),
-		 (long)(r->sigh & 0xFFFF),
-		 (long)(r->sigl >> 16),
-		 (long)(r->sigl & 0xFFFF),
-		 exponent(r) - EXP_BIAS + 1);
-	  break;
-	default:
-	  printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
-	  continue;
-	  break;
+	printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d     ef=%d%d%d%d%d%d\n",
+	       control_word & 0x1000 ? 1 : 0,
+	       (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
+	       (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
+	       control_word & 0x80 ? 1 : 0,
+	       control_word & SW_Precision ? 1 : 0,
+	       control_word & SW_Underflow ? 1 : 0,
+	       control_word & SW_Overflow ? 1 : 0,
+	       control_word & SW_Zero_Div ? 1 : 0,
+	       control_word & SW_Denorm_Op ? 1 : 0,
+	       control_word & SW_Invalid ? 1 : 0);
+
+	for (i = 0; i < 8; i++) {
+		FPU_REG *r = &st(i);
+		u_char tagi = FPU_gettagi(i);
+		switch (tagi) {
+		case TAG_Empty:
+			continue;
+			break;
+		case TAG_Zero:
+		case TAG_Special:
+			tagi = FPU_Special(r);
+		case TAG_Valid:
+			printk("st(%d)  %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
+			       getsign(r) ? '-' : '+',
+			       (long)(r->sigh >> 16),
+			       (long)(r->sigh & 0xFFFF),
+			       (long)(r->sigl >> 16),
+			       (long)(r->sigl & 0xFFFF),
+			       exponent(r) - EXP_BIAS + 1);
+			break;
+		default:
+			printk("Whoops! Error in errors.c: tag%d is %d ", i,
+			       tagi);
+			continue;
+			break;
+		}
+		printk("%s\n", tag_desc[(int)(unsigned)tagi]);
 	}
-      printk("%s\n", tag_desc[(int) (unsigned) tagi]);
-    }
 
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_ON;
 
 }
 
 static struct {
-  int type;
-  const char *name;
+	int type;
+	const char *name;
 } exception_names[] = {
-  { EX_StackOver, "stack overflow" },
-  { EX_StackUnder, "stack underflow" },
-  { EX_Precision, "loss of precision" },
-  { EX_Underflow, "underflow" },
-  { EX_Overflow, "overflow" },
-  { EX_ZeroDiv, "divide by zero" },
-  { EX_Denormal, "denormalized operand" },
-  { EX_Invalid, "invalid operation" },
-  { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
-  { 0, NULL }
+	{
+	EX_StackOver, "stack overflow"}, {
+	EX_StackUnder, "stack underflow"}, {
+	EX_Precision, "loss of precision"}, {
+	EX_Underflow, "underflow"}, {
+	EX_Overflow, "overflow"}, {
+	EX_ZeroDiv, "divide by zero"}, {
+	EX_Denormal, "denormalized operand"}, {
+	EX_Invalid, "invalid operation"}, {
+	EX_INTERNAL, "INTERNAL BUG in " FPU_VERSION}, {
+	0, NULL}
 };
 
 /*
@@ -295,445 +304,386 @@ static struct {
 
 asmlinkage void FPU_exception(int n)
 {
-  int i, int_type;
+	int i, int_type;
 
-  int_type = 0;         /* Needed only to stop compiler warnings */
-  if ( n & EX_INTERNAL )
-    {
-      int_type = n - EX_INTERNAL;
-      n = EX_INTERNAL;
-      /* Set lots of exception bits! */
-      partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
-    }
-  else
-    {
-      /* Extract only the bits which we use to set the status word */
-      n &= (SW_Exc_Mask);
-      /* Set the corresponding exception bit */
-      partial_status |= n;
-      /* Set summary bits iff exception isn't masked */
-      if ( partial_status & ~control_word & CW_Exceptions )
-	partial_status |= (SW_Summary | SW_Backward);
-      if ( n & (SW_Stack_Fault | EX_Precision) )
-	{
-	  if ( !(n & SW_C1) )
-	    /* This bit distinguishes over- from underflow for a stack fault,
-	       and roundup from round-down for precision loss. */
-	    partial_status &= ~SW_C1;
+	int_type = 0;		/* Needed only to stop compiler warnings */
+	if (n & EX_INTERNAL) {
+		int_type = n - EX_INTERNAL;
+		n = EX_INTERNAL;
+		/* Set lots of exception bits! */
+		partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
+	} else {
+		/* Extract only the bits which we use to set the status word */
+		n &= (SW_Exc_Mask);
+		/* Set the corresponding exception bit */
+		partial_status |= n;
+		/* Set summary bits iff exception isn't masked */
+		if (partial_status & ~control_word & CW_Exceptions)
+			partial_status |= (SW_Summary | SW_Backward);
+		if (n & (SW_Stack_Fault | EX_Precision)) {
+			if (!(n & SW_C1))
+				/* This bit distinguishes over- from underflow for a stack fault,
+				   and roundup from round-down for precision loss. */
+				partial_status &= ~SW_C1;
+		}
 	}
-    }
 
-  RE_ENTRANT_CHECK_OFF;
-  if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
-    {
+	RE_ENTRANT_CHECK_OFF;
+	if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) {
 #ifdef PRINT_MESSAGES
-      /* My message from the sponsor */
-      printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
-#endif /* PRINT_MESSAGES */
-      
-      /* Get a name string for error reporting */
-      for (i=0; exception_names[i].type; i++)
-	if ( (exception_names[i].type & n) == exception_names[i].type )
-	  break;
-      
-      if (exception_names[i].type)
-	{
-#ifdef PRINT_MESSAGES
-	  printk("FP Exception: %s!\n", exception_names[i].name);
-#endif /* PRINT_MESSAGES */
-	}
-      else
-	printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
-      
-      if ( n == EX_INTERNAL )
-	{
-	  printk("FPU emulator: Internal error type 0x%04x\n", int_type);
-	  FPU_printall();
-	}
-#ifdef PRINT_MESSAGES
-      else
-	FPU_printall();
+		/* My message from the sponsor */
+		printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n");
 #endif /* PRINT_MESSAGES */
 
-      /*
-       * The 80486 generates an interrupt on the next non-control FPU
-       * instruction. So we need some means of flagging it.
-       * We use the ES (Error Summary) bit for this.
-       */
-    }
-  RE_ENTRANT_CHECK_ON;
+		/* Get a name string for error reporting */
+		for (i = 0; exception_names[i].type; i++)
+			if ((exception_names[i].type & n) ==
+			    exception_names[i].type)
+				break;
+
+		if (exception_names[i].type) {
+#ifdef PRINT_MESSAGES
+			printk("FP Exception: %s!\n", exception_names[i].name);
+#endif /* PRINT_MESSAGES */
+		} else
+			printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
+
+		if (n == EX_INTERNAL) {
+			printk("FPU emulator: Internal error type 0x%04x\n",
+			       int_type);
+			FPU_printall();
+		}
+#ifdef PRINT_MESSAGES
+		else
+			FPU_printall();
+#endif /* PRINT_MESSAGES */
+
+		/*
+		 * The 80486 generates an interrupt on the next non-control FPU
+		 * instruction. So we need some means of flagging it.
+		 * We use the ES (Error Summary) bit for this.
+		 */
+	}
+	RE_ENTRANT_CHECK_ON;
 
 #ifdef __DEBUG__
-  math_abort(FPU_info,SIGFPE);
+	math_abort(FPU_info, SIGFPE);
 #endif /* __DEBUG__ */
 
 }
 
-
 /* Real operation attempted on a NaN. */
 /* Returns < 0 if the exception is unmasked */
-int real_1op_NaN(FPU_REG *a)
+int real_1op_NaN(FPU_REG * a)
 {
-  int signalling, isNaN;
+	int signalling, isNaN;
 
-  isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
+	isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
 
-  /* The default result for the case of two "equal" NaNs (signs may
-     differ) is chosen to reproduce 80486 behaviour */
-  signalling = isNaN && !(a->sigh & 0x40000000);
+	/* The default result for the case of two "equal" NaNs (signs may
+	   differ) is chosen to reproduce 80486 behaviour */
+	signalling = isNaN && !(a->sigh & 0x40000000);
 
-  if ( !signalling )
-    {
-      if ( !isNaN )  /* pseudo-NaN, or other unsupported? */
-	{
-	  if ( control_word & CW_Invalid )
-	    {
-	      /* Masked response */
-	      reg_copy(&CONST_QNaN, a);
-	    }
-	  EXCEPTION(EX_Invalid);
-	  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+	if (!signalling) {
+		if (!isNaN) {	/* pseudo-NaN, or other unsupported? */
+			if (control_word & CW_Invalid) {
+				/* Masked response */
+				reg_copy(&CONST_QNaN, a);
+			}
+			EXCEPTION(EX_Invalid);
+			return (!(control_word & CW_Invalid) ? FPU_Exception :
+				0) | TAG_Special;
+		}
+		return TAG_Special;
 	}
-      return TAG_Special;
-    }
 
-  if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      if ( !(a->sigh & 0x80000000) )  /* pseudo-NaN ? */
-	{
-	  reg_copy(&CONST_QNaN, a);
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		if (!(a->sigh & 0x80000000)) {	/* pseudo-NaN ? */
+			reg_copy(&CONST_QNaN, a);
+		}
+		/* ensure a Quiet NaN */
+		a->sigh |= 0x40000000;
 	}
-      /* ensure a Quiet NaN */
-      a->sigh |= 0x40000000;
-    }
 
-  EXCEPTION(EX_Invalid);
+	EXCEPTION(EX_Invalid);
 
-  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+	return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
 }
 
-
 /* Real operation attempted on two operands, one a NaN. */
 /* Returns < 0 if the exception is unmasked */
 int real_2op_NaN(FPU_REG const *b, u_char tagb,
-		 int deststnr,
-		 FPU_REG const *defaultNaN)
+		 int deststnr, FPU_REG const *defaultNaN)
 {
-  FPU_REG *dest = &st(deststnr);
-  FPU_REG const *a = dest;
-  u_char taga = FPU_gettagi(deststnr);
-  FPU_REG const *x;
-  int signalling, unsupported;
+	FPU_REG *dest = &st(deststnr);
+	FPU_REG const *a = dest;
+	u_char taga = FPU_gettagi(deststnr);
+	FPU_REG const *x;
+	int signalling, unsupported;
 
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
+	if (taga == TAG_Special)
+		taga = FPU_Special(a);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
 
-  /* TW_NaN is also used for unsupported data types. */
-  unsupported = ((taga == TW_NaN)
-		 && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
-    || ((tagb == TW_NaN)
-	&& !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
-  if ( unsupported )
-    {
-      if ( control_word & CW_Invalid )
-	{
-	  /* Masked response */
-	  FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+	/* TW_NaN is also used for unsupported data types. */
+	unsupported = ((taga == TW_NaN)
+		       && !((exponent(a) == EXP_OVER)
+			    && (a->sigh & 0x80000000)))
+	    || ((tagb == TW_NaN)
+		&& !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
+	if (unsupported) {
+		if (control_word & CW_Invalid) {
+			/* Masked response */
+			FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+		}
+		EXCEPTION(EX_Invalid);
+		return (!(control_word & CW_Invalid) ? FPU_Exception : 0) |
+		    TAG_Special;
 	}
-      EXCEPTION(EX_Invalid);
-      return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
-    }
 
-  if (taga == TW_NaN)
-    {
-      x = a;
-      if (tagb == TW_NaN)
-	{
-	  signalling = !(a->sigh & b->sigh & 0x40000000);
-	  if ( significand(b) > significand(a) )
-	    x = b;
-	  else if ( significand(b) == significand(a) )
-	    {
-	      /* The default result for the case of two "equal" NaNs (signs may
-		 differ) is chosen to reproduce 80486 behaviour */
-	      x = defaultNaN;
-	    }
-	}
-      else
-	{
-	  /* return the quiet version of the NaN in a */
-	  signalling = !(a->sigh & 0x40000000);
-	}
-    }
-  else
+	if (taga == TW_NaN) {
+		x = a;
+		if (tagb == TW_NaN) {
+			signalling = !(a->sigh & b->sigh & 0x40000000);
+			if (significand(b) > significand(a))
+				x = b;
+			else if (significand(b) == significand(a)) {
+				/* The default result for the case of two "equal" NaNs (signs may
+				   differ) is chosen to reproduce 80486 behaviour */
+				x = defaultNaN;
+			}
+		} else {
+			/* return the quiet version of the NaN in a */
+			signalling = !(a->sigh & 0x40000000);
+		}
+	} else
 #ifdef PARANOID
-    if (tagb == TW_NaN)
+	if (tagb == TW_NaN)
 #endif /* PARANOID */
-    {
-      signalling = !(b->sigh & 0x40000000);
-      x = b;
-    }
+	{
+		signalling = !(b->sigh & 0x40000000);
+		x = b;
+	}
 #ifdef PARANOID
-  else
-    {
-      signalling = 0;
-      EXCEPTION(EX_INTERNAL|0x113);
-      x = &CONST_QNaN;
-    }
+	else {
+		signalling = 0;
+		EXCEPTION(EX_INTERNAL | 0x113);
+		x = &CONST_QNaN;
+	}
 #endif /* PARANOID */
 
-  if ( (!signalling) || (control_word & CW_Invalid) )
-    {
-      if ( ! x )
-	x = b;
+	if ((!signalling) || (control_word & CW_Invalid)) {
+		if (!x)
+			x = b;
 
-      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
-	x = &CONST_QNaN;
+		if (!(x->sigh & 0x80000000))	/* pseudo-NaN ? */
+			x = &CONST_QNaN;
 
-      FPU_copy_to_regi(x, TAG_Special, deststnr);
+		FPU_copy_to_regi(x, TAG_Special, deststnr);
 
-      if ( !signalling )
-	return TAG_Special;
+		if (!signalling)
+			return TAG_Special;
 
-      /* ensure a Quiet NaN */
-      dest->sigh |= 0x40000000;
-    }
+		/* ensure a Quiet NaN */
+		dest->sigh |= 0x40000000;
+	}
 
-  EXCEPTION(EX_Invalid);
+	EXCEPTION(EX_Invalid);
 
-  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+	return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
 }
 
-
 /* Invalid arith operation on Valid registers */
 /* Returns < 0 if the exception is unmasked */
 asmlinkage int arith_invalid(int deststnr)
 {
 
-  EXCEPTION(EX_Invalid);
-  
-  if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
-    }
-  
-  return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
+	EXCEPTION(EX_Invalid);
+
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+	}
+
+	return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
 
 }
 
-
 /* Divide a finite number by zero */
 asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
 {
-  FPU_REG *dest = &st(deststnr);
-  int tag = TAG_Valid;
+	FPU_REG *dest = &st(deststnr);
+	int tag = TAG_Valid;
 
-  if ( control_word & CW_ZeroDiv )
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
-      setsign(dest, sign);
-      tag = TAG_Special;
-    }
- 
-  EXCEPTION(EX_ZeroDiv);
+	if (control_word & CW_ZeroDiv) {
+		/* The masked response */
+		FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
+		setsign(dest, sign);
+		tag = TAG_Special;
+	}
 
-  return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
+	EXCEPTION(EX_ZeroDiv);
+
+	return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
 
 }
 
-
 /* This may be called often, so keep it lean */
 int set_precision_flag(int flags)
 {
-  if ( control_word & CW_Precision )
-    {
-      partial_status &= ~(SW_C1 & flags);
-      partial_status |= flags;   /* The masked response */
-      return 0;
-    }
-  else
-    {
-      EXCEPTION(flags);
-      return 1;
-    }
+	if (control_word & CW_Precision) {
+		partial_status &= ~(SW_C1 & flags);
+		partial_status |= flags;	/* The masked response */
+		return 0;
+	} else {
+		EXCEPTION(flags);
+		return 1;
+	}
 }
 
-
 /* This may be called often, so keep it lean */
 asmlinkage void set_precision_flag_up(void)
 {
-  if ( control_word & CW_Precision )
-    partial_status |= (SW_Precision | SW_C1);   /* The masked response */
-  else
-    EXCEPTION(EX_Precision | SW_C1);
+	if (control_word & CW_Precision)
+		partial_status |= (SW_Precision | SW_C1);	/* The masked response */
+	else
+		EXCEPTION(EX_Precision | SW_C1);
 }
 
-
 /* This may be called often, so keep it lean */
 asmlinkage void set_precision_flag_down(void)
 {
-  if ( control_word & CW_Precision )
-    {   /* The masked response */
-      partial_status &= ~SW_C1;
-      partial_status |= SW_Precision;
-    }
-  else
-    EXCEPTION(EX_Precision);
+	if (control_word & CW_Precision) {	/* The masked response */
+		partial_status &= ~SW_C1;
+		partial_status |= SW_Precision;
+	} else
+		EXCEPTION(EX_Precision);
 }
 
-
 asmlinkage int denormal_operand(void)
 {
-  if ( control_word & CW_Denormal )
-    {   /* The masked response */
-      partial_status |= SW_Denorm_Op;
-      return TAG_Special;
-    }
-  else
-    {
-      EXCEPTION(EX_Denormal);
-      return TAG_Special | FPU_Exception;
-    }
+	if (control_word & CW_Denormal) {	/* The masked response */
+		partial_status |= SW_Denorm_Op;
+		return TAG_Special;
+	} else {
+		EXCEPTION(EX_Denormal);
+		return TAG_Special | FPU_Exception;
+	}
 }
 
-
-asmlinkage int arith_overflow(FPU_REG *dest)
+asmlinkage int arith_overflow(FPU_REG * dest)
 {
-  int tag = TAG_Valid;
+	int tag = TAG_Valid;
 
-  if ( control_word & CW_Overflow )
-    {
-      /* The masked response */
+	if (control_word & CW_Overflow) {
+		/* The masked response */
 /* ###### The response here depends upon the rounding mode */
-      reg_copy(&CONST_INF, dest);
-      tag = TAG_Special;
-    }
-  else
-    {
-      /* Subtract the magic number from the exponent */
-      addexponent(dest, (-3 * (1 << 13)));
-    }
+		reg_copy(&CONST_INF, dest);
+		tag = TAG_Special;
+	} else {
+		/* Subtract the magic number from the exponent */
+		addexponent(dest, (-3 * (1 << 13)));
+	}
 
-  EXCEPTION(EX_Overflow);
-  if ( control_word & CW_Overflow )
-    {
-      /* The overflow exception is masked. */
-      /* By definition, precision is lost.
-	 The roundup bit (C1) is also set because we have
-	 "rounded" upwards to Infinity. */
-      EXCEPTION(EX_Precision | SW_C1);
-      return tag;
-    }
+	EXCEPTION(EX_Overflow);
+	if (control_word & CW_Overflow) {
+		/* The overflow exception is masked. */
+		/* By definition, precision is lost.
+		   The roundup bit (C1) is also set because we have
+		   "rounded" upwards to Infinity. */
+		EXCEPTION(EX_Precision | SW_C1);
+		return tag;
+	}
 
-  return tag;
+	return tag;
 
 }
 
-
-asmlinkage int arith_underflow(FPU_REG *dest)
+asmlinkage int arith_underflow(FPU_REG * dest)
 {
-  int tag = TAG_Valid;
+	int tag = TAG_Valid;
 
-  if ( control_word & CW_Underflow )
-    {
-      /* The masked response */
-      if ( exponent16(dest) <= EXP_UNDER - 63 )
-	{
-	  reg_copy(&CONST_Z, dest);
-	  partial_status &= ~SW_C1;       /* Round down. */
-	  tag = TAG_Zero;
+	if (control_word & CW_Underflow) {
+		/* The masked response */
+		if (exponent16(dest) <= EXP_UNDER - 63) {
+			reg_copy(&CONST_Z, dest);
+			partial_status &= ~SW_C1;	/* Round down. */
+			tag = TAG_Zero;
+		} else {
+			stdexp(dest);
+		}
+	} else {
+		/* Add the magic number to the exponent. */
+		addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
 	}
-      else
-	{
-	  stdexp(dest);
+
+	EXCEPTION(EX_Underflow);
+	if (control_word & CW_Underflow) {
+		/* The underflow exception is masked. */
+		EXCEPTION(EX_Precision);
+		return tag;
 	}
-    }
-  else
-    {
-      /* Add the magic number to the exponent. */
-      addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
-    }
 
-  EXCEPTION(EX_Underflow);
-  if ( control_word & CW_Underflow )
-    {
-      /* The underflow exception is masked. */
-      EXCEPTION(EX_Precision);
-      return tag;
-    }
-
-  return tag;
+	return tag;
 
 }
 
-
 void FPU_stack_overflow(void)
 {
 
- if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      top--;
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-    }
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		top--;
+		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+	}
 
-  EXCEPTION(EX_StackOver);
+	EXCEPTION(EX_StackOver);
 
-  return;
+	return;
 
 }
 
-
 void FPU_stack_underflow(void)
 {
 
- if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-    }
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+	}
 
-  EXCEPTION(EX_StackUnder);
+	EXCEPTION(EX_StackUnder);
 
-  return;
+	return;
 
 }
 
-
 void FPU_stack_underflow_i(int i)
 {
 
- if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
-    }
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+	}
 
-  EXCEPTION(EX_StackUnder);
+	EXCEPTION(EX_StackUnder);
 
-  return;
+	return;
 
 }
 
-
 void FPU_stack_underflow_pop(int i)
 {
 
- if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
-      FPU_pop();
-    }
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+		FPU_pop();
+	}
 
-  EXCEPTION(EX_StackUnder);
+	EXCEPTION(EX_StackUnder);
 
-  return;
+	return;
 
 }
-
diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h
index b463f21a811e..67f43a4683d5 100644
--- a/arch/x86/math-emu/exception.h
+++ b/arch/x86/math-emu/exception.h
@@ -9,7 +9,6 @@
 #ifndef _EXCEPTION_H_
 #define _EXCEPTION_H_
 
-
 #ifdef __ASSEMBLY__
 #define	Const_(x)	$##x
 #else
@@ -20,8 +19,8 @@
 #include "fpu_emu.h"
 #endif /* SW_C1 */
 
-#define FPU_BUSY        Const_(0x8000)   /* FPU busy bit (8087 compatibility) */
-#define EX_ErrorSummary Const_(0x0080)   /* Error summary status */
+#define FPU_BUSY        Const_(0x8000)	/* FPU busy bit (8087 compatibility) */
+#define EX_ErrorSummary Const_(0x0080)	/* Error summary status */
 /* Special exceptions: */
 #define	EX_INTERNAL	Const_(0x8000)	/* Internal error in wm-FPU-emu */
 #define EX_StackOver	Const_(0x0041|SW_C1)	/* stack overflow */
@@ -34,11 +33,9 @@
 #define EX_Denormal	Const_(0x0002)	/* denormalized operand */
 #define EX_Invalid	Const_(0x0001)	/* invalid operation */
 
-
 #define PRECISION_LOST_UP    Const_((EX_Precision | SW_C1))
 #define PRECISION_LOST_DOWN  Const_(EX_Precision)
 
-
 #ifndef __ASSEMBLY__
 
 #ifdef DEBUG
@@ -48,6 +45,6 @@
 #define	EXCEPTION(x)	FPU_exception(x)
 #endif
 
-#endif /* __ASSEMBLY__ */ 
+#endif /* __ASSEMBLY__ */
 
 #endif /* _EXCEPTION_H_ */
diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c
index 6972dec01af6..aeab24e083c4 100644
--- a/arch/x86/math-emu/fpu_arith.c
+++ b/arch/x86/math-emu/fpu_arith.c
@@ -15,160 +15,138 @@
 #include "control_w.h"
 #include "status_w.h"
 
-
 void fadd__(void)
 {
-  /* fadd st,st(i) */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
+	/* fadd st,st(i) */
+	int i = FPU_rm;
+	clear_C1();
+	FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
 }
 
-
 void fmul__(void)
 {
-  /* fmul st,st(i) */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
+	/* fmul st,st(i) */
+	int i = FPU_rm;
+	clear_C1();
+	FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
 }
 
-
-
 void fsub__(void)
 {
-  /* fsub st,st(i) */
-  clear_C1();
-  FPU_sub(0, FPU_rm, control_word);
+	/* fsub st,st(i) */
+	clear_C1();
+	FPU_sub(0, FPU_rm, control_word);
 }
 
-
 void fsubr_(void)
 {
-  /* fsubr st,st(i) */
-  clear_C1();
-  FPU_sub(REV, FPU_rm, control_word);
+	/* fsubr st,st(i) */
+	clear_C1();
+	FPU_sub(REV, FPU_rm, control_word);
 }
 
-
 void fdiv__(void)
 {
-  /* fdiv st,st(i) */
-  clear_C1();
-  FPU_div(0, FPU_rm, control_word);
+	/* fdiv st,st(i) */
+	clear_C1();
+	FPU_div(0, FPU_rm, control_word);
 }
 
-
 void fdivr_(void)
 {
-  /* fdivr st,st(i) */
-  clear_C1();
-  FPU_div(REV, FPU_rm, control_word);
+	/* fdivr st,st(i) */
+	clear_C1();
+	FPU_div(REV, FPU_rm, control_word);
 }
 
-
-
 void fadd_i(void)
 {
-  /* fadd st(i),st */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_add(&st(i), FPU_gettagi(i), i, control_word);
+	/* fadd st(i),st */
+	int i = FPU_rm;
+	clear_C1();
+	FPU_add(&st(i), FPU_gettagi(i), i, control_word);
 }
 
-
 void fmul_i(void)
 {
-  /* fmul st(i),st */
-  clear_C1();
-  FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
+	/* fmul st(i),st */
+	clear_C1();
+	FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
 }
 
-
 void fsubri(void)
 {
-  /* fsubr st(i),st */
-  clear_C1();
-  FPU_sub(DEST_RM, FPU_rm, control_word);
+	/* fsubr st(i),st */
+	clear_C1();
+	FPU_sub(DEST_RM, FPU_rm, control_word);
 }
 
-
 void fsub_i(void)
 {
-  /* fsub st(i),st */
-  clear_C1();
-  FPU_sub(REV|DEST_RM, FPU_rm, control_word);
+	/* fsub st(i),st */
+	clear_C1();
+	FPU_sub(REV | DEST_RM, FPU_rm, control_word);
 }
 
-
 void fdivri(void)
 {
-  /* fdivr st(i),st */
-  clear_C1();
-  FPU_div(DEST_RM, FPU_rm, control_word);
+	/* fdivr st(i),st */
+	clear_C1();
+	FPU_div(DEST_RM, FPU_rm, control_word);
 }
 
-
 void fdiv_i(void)
 {
-  /* fdiv st(i),st */
-  clear_C1();
-  FPU_div(REV|DEST_RM, FPU_rm, control_word);
+	/* fdiv st(i),st */
+	clear_C1();
+	FPU_div(REV | DEST_RM, FPU_rm, control_word);
 }
 
-
-
 void faddp_(void)
 {
-  /* faddp st(i),st */
-  int i = FPU_rm;
-  clear_C1();
-  if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
-    FPU_pop();
+	/* faddp st(i),st */
+	int i = FPU_rm;
+	clear_C1();
+	if (FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0)
+		FPU_pop();
 }
 
-
 void fmulp_(void)
 {
-  /* fmulp st(i),st */
-  clear_C1();
-  if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
-    FPU_pop();
+	/* fmulp st(i),st */
+	clear_C1();
+	if (FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0)
+		FPU_pop();
 }
 
-
-
 void fsubrp(void)
 {
-  /* fsubrp st(i),st */
-  clear_C1();
-  if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
-    FPU_pop();
+	/* fsubrp st(i),st */
+	clear_C1();
+	if (FPU_sub(DEST_RM, FPU_rm, control_word) >= 0)
+		FPU_pop();
 }
 
-
 void fsubp_(void)
 {
-  /* fsubp st(i),st */
-  clear_C1();
-  if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
-    FPU_pop();
+	/* fsubp st(i),st */
+	clear_C1();
+	if (FPU_sub(REV | DEST_RM, FPU_rm, control_word) >= 0)
+		FPU_pop();
 }
 
-
 void fdivrp(void)
 {
-  /* fdivrp st(i),st */
-  clear_C1();
-  if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
-    FPU_pop();
+	/* fdivrp st(i),st */
+	clear_C1();
+	if (FPU_div(DEST_RM, FPU_rm, control_word) >= 0)
+		FPU_pop();
 }
 
-
 void fdivp_(void)
 {
-  /* fdivp st(i),st */
-  clear_C1();
-  if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
-    FPU_pop();
+	/* fdivp st(i),st */
+	clear_C1();
+	if (FPU_div(REV | DEST_RM, FPU_rm, control_word) >= 0)
+		FPU_pop();
 }
diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h
index 9ba12416df12..955b932735a4 100644
--- a/arch/x86/math-emu/fpu_asm.h
+++ b/arch/x86/math-emu/fpu_asm.h
@@ -14,7 +14,6 @@
 
 #define	EXCEPTION	FPU_exception
 
-
 #define PARAM1	8(%ebp)
 #define	PARAM2	12(%ebp)
 #define	PARAM3	16(%ebp)
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 20886cfb9f76..491e737ce547 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -16,34 +16,34 @@
 #include "status_w.h"
 #include "control_w.h"
 
-
 static void fnop(void)
 {
 }
 
 static void fclex(void)
 {
-  partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
-		   SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
-		   SW_Invalid);
-  no_ip_update = 1;
+	partial_status &=
+	    ~(SW_Backward | SW_Summary | SW_Stack_Fault | SW_Precision |
+	      SW_Underflow | SW_Overflow | SW_Zero_Div | SW_Denorm_Op |
+	      SW_Invalid);
+	no_ip_update = 1;
 }
 
 /* Needs to be externally visible */
 void finit(void)
 {
-  control_word = 0x037f;
-  partial_status = 0;
-  top = 0;            /* We don't keep top in the status word internally. */
-  fpu_tag_word = 0xffff;
-  /* The behaviour is different from that detailed in
-     Section 15.1.6 of the Intel manual */
-  operand_address.offset = 0;
-  operand_address.selector = 0;
-  instruction_address.offset = 0;
-  instruction_address.selector = 0;
-  instruction_address.opcode = 0;
-  no_ip_update = 1;
+	control_word = 0x037f;
+	partial_status = 0;
+	top = 0;		/* We don't keep top in the status word internally. */
+	fpu_tag_word = 0xffff;
+	/* The behaviour is different from that detailed in
+	   Section 15.1.6 of the Intel manual */
+	operand_address.offset = 0;
+	operand_address.selector = 0;
+	instruction_address.offset = 0;
+	instruction_address.selector = 0;
+	instruction_address.opcode = 0;
+	no_ip_update = 1;
 }
 
 /*
@@ -54,151 +54,134 @@ void finit(void)
 #define fsetpm fnop
 
 static FUNC const finit_table[] = {
-  feni, fdisi, fclex, finit,
-  fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
+	feni, fdisi, fclex, finit,
+	fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
 };
 
 void finit_(void)
 {
-  (finit_table[FPU_rm])();
+	(finit_table[FPU_rm]) ();
 }
 
-
 static void fstsw_ax(void)
 {
-  *(short *) &FPU_EAX = status_word();
-  no_ip_update = 1;
+	*(short *)&FPU_EAX = status_word();
+	no_ip_update = 1;
 }
 
 static FUNC const fstsw_table[] = {
-  fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
-  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+	fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
+	FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
 };
 
 void fstsw_(void)
 {
-  (fstsw_table[FPU_rm])();
+	(fstsw_table[FPU_rm]) ();
 }
 
-
 static FUNC const fp_nop_table[] = {
-  fnop, FPU_illegal, FPU_illegal, FPU_illegal,
-  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+	fnop, FPU_illegal, FPU_illegal, FPU_illegal,
+	FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
 };
 
 void fp_nop(void)
 {
-  (fp_nop_table[FPU_rm])();
+	(fp_nop_table[FPU_rm]) ();
 }
 
-
 void fld_i_(void)
 {
-  FPU_REG *st_new_ptr;
-  int i;
-  u_char tag;
+	FPU_REG *st_new_ptr;
+	int i;
+	u_char tag;
 
-  if ( STACK_OVERFLOW )
-    { FPU_stack_overflow(); return; }
-
-  /* fld st(i) */
-  i = FPU_rm;
-  if ( NOT_EMPTY(i) )
-    {
-      reg_copy(&st(i), st_new_ptr);
-      tag = FPU_gettagi(i);
-      push();
-      FPU_settag0(tag);
-    }
-  else
-    {
-      if ( control_word & CW_Invalid )
-	{
-	  /* The masked response */
-	  FPU_stack_underflow();
+	if (STACK_OVERFLOW) {
+		FPU_stack_overflow();
+		return;
+	}
+
+	/* fld st(i) */
+	i = FPU_rm;
+	if (NOT_EMPTY(i)) {
+		reg_copy(&st(i), st_new_ptr);
+		tag = FPU_gettagi(i);
+		push();
+		FPU_settag0(tag);
+	} else {
+		if (control_word & CW_Invalid) {
+			/* The masked response */
+			FPU_stack_underflow();
+		} else
+			EXCEPTION(EX_StackUnder);
 	}
-      else
-	EXCEPTION(EX_StackUnder);
-    }
 
 }
 
-
 void fxch_i(void)
 {
-  /* fxch st(i) */
-  FPU_REG t;
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  long tag_word = fpu_tag_word;
-  int regnr = top & 7, regnri = ((regnr + i) & 7);
-  u_char st0_tag = (tag_word >> (regnr*2)) & 3;
-  u_char sti_tag = (tag_word >> (regnri*2)) & 3;
+	/* fxch st(i) */
+	FPU_REG t;
+	int i = FPU_rm;
+	FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
+	long tag_word = fpu_tag_word;
+	int regnr = top & 7, regnri = ((regnr + i) & 7);
+	u_char st0_tag = (tag_word >> (regnr * 2)) & 3;
+	u_char sti_tag = (tag_word >> (regnri * 2)) & 3;
 
-  if ( st0_tag == TAG_Empty )
-    {
-      if ( sti_tag == TAG_Empty )
-	{
-	  FPU_stack_underflow();
-	  FPU_stack_underflow_i(i);
-	  return;
+	if (st0_tag == TAG_Empty) {
+		if (sti_tag == TAG_Empty) {
+			FPU_stack_underflow();
+			FPU_stack_underflow_i(i);
+			return;
+		}
+		if (control_word & CW_Invalid) {
+			/* Masked response */
+			FPU_copy_to_reg0(sti_ptr, sti_tag);
+		}
+		FPU_stack_underflow_i(i);
+		return;
 	}
-      if ( control_word & CW_Invalid )
-	{
-	  /* Masked response */
-	  FPU_copy_to_reg0(sti_ptr, sti_tag);
+	if (sti_tag == TAG_Empty) {
+		if (control_word & CW_Invalid) {
+			/* Masked response */
+			FPU_copy_to_regi(st0_ptr, st0_tag, i);
+		}
+		FPU_stack_underflow();
+		return;
 	}
-      FPU_stack_underflow_i(i);
-      return;
-    }
-  if ( sti_tag == TAG_Empty )
-    {
-      if ( control_word & CW_Invalid )
-	{
-	  /* Masked response */
-	  FPU_copy_to_regi(st0_ptr, st0_tag, i);
-	}
-      FPU_stack_underflow();
-      return;
-    }
-  clear_C1();
+	clear_C1();
 
-  reg_copy(st0_ptr, &t);
-  reg_copy(sti_ptr, st0_ptr);
-  reg_copy(&t, sti_ptr);
+	reg_copy(st0_ptr, &t);
+	reg_copy(sti_ptr, st0_ptr);
+	reg_copy(&t, sti_ptr);
 
-  tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
-  tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
-  fpu_tag_word = tag_word;
+	tag_word &= ~(3 << (regnr * 2)) & ~(3 << (regnri * 2));
+	tag_word |= (sti_tag << (regnr * 2)) | (st0_tag << (regnri * 2));
+	fpu_tag_word = tag_word;
 }
 
-
 void ffree_(void)
 {
-  /* ffree st(i) */
-  FPU_settagi(FPU_rm, TAG_Empty);
+	/* ffree st(i) */
+	FPU_settagi(FPU_rm, TAG_Empty);
 }
 
-
 void ffreep(void)
 {
-  /* ffree st(i) + pop - unofficial code */
-  FPU_settagi(FPU_rm, TAG_Empty);
-  FPU_pop();
+	/* ffree st(i) + pop - unofficial code */
+	FPU_settagi(FPU_rm, TAG_Empty);
+	FPU_pop();
 }
 
-
 void fst_i_(void)
 {
-  /* fst st(i) */
-  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+	/* fst st(i) */
+	FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
 }
 
-
 void fstp_i(void)
 {
-  /* fstp st(i) */
-  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
-  FPU_pop();
+	/* fstp st(i) */
+	FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+	FPU_pop();
 }
-
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index 65120f523853..656dd4c04b1b 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -7,7 +7,6 @@
  |                                                                           |
  +---------------------------------------------------------------------------*/
 
-
 #ifndef _FPU_EMU_H_
 #define _FPU_EMU_H_
 
@@ -28,15 +27,15 @@
 #endif
 
 #define EXP_BIAS	Const(0)
-#define EXP_OVER	Const(0x4000)    /* smallest invalid large exponent */
-#define	EXP_UNDER	Const(-0x3fff)   /* largest invalid small exponent */
-#define EXP_WAY_UNDER   Const(-0x6000)   /* Below the smallest denormal, but
-					    still a 16 bit nr. */
+#define EXP_OVER	Const(0x4000)	/* smallest invalid large exponent */
+#define	EXP_UNDER	Const(-0x3fff)	/* largest invalid small exponent */
+#define EXP_WAY_UNDER   Const(-0x6000)	/* Below the smallest denormal, but
+					   still a 16 bit nr. */
 #define EXP_Infinity    EXP_OVER
 #define EXP_NaN         EXP_OVER
 
 #define EXTENDED_Ebias Const(0x3fff)
-#define EXTENDED_Emin (-0x3ffe)  /* smallest valid exponent */
+#define EXTENDED_Emin (-0x3ffe)	/* smallest valid exponent */
 
 #define SIGN_POS	Const(0)
 #define SIGN_NEG	Const(0x80)
@@ -44,10 +43,9 @@
 #define SIGN_Positive	Const(0)
 #define SIGN_Negative	Const(0x8000)
 
-
 /* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
 /* The following fold to 2 (Special) in the Tag Word */
-#define TW_Denormal     Const(4)        /* De-normal */
+#define TW_Denormal     Const(4)	/* De-normal */
 #define TW_Infinity	Const(5)	/* + or - infinity */
 #define	TW_NaN		Const(6)	/* Not a Number */
 #define	TW_Unsupported	Const(7)	/* Not supported by an 80486 */
@@ -67,14 +65,13 @@
 #define DEST_RM         0x20
 #define LOADED          0x40
 
-#define FPU_Exception   Const(0x80000000)   /* Added to tag returns. */
-
+#define FPU_Exception   Const(0x80000000)	/* Added to tag returns. */
 
 #ifndef __ASSEMBLY__
 
 #include "fpu_system.h"
 
-#include <asm/sigcontext.h>   /* for struct _fpstate */
+#include <asm/sigcontext.h>	/* for struct _fpstate */
 #include <asm/math_emu.h>
 #include <linux/linkage.h>
 
@@ -112,30 +109,33 @@ extern u_char emulating;
 #define PREFIX_DEFAULT 7
 
 struct address {
-  unsigned int offset;
-  unsigned int selector:16;
-  unsigned int opcode:11;
-  unsigned int empty:5;
+	unsigned int offset;
+	unsigned int selector:16;
+	unsigned int opcode:11;
+	unsigned int empty:5;
 };
 struct fpu__reg {
-  unsigned sigl;
-  unsigned sigh;
-  short exp;
+	unsigned sigl;
+	unsigned sigh;
+	short exp;
 };
 
-typedef void (*FUNC)(void);
+typedef void (*FUNC) (void);
 typedef struct fpu__reg FPU_REG;
-typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
-typedef struct { u_char address_size, operand_size, segment; }
-        overrides;
+typedef void (*FUNC_ST0) (FPU_REG * st0_ptr, u_char st0_tag);
+typedef struct {
+	u_char address_size, operand_size, segment;
+} overrides;
 /* This structure is 32 bits: */
-typedef struct { overrides override;
-		 u_char default_mode; } fpu_addr_modes;
+typedef struct {
+	overrides override;
+	u_char default_mode;
+} fpu_addr_modes;
 /* PROTECTED has a restricted meaning in the emulator; it is used
    to signal that the emulator needs to do special things to ensure
    that protection is respected in a segmented model. */
 #define PROTECTED 4
-#define SIXTEEN   1         /* We rely upon this being 1 (true) */
+#define SIXTEEN   1		/* We rely upon this being 1 (true) */
 #define VM86      SIXTEEN
 #define PM16      (SIXTEEN | PROTECTED)
 #define SEG32     PROTECTED
@@ -166,10 +166,10 @@ extern u_char const data_sizes_16[32];
 #define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
 #define signnegative(a) (signbyte(a) & 0x80)
 
-static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
+static inline void reg_copy(FPU_REG const *x, FPU_REG * y)
 {
-  *(short *)&(y->exp) = *(const short *)&(x->exp); 
-  *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
+	*(short *)&(y->exp) = *(const short *)&(x->exp);
+	*(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
 }
 
 #define exponent(x)  (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
@@ -184,29 +184,28 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
 
 #define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
 
-
 /*----- Prototypes for functions written in assembler -----*/
 /* extern void reg_move(FPU_REG *a, FPU_REG *b); */
 
-asmlinkage int FPU_normalize(FPU_REG *x);
-asmlinkage int FPU_normalize_nuo(FPU_REG *x);
+asmlinkage int FPU_normalize(FPU_REG * x);
+asmlinkage int FPU_normalize_nuo(FPU_REG * x);
 asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 FPU_REG * answ, unsigned int control_w, u_char sign,
 			 int expa, int expb);
 asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 FPU_REG * answ, unsigned int control_w, u_char sign,
 			 int expon);
 asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, unsigned int control_w, u_char sign);
+			 FPU_REG * answ, unsigned int control_w, u_char sign);
 asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, unsigned int control_w, u_char sign,
+			 FPU_REG * answ, unsigned int control_w, u_char sign,
 			 int expa, int expb);
-asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
+asmlinkage int wm_sqrt(FPU_REG * n, int dummy1, int dummy2,
 		       unsigned int control_w, u_char sign);
-asmlinkage unsigned	FPU_shrx(void *l, unsigned x);
-asmlinkage unsigned	FPU_shrxs(void *v, unsigned x);
+asmlinkage unsigned FPU_shrx(void *l, unsigned x);
+asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
 asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
-asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
+asmlinkage int FPU_round(FPU_REG * arg, unsigned int extent, int dummy,
 			 unsigned int control_w, u_char sign);
 
 #ifndef MAKING_PROTO
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 1853524c8b57..cbb8717f09fd 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -36,726 +36,720 @@
 #include "control_w.h"
 #include "status_w.h"
 
-#define __BAD__ FPU_illegal   /* Illegal on an 80486, causes SIGILL */
+#define __BAD__ FPU_illegal	/* Illegal on an 80486, causes SIGILL */
 
-#ifndef NO_UNDOC_CODE    /* Un-documented FPU op-codes supported by default. */
+#ifndef NO_UNDOC_CODE		/* Un-documented FPU op-codes supported by default. */
 
 /* WARNING: These codes are not documented by Intel in their 80486 manual
    and may not work on FPU clones or later Intel FPUs. */
 
 /* Changes to support the un-doc codes provided by Linus Torvalds. */
 
-#define _d9_d8_ fstp_i    /* unofficial code (19) */
-#define _dc_d0_ fcom_st   /* unofficial code (14) */
-#define _dc_d8_ fcompst   /* unofficial code (1c) */
-#define _dd_c8_ fxch_i    /* unofficial code (0d) */
-#define _de_d0_ fcompst   /* unofficial code (16) */
-#define _df_c0_ ffreep    /* unofficial code (07) ffree + pop */
-#define _df_c8_ fxch_i    /* unofficial code (0f) */
-#define _df_d0_ fstp_i    /* unofficial code (17) */
-#define _df_d8_ fstp_i    /* unofficial code (1f) */
+#define _d9_d8_ fstp_i		/* unofficial code (19) */
+#define _dc_d0_ fcom_st		/* unofficial code (14) */
+#define _dc_d8_ fcompst		/* unofficial code (1c) */
+#define _dd_c8_ fxch_i		/* unofficial code (0d) */
+#define _de_d0_ fcompst		/* unofficial code (16) */
+#define _df_c0_ ffreep		/* unofficial code (07) ffree + pop */
+#define _df_c8_ fxch_i		/* unofficial code (0f) */
+#define _df_d0_ fstp_i		/* unofficial code (17) */
+#define _df_d8_ fstp_i		/* unofficial code (1f) */
 
 static FUNC const st_instr_table[64] = {
-  fadd__,   fld_i_,     __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  _df_c0_,
-  fmul__,   fxch_i,     __BAD__, __BAD__, fmul_i,  _dd_c8_, fmulp_,  _df_c8_,
-  fcom_st,  fp_nop,     __BAD__, __BAD__, _dc_d0_, fst_i_,  _de_d0_, _df_d0_,
-  fcompst,  _d9_d8_,    __BAD__, __BAD__, _dc_d8_, fstp_i,  fcompp,  _df_d8_,
-  fsub__,   FPU_etc,    __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
-  fsubr_,   fconst,     fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
-  fdiv__,   FPU_triga,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
-  fdivr_,   FPU_trigb,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
+	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
+	fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
+	fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
+	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
+	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
+	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
+	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
 };
 
-#else     /* Support only documented FPU op-codes */
+#else /* Support only documented FPU op-codes */
 
 static FUNC const st_instr_table[64] = {
-  fadd__,   fld_i_,     __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  __BAD__,
-  fmul__,   fxch_i,     __BAD__, __BAD__, fmul_i,  __BAD__, fmulp_,  __BAD__,
-  fcom_st,  fp_nop,     __BAD__, __BAD__, __BAD__, fst_i_,  __BAD__, __BAD__,
-  fcompst,  __BAD__,    __BAD__, __BAD__, __BAD__, fstp_i,  fcompp,  __BAD__,
-  fsub__,   FPU_etc,    __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
-  fsubr_,   fconst,     fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
-  fdiv__,   FPU_triga,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
-  fdivr_,   FPU_trigb,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
+	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
+	fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
+	fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
+	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
+	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
+	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
+	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
 };
 
 #endif /* NO_UNDOC_CODE */
 
-
-#define _NONE_ 0   /* Take no special action */
-#define _REG0_ 1   /* Need to check for not empty st(0) */
-#define _REGI_ 2   /* Need to check for not empty st(0) and st(rm) */
-#define _REGi_ 0   /* Uses st(rm) */
-#define _PUSH_ 3   /* Need to check for space to push onto stack */
-#define _null_ 4   /* Function illegal or not implemented */
-#define _REGIi 5   /* Uses st(0) and st(rm), result to st(rm) */
-#define _REGIp 6   /* Uses st(0) and st(rm), result to st(rm) then pop */
-#define _REGIc 0   /* Compare st(0) and st(rm) */
-#define _REGIn 0   /* Uses st(0) and st(rm), but handle checks later */
+#define _NONE_ 0		/* Take no special action */
+#define _REG0_ 1		/* Need to check for not empty st(0) */
+#define _REGI_ 2		/* Need to check for not empty st(0) and st(rm) */
+#define _REGi_ 0		/* Uses st(rm) */
+#define _PUSH_ 3		/* Need to check for space to push onto stack */
+#define _null_ 4		/* Function illegal or not implemented */
+#define _REGIi 5		/* Uses st(0) and st(rm), result to st(rm) */
+#define _REGIp 6		/* Uses st(0) and st(rm), result to st(rm) then pop */
+#define _REGIc 0		/* Compare st(0) and st(rm) */
+#define _REGIn 0		/* Uses st(0) and st(rm), but handle checks later */
 
 #ifndef NO_UNDOC_CODE
 
 /* Un-documented FPU op-codes supported by default. (see above) */
 
 static u_char const type_table[64] = {
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
-  _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
-  _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-  _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
+	_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
+	_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+	_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
 };
 
-#else     /* Support only documented FPU op-codes */
+#else /* Support only documented FPU op-codes */
 
 static u_char const type_table[64] = {
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
-  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
-  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
-  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
+	_REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+	_REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
+	_REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
+	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
 };
 
 #endif /* NO_UNDOC_CODE */
 
-
 #ifdef RE_ENTRANT_CHECKING
-u_char emulating=0;
+u_char emulating = 0;
 #endif /* RE_ENTRANT_CHECKING */
 
-static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
-			overrides *override);
+static int valid_prefix(u_char * Byte, u_char __user ** fpu_eip,
+			overrides * override);
 
 asmlinkage void math_emulate(long arg)
 {
-  u_char  FPU_modrm, byte1;
-  unsigned short code;
-  fpu_addr_modes addr_modes;
-  int unmasked;
-  FPU_REG loaded_data;
-  FPU_REG *st0_ptr;
-  u_char	  loaded_tag, st0_tag;
-  void __user *data_address;
-  struct address data_sel_off;
-  struct address entry_sel_off;
-  unsigned long code_base = 0;
-  unsigned long code_limit = 0;  /* Initialized to stop compiler warnings */
-  struct desc_struct code_descriptor;
+	u_char FPU_modrm, byte1;
+	unsigned short code;
+	fpu_addr_modes addr_modes;
+	int unmasked;
+	FPU_REG loaded_data;
+	FPU_REG *st0_ptr;
+	u_char loaded_tag, st0_tag;
+	void __user *data_address;
+	struct address data_sel_off;
+	struct address entry_sel_off;
+	unsigned long code_base = 0;
+	unsigned long code_limit = 0;	/* Initialized to stop compiler warnings */
+	struct desc_struct code_descriptor;
 
 #ifdef RE_ENTRANT_CHECKING
-  if ( emulating )
-    {
-      printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
-    }
-  RE_ENTRANT_CHECK_ON;
+	if (emulating) {
+		printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
+	}
+	RE_ENTRANT_CHECK_ON;
 #endif /* RE_ENTRANT_CHECKING */
 
-  if (!used_math())
-    {
-      finit();
-      set_used_math();
-    }
-
-  SETUP_DATA_AREA(arg);
-
-  FPU_ORIG_EIP = FPU_EIP;
-
-  if ( (FPU_EFLAGS & 0x00020000) != 0 )
-    {
-      /* Virtual 8086 mode */
-      addr_modes.default_mode = VM86;
-      FPU_EIP += code_base = FPU_CS << 4;
-      code_limit = code_base + 0xffff;  /* Assumes code_base <= 0xffff0000 */
-    }
-  else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
-    {
-      addr_modes.default_mode = 0;
-    }
-  else if ( FPU_CS == __KERNEL_CS )
-    {
-      printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
-      panic("Math emulation needed in kernel");
-    }
-  else
-    {
-
-      if ( (FPU_CS & 4) != 4 )   /* Must be in the LDT */
-	{
-	  /* Can only handle segmented addressing via the LDT
-	     for now, and it must be 16 bit */
-	  printk("FPU emulator: Unsupported addressing mode\n");
-	  math_abort(FPU_info, SIGILL);
+	if (!used_math()) {
+		finit();
+		set_used_math();
 	}
 
-      code_descriptor = LDT_DESCRIPTOR(FPU_CS);
-      if ( SEG_D_SIZE(code_descriptor) )
-	{
-	  /* The above test may be wrong, the book is not clear */
-	  /* Segmented 32 bit protected mode */
-	  addr_modes.default_mode = SEG32;
+	SETUP_DATA_AREA(arg);
+
+	FPU_ORIG_EIP = FPU_EIP;
+
+	if ((FPU_EFLAGS & 0x00020000) != 0) {
+		/* Virtual 8086 mode */
+		addr_modes.default_mode = VM86;
+		FPU_EIP += code_base = FPU_CS << 4;
+		code_limit = code_base + 0xffff;	/* Assumes code_base <= 0xffff0000 */
+	} else if (FPU_CS == __USER_CS && FPU_DS == __USER_DS) {
+		addr_modes.default_mode = 0;
+	} else if (FPU_CS == __KERNEL_CS) {
+		printk("math_emulate: %04x:%08lx\n", FPU_CS, FPU_EIP);
+		panic("Math emulation needed in kernel");
+	} else {
+
+		if ((FPU_CS & 4) != 4) {	/* Must be in the LDT */
+			/* Can only handle segmented addressing via the LDT
+			   for now, and it must be 16 bit */
+			printk("FPU emulator: Unsupported addressing mode\n");
+			math_abort(FPU_info, SIGILL);
+		}
+
+		code_descriptor = LDT_DESCRIPTOR(FPU_CS);
+		if (SEG_D_SIZE(code_descriptor)) {
+			/* The above test may be wrong, the book is not clear */
+			/* Segmented 32 bit protected mode */
+			addr_modes.default_mode = SEG32;
+		} else {
+			/* 16 bit protected mode */
+			addr_modes.default_mode = PM16;
+		}
+		FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
+		code_limit = code_base
+		    + (SEG_LIMIT(code_descriptor) +
+		       1) * SEG_GRANULARITY(code_descriptor)
+		    - 1;
+		if (code_limit < code_base)
+			code_limit = 0xffffffff;
 	}
-      else
-	{
-	  /* 16 bit protected mode */
-	  addr_modes.default_mode = PM16;
+
+	FPU_lookahead = 1;
+	if (current->ptrace & PT_PTRACED)
+		FPU_lookahead = 0;
+
+	if (!valid_prefix(&byte1, (u_char __user **) & FPU_EIP,
+			  &addr_modes.override)) {
+		RE_ENTRANT_CHECK_OFF;
+		printk
+		    ("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
+		     "FPU emulator: self-modifying code! (emulation impossible)\n",
+		     byte1);
+		RE_ENTRANT_CHECK_ON;
+		EXCEPTION(EX_INTERNAL | 0x126);
+		math_abort(FPU_info, SIGILL);
 	}
-      FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
-      code_limit = code_base
-	+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
-	  - 1;
-      if ( code_limit < code_base ) code_limit = 0xffffffff;
-    }
 
-  FPU_lookahead = 1;
-  if (current->ptrace & PT_PTRACED)
-    FPU_lookahead = 0;
+      do_another_FPU_instruction:
 
-  if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
-		     &addr_modes.override) )
-    {
-      RE_ENTRANT_CHECK_OFF;
-      printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
-	     "FPU emulator: self-modifying code! (emulation impossible)\n",
-	     byte1);
-      RE_ENTRANT_CHECK_ON;
-      EXCEPTION(EX_INTERNAL|0x126);
-      math_abort(FPU_info,SIGILL);
-    }
+	no_ip_update = 0;
 
-do_another_FPU_instruction:
+	FPU_EIP++;		/* We have fetched the prefix and first code bytes. */
 
-  no_ip_update = 0;
-
-  FPU_EIP++;  /* We have fetched the prefix and first code bytes. */
-
-  if ( addr_modes.default_mode )
-    {
-      /* This checks for the minimum instruction bytes.
-	 We also need to check any extra (address mode) code access. */
-      if ( FPU_EIP > code_limit )
-	math_abort(FPU_info,SIGSEGV);
-    }
-
-  if ( (byte1 & 0xf8) != 0xd8 )
-    {
-      if ( byte1 == FWAIT_OPCODE )
-	{
-	  if (partial_status & SW_Summary)
-	    goto do_the_FPU_interrupt;
-	  else
-	    goto FPU_fwait_done;
+	if (addr_modes.default_mode) {
+		/* This checks for the minimum instruction bytes.
+		   We also need to check any extra (address mode) code access. */
+		if (FPU_EIP > code_limit)
+			math_abort(FPU_info, SIGSEGV);
 	}
+
+	if ((byte1 & 0xf8) != 0xd8) {
+		if (byte1 == FWAIT_OPCODE) {
+			if (partial_status & SW_Summary)
+				goto do_the_FPU_interrupt;
+			else
+				goto FPU_fwait_done;
+		}
 #ifdef PARANOID
-      EXCEPTION(EX_INTERNAL|0x128);
-      math_abort(FPU_info,SIGILL);
+		EXCEPTION(EX_INTERNAL | 0x128);
+		math_abort(FPU_info, SIGILL);
 #endif /* PARANOID */
-    }
-
-  RE_ENTRANT_CHECK_OFF;
-  FPU_code_access_ok(1);
-  FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
-  RE_ENTRANT_CHECK_ON;
-  FPU_EIP++;
-
-  if (partial_status & SW_Summary)
-    {
-      /* Ignore the error for now if the current instruction is a no-wait
-	 control instruction */
-      /* The 80486 manual contradicts itself on this topic,
-	 but a real 80486 uses the following instructions:
-	 fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
-       */
-      code = (FPU_modrm << 8) | byte1;
-      if ( ! ( (((code & 0xf803) == 0xe003) ||    /* fnclex, fninit, fnstsw */
-		(((code & 0x3003) == 0x3001) &&   /* fnsave, fnstcw, fnstenv,
-						     fnstsw */
-		 ((code & 0xc000) != 0xc000))) ) )
-	{
-	  /*
-	   *  We need to simulate the action of the kernel to FPU
-	   *  interrupts here.
-	   */
-	do_the_FPU_interrupt:
-
-	  FPU_EIP = FPU_ORIG_EIP;	/* Point to current FPU instruction. */
-
-	  RE_ENTRANT_CHECK_OFF;
-	  current->thread.trap_no = 16;
-	  current->thread.error_code = 0;
-	  send_sig(SIGFPE, current, 1);
-	  return;
-	}
-    }
-
-  entry_sel_off.offset = FPU_ORIG_EIP;
-  entry_sel_off.selector = FPU_CS;
-  entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
-
-  FPU_rm = FPU_modrm & 7;
-
-  if ( FPU_modrm < 0300 )
-    {
-      /* All of these instructions use the mod/rm byte to get a data address */
-
-      if ( (addr_modes.default_mode & SIXTEEN)
-	  ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
-	data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
-					  addr_modes);
-      else
-	data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
-				       addr_modes);
-
-      if ( addr_modes.default_mode )
-	{
-	  if ( FPU_EIP-1 > code_limit )
-	    math_abort(FPU_info,SIGSEGV);
 	}
 
-      if ( !(byte1 & 1) )
-	{
-	  unsigned short status1 = partial_status;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_code_access_ok(1);
+	FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
+	RE_ENTRANT_CHECK_ON;
+	FPU_EIP++;
 
-	  st0_ptr = &st(0);
-	  st0_tag = FPU_gettag0();
+	if (partial_status & SW_Summary) {
+		/* Ignore the error for now if the current instruction is a no-wait
+		   control instruction */
+		/* The 80486 manual contradicts itself on this topic,
+		   but a real 80486 uses the following instructions:
+		   fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
+		 */
+		code = (FPU_modrm << 8) | byte1;
+		if (!((((code & 0xf803) == 0xe003) ||	/* fnclex, fninit, fnstsw */
+		       (((code & 0x3003) == 0x3001) &&	/* fnsave, fnstcw, fnstenv,
+							   fnstsw */
+			((code & 0xc000) != 0xc000))))) {
+			/*
+			 *  We need to simulate the action of the kernel to FPU
+			 *  interrupts here.
+			 */
+		      do_the_FPU_interrupt:
 
-	  /* Stack underflow has priority */
-	  if ( NOT_EMPTY_ST0 )
-	    {
-	      if ( addr_modes.default_mode & PROTECTED )
-		{
-		  /* This table works for 16 and 32 bit protected mode */
-		  if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
-		    math_abort(FPU_info,SIGSEGV);
+			FPU_EIP = FPU_ORIG_EIP;	/* Point to current FPU instruction. */
+
+			RE_ENTRANT_CHECK_OFF;
+			current->thread.trap_no = 16;
+			current->thread.error_code = 0;
+			send_sig(SIGFPE, current, 1);
+			return;
+		}
+	}
+
+	entry_sel_off.offset = FPU_ORIG_EIP;
+	entry_sel_off.selector = FPU_CS;
+	entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
+
+	FPU_rm = FPU_modrm & 7;
+
+	if (FPU_modrm < 0300) {
+		/* All of these instructions use the mod/rm byte to get a data address */
+
+		if ((addr_modes.default_mode & SIXTEEN)
+		    ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX))
+			data_address =
+			    FPU_get_address_16(FPU_modrm, &FPU_EIP,
+					       &data_sel_off, addr_modes);
+		else
+			data_address =
+			    FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
+					    addr_modes);
+
+		if (addr_modes.default_mode) {
+			if (FPU_EIP - 1 > code_limit)
+				math_abort(FPU_info, SIGSEGV);
 		}
 
-	      unmasked = 0;  /* Do this here to stop compiler warnings. */
-	      switch ( (byte1 >> 1) & 3 )
-		{
-		case 0:
-		  unmasked = FPU_load_single((float __user *)data_address,
-					     &loaded_data);
-		  loaded_tag = unmasked & 0xff;
-		  unmasked &= ~0xff;
-		  break;
-		case 1:
-		  loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
-		  break;
-		case 2:
-		  unmasked = FPU_load_double((double __user *)data_address,
-					     &loaded_data);
-		  loaded_tag = unmasked & 0xff;
-		  unmasked &= ~0xff;
-		  break;
-		case 3:
-		default:  /* Used here to suppress gcc warnings. */
-		  loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
-		  break;
-		}
+		if (!(byte1 & 1)) {
+			unsigned short status1 = partial_status;
 
-	      /* No more access to user memory, it is safe
-		 to use static data now */
+			st0_ptr = &st(0);
+			st0_tag = FPU_gettag0();
 
-	      /* NaN operands have the next priority. */
-	      /* We have to delay looking at st(0) until after
-		 loading the data, because that data might contain an SNaN */
-	      if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
-		  ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
-		{
-		  /* Restore the status word; we might have loaded a
-		     denormal. */
-		  partial_status = status1;
-		  if ( (FPU_modrm & 0x30) == 0x10 )
-		    {
-		      /* fcom or fcomp */
-		      EXCEPTION(EX_Invalid);
-		      setcc(SW_C3 | SW_C2 | SW_C0);
-		      if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
-			FPU_pop();             /* fcomp, masked, so we pop. */
-		    }
-		  else
-		    {
-		      if ( loaded_tag == TAG_Special )
-			loaded_tag = FPU_Special(&loaded_data);
+			/* Stack underflow has priority */
+			if (NOT_EMPTY_ST0) {
+				if (addr_modes.default_mode & PROTECTED) {
+					/* This table works for 16 and 32 bit protected mode */
+					if (access_limit <
+					    data_sizes_16[(byte1 >> 1) & 3])
+						math_abort(FPU_info, SIGSEGV);
+				}
+
+				unmasked = 0;	/* Do this here to stop compiler warnings. */
+				switch ((byte1 >> 1) & 3) {
+				case 0:
+					unmasked =
+					    FPU_load_single((float __user *)
+							    data_address,
+							    &loaded_data);
+					loaded_tag = unmasked & 0xff;
+					unmasked &= ~0xff;
+					break;
+				case 1:
+					loaded_tag =
+					    FPU_load_int32((long __user *)
+							   data_address,
+							   &loaded_data);
+					break;
+				case 2:
+					unmasked =
+					    FPU_load_double((double __user *)
+							    data_address,
+							    &loaded_data);
+					loaded_tag = unmasked & 0xff;
+					unmasked &= ~0xff;
+					break;
+				case 3:
+				default:	/* Used here to suppress gcc warnings. */
+					loaded_tag =
+					    FPU_load_int16((short __user *)
+							   data_address,
+							   &loaded_data);
+					break;
+				}
+
+				/* No more access to user memory, it is safe
+				   to use static data now */
+
+				/* NaN operands have the next priority. */
+				/* We have to delay looking at st(0) until after
+				   loading the data, because that data might contain an SNaN */
+				if (((st0_tag == TAG_Special) && isNaN(st0_ptr))
+				    || ((loaded_tag == TAG_Special)
+					&& isNaN(&loaded_data))) {
+					/* Restore the status word; we might have loaded a
+					   denormal. */
+					partial_status = status1;
+					if ((FPU_modrm & 0x30) == 0x10) {
+						/* fcom or fcomp */
+						EXCEPTION(EX_Invalid);
+						setcc(SW_C3 | SW_C2 | SW_C0);
+						if ((FPU_modrm & 0x08)
+						    && (control_word &
+							CW_Invalid))
+							FPU_pop();	/* fcomp, masked, so we pop. */
+					} else {
+						if (loaded_tag == TAG_Special)
+							loaded_tag =
+							    FPU_Special
+							    (&loaded_data);
 #ifdef PECULIAR_486
-		      /* This is not really needed, but gives behaviour
-			 identical to an 80486 */
-		      if ( (FPU_modrm & 0x28) == 0x20 )
-			/* fdiv or fsub */
-			real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
-		      else
-#endif /* PECULIAR_486 */ 
-			/* fadd, fdivr, fmul, or fsubr */
-			real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
-		    }
-		  goto reg_mem_instr_done;
-		}
+						/* This is not really needed, but gives behaviour
+						   identical to an 80486 */
+						if ((FPU_modrm & 0x28) == 0x20)
+							/* fdiv or fsub */
+							real_2op_NaN
+							    (&loaded_data,
+							     loaded_tag, 0,
+							     &loaded_data);
+						else
+#endif /* PECULIAR_486 */
+							/* fadd, fdivr, fmul, or fsubr */
+							real_2op_NaN
+							    (&loaded_data,
+							     loaded_tag, 0,
+							     st0_ptr);
+					}
+					goto reg_mem_instr_done;
+				}
 
-	      if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
-		{
-		  /* Is not a comparison instruction. */
-		  if ( (FPU_modrm & 0x38) == 0x38 )
-		    {
-		      /* fdivr */
-		      if ( (st0_tag == TAG_Zero) &&
-			   ((loaded_tag == TAG_Valid)
-			    || (loaded_tag == TAG_Special
-				&& isdenormal(&loaded_data))) )
-			{
-			  if ( FPU_divide_by_zero(0, getsign(&loaded_data))
-			       < 0 )
-			    {
-			      /* We use the fact here that the unmasked
-				 exception in the loaded data was for a
-				 denormal operand */
-			      /* Restore the state of the denormal op bit */
-			      partial_status &= ~SW_Denorm_Op;
-			      partial_status |= status1 & SW_Denorm_Op;
-			    }
-			  else
-			    setsign(st0_ptr, getsign(&loaded_data));
+				if (unmasked && !((FPU_modrm & 0x30) == 0x10)) {
+					/* Is not a comparison instruction. */
+					if ((FPU_modrm & 0x38) == 0x38) {
+						/* fdivr */
+						if ((st0_tag == TAG_Zero) &&
+						    ((loaded_tag == TAG_Valid)
+						     || (loaded_tag ==
+							 TAG_Special
+							 &&
+							 isdenormal
+							 (&loaded_data)))) {
+							if (FPU_divide_by_zero
+							    (0,
+							     getsign
+							     (&loaded_data))
+							    < 0) {
+								/* We use the fact here that the unmasked
+								   exception in the loaded data was for a
+								   denormal operand */
+								/* Restore the state of the denormal op bit */
+								partial_status
+								    &=
+								    ~SW_Denorm_Op;
+								partial_status
+								    |=
+								    status1 &
+								    SW_Denorm_Op;
+							} else
+								setsign(st0_ptr,
+									getsign
+									(&loaded_data));
+						}
+					}
+					goto reg_mem_instr_done;
+				}
+
+				switch ((FPU_modrm >> 3) & 7) {
+				case 0:	/* fadd */
+					clear_C1();
+					FPU_add(&loaded_data, loaded_tag, 0,
+						control_word);
+					break;
+				case 1:	/* fmul */
+					clear_C1();
+					FPU_mul(&loaded_data, loaded_tag, 0,
+						control_word);
+					break;
+				case 2:	/* fcom */
+					FPU_compare_st_data(&loaded_data,
+							    loaded_tag);
+					break;
+				case 3:	/* fcomp */
+					if (!FPU_compare_st_data
+					    (&loaded_data, loaded_tag)
+					    && !unmasked)
+						FPU_pop();
+					break;
+				case 4:	/* fsub */
+					clear_C1();
+					FPU_sub(LOADED | loaded_tag,
+						(int)&loaded_data,
+						control_word);
+					break;
+				case 5:	/* fsubr */
+					clear_C1();
+					FPU_sub(REV | LOADED | loaded_tag,
+						(int)&loaded_data,
+						control_word);
+					break;
+				case 6:	/* fdiv */
+					clear_C1();
+					FPU_div(LOADED | loaded_tag,
+						(int)&loaded_data,
+						control_word);
+					break;
+				case 7:	/* fdivr */
+					clear_C1();
+					if (st0_tag == TAG_Zero)
+						partial_status = status1;	/* Undo any denorm tag,
+										   zero-divide has priority. */
+					FPU_div(REV | LOADED | loaded_tag,
+						(int)&loaded_data,
+						control_word);
+					break;
+				}
+			} else {
+				if ((FPU_modrm & 0x30) == 0x10) {
+					/* The instruction is fcom or fcomp */
+					EXCEPTION(EX_StackUnder);
+					setcc(SW_C3 | SW_C2 | SW_C0);
+					if ((FPU_modrm & 0x08)
+					    && (control_word & CW_Invalid))
+						FPU_pop();	/* fcomp */
+				} else
+					FPU_stack_underflow();
+			}
+		      reg_mem_instr_done:
+			operand_address = data_sel_off;
+		} else {
+			if (!(no_ip_update =
+			      FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6))
+					     >> 1, addr_modes, data_address))) {
+				operand_address = data_sel_off;
 			}
-		    }
-		  goto reg_mem_instr_done;
 		}
 
-	      switch ( (FPU_modrm >> 3) & 7 )
-		{
-		case 0:         /* fadd */
-		  clear_C1();
-		  FPU_add(&loaded_data, loaded_tag, 0, control_word);
-		  break;
-		case 1:         /* fmul */
-		  clear_C1();
-		  FPU_mul(&loaded_data, loaded_tag, 0, control_word);
-		  break;
-		case 2:         /* fcom */
-		  FPU_compare_st_data(&loaded_data, loaded_tag);
-		  break;
-		case 3:         /* fcomp */
-		  if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
-		       && !unmasked )
-		    FPU_pop();
-		  break;
-		case 4:         /* fsub */
-		  clear_C1();
-		  FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
-		  break;
-		case 5:         /* fsubr */
-		  clear_C1();
-		  FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
-		  break;
-		case 6:         /* fdiv */
-		  clear_C1();
-		  FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
-		  break;
-		case 7:         /* fdivr */
-		  clear_C1();
-		  if ( st0_tag == TAG_Zero )
-		    partial_status = status1;  /* Undo any denorm tag,
-						  zero-divide has priority. */
-		  FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
-		  break;
-		}
-	    }
-	  else
-	    {
-	      if ( (FPU_modrm & 0x30) == 0x10 )
-		{
-		  /* The instruction is fcom or fcomp */
-		  EXCEPTION(EX_StackUnder);
-		  setcc(SW_C3 | SW_C2 | SW_C0);
-		  if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
-		    FPU_pop();             /* fcomp */
-		}
-	      else
-		FPU_stack_underflow();
-	    }
-	reg_mem_instr_done:
-	  operand_address = data_sel_off;
-	}
-      else
-	{
-	  if ( !(no_ip_update =
-		 FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
-				addr_modes, data_address)) )
-	    {
-	      operand_address = data_sel_off;
-	    }
-	}
-
-    }
-  else
-    {
-      /* None of these instructions access user memory */
-      u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
+	} else {
+		/* None of these instructions access user memory */
+		u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
 
 #ifdef PECULIAR_486
-      /* This is supposed to be undefined, but a real 80486 seems
-	 to do this: */
-      operand_address.offset = 0;
-      operand_address.selector = FPU_DS;
+		/* This is supposed to be undefined, but a real 80486 seems
+		   to do this: */
+		operand_address.offset = 0;
+		operand_address.selector = FPU_DS;
 #endif /* PECULIAR_486 */
 
-      st0_ptr = &st(0);
-      st0_tag = FPU_gettag0();
-      switch ( type_table[(int) instr_index] )
-	{
-	case _NONE_:   /* also _REGIc: _REGIn */
-	  break;
-	case _REG0_:
-	  if ( !NOT_EMPTY_ST0 )
-	    {
-	      FPU_stack_underflow();
-	      goto FPU_instruction_done;
-	    }
-	  break;
-	case _REGIi:
-	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
-	    {
-	      FPU_stack_underflow_i(FPU_rm);
-	      goto FPU_instruction_done;
-	    }
-	  break;
-	case _REGIp:
-	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
-	    {
-	      FPU_stack_underflow_pop(FPU_rm);
-	      goto FPU_instruction_done;
-	    }
-	  break;
-	case _REGI_:
-	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
-	    {
-	      FPU_stack_underflow();
-	      goto FPU_instruction_done;
-	    }
-	  break;
-	case _PUSH_:     /* Only used by the fld st(i) instruction */
-	  break;
-	case _null_:
-	  FPU_illegal();
-	  goto FPU_instruction_done;
-	default:
-	  EXCEPTION(EX_INTERNAL|0x111);
-	  goto FPU_instruction_done;
+		st0_ptr = &st(0);
+		st0_tag = FPU_gettag0();
+		switch (type_table[(int)instr_index]) {
+		case _NONE_:	/* also _REGIc: _REGIn */
+			break;
+		case _REG0_:
+			if (!NOT_EMPTY_ST0) {
+				FPU_stack_underflow();
+				goto FPU_instruction_done;
+			}
+			break;
+		case _REGIi:
+			if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+				FPU_stack_underflow_i(FPU_rm);
+				goto FPU_instruction_done;
+			}
+			break;
+		case _REGIp:
+			if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+				FPU_stack_underflow_pop(FPU_rm);
+				goto FPU_instruction_done;
+			}
+			break;
+		case _REGI_:
+			if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+				FPU_stack_underflow();
+				goto FPU_instruction_done;
+			}
+			break;
+		case _PUSH_:	/* Only used by the fld st(i) instruction */
+			break;
+		case _null_:
+			FPU_illegal();
+			goto FPU_instruction_done;
+		default:
+			EXCEPTION(EX_INTERNAL | 0x111);
+			goto FPU_instruction_done;
+		}
+		(*st_instr_table[(int)instr_index]) ();
+
+	      FPU_instruction_done:
+		;
 	}
-      (*st_instr_table[(int) instr_index])();
 
-FPU_instruction_done:
-      ;
-    }
+	if (!no_ip_update)
+		instruction_address = entry_sel_off;
 
-  if ( ! no_ip_update )
-    instruction_address = entry_sel_off;
-
-FPU_fwait_done:
+      FPU_fwait_done:
 
 #ifdef DEBUG
-  RE_ENTRANT_CHECK_OFF;
-  FPU_printall();
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_printall();
+	RE_ENTRANT_CHECK_ON;
 #endif /* DEBUG */
 
-  if (FPU_lookahead && !need_resched())
-    {
-      FPU_ORIG_EIP = FPU_EIP - code_base;
-      if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
-			&addr_modes.override) )
-	goto do_another_FPU_instruction;
-    }
+	if (FPU_lookahead && !need_resched()) {
+		FPU_ORIG_EIP = FPU_EIP - code_base;
+		if (valid_prefix(&byte1, (u_char __user **) & FPU_EIP,
+				 &addr_modes.override))
+			goto do_another_FPU_instruction;
+	}
 
-  if ( addr_modes.default_mode )
-    FPU_EIP -= code_base;
+	if (addr_modes.default_mode)
+		FPU_EIP -= code_base;
 
-  RE_ENTRANT_CHECK_OFF;
+	RE_ENTRANT_CHECK_OFF;
 }
 
-
 /* Support for prefix bytes is not yet complete. To properly handle
    all prefix bytes, further changes are needed in the emulator code
    which accesses user address space. Access to separate segments is
    important for msdos emulation. */
-static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
-			overrides *override)
+static int valid_prefix(u_char * Byte, u_char __user ** fpu_eip,
+			overrides * override)
 {
-  u_char byte;
-  u_char __user *ip = *fpu_eip;
+	u_char byte;
+	u_char __user *ip = *fpu_eip;
 
-  *override = (overrides) { 0, 0, PREFIX_DEFAULT };       /* defaults */
+	*override = (overrides) {
+	0, 0, PREFIX_DEFAULT};	/* defaults */
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_code_access_ok(1);
-  FPU_get_user(byte, ip);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_code_access_ok(1);
+	FPU_get_user(byte, ip);
+	RE_ENTRANT_CHECK_ON;
 
-  while ( 1 )
-    {
-      switch ( byte )
-	{
-	case ADDR_SIZE_PREFIX:
-	  override->address_size = ADDR_SIZE_PREFIX;
-	  goto do_next_byte;
+	while (1) {
+		switch (byte) {
+		case ADDR_SIZE_PREFIX:
+			override->address_size = ADDR_SIZE_PREFIX;
+			goto do_next_byte;
 
-	case OP_SIZE_PREFIX:
-	  override->operand_size = OP_SIZE_PREFIX;
-	  goto do_next_byte;
+		case OP_SIZE_PREFIX:
+			override->operand_size = OP_SIZE_PREFIX;
+			goto do_next_byte;
 
-	case PREFIX_CS:
-	  override->segment = PREFIX_CS_;
-	  goto do_next_byte;
-	case PREFIX_ES:
-	  override->segment = PREFIX_ES_;
-	  goto do_next_byte;
-	case PREFIX_SS:
-	  override->segment = PREFIX_SS_;
-	  goto do_next_byte;
-	case PREFIX_FS:
-	  override->segment = PREFIX_FS_;
-	  goto do_next_byte;
-	case PREFIX_GS:
-	  override->segment = PREFIX_GS_;
-	  goto do_next_byte;
-	case PREFIX_DS:
-	  override->segment = PREFIX_DS_;
-	  goto do_next_byte;
+		case PREFIX_CS:
+			override->segment = PREFIX_CS_;
+			goto do_next_byte;
+		case PREFIX_ES:
+			override->segment = PREFIX_ES_;
+			goto do_next_byte;
+		case PREFIX_SS:
+			override->segment = PREFIX_SS_;
+			goto do_next_byte;
+		case PREFIX_FS:
+			override->segment = PREFIX_FS_;
+			goto do_next_byte;
+		case PREFIX_GS:
+			override->segment = PREFIX_GS_;
+			goto do_next_byte;
+		case PREFIX_DS:
+			override->segment = PREFIX_DS_;
+			goto do_next_byte;
 
 /* lock is not a valid prefix for FPU instructions,
    let the cpu handle it to generate a SIGILL. */
 /*	case PREFIX_LOCK: */
 
-	  /* rep.. prefixes have no meaning for FPU instructions */
-	case PREFIX_REPE:
-	case PREFIX_REPNE:
+			/* rep.. prefixes have no meaning for FPU instructions */
+		case PREFIX_REPE:
+		case PREFIX_REPNE:
 
-	do_next_byte:
-	  ip++;
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_code_access_ok(1);
-	  FPU_get_user(byte, ip);
-	  RE_ENTRANT_CHECK_ON;
-	  break;
-	case FWAIT_OPCODE:
-	  *Byte = byte;
-	  return 1;
-	default:
-	  if ( (byte & 0xf8) == 0xd8 )
-	    {
-	      *Byte = byte;
-	      *fpu_eip = ip;
-	      return 1;
-	    }
-	  else
-	    {
-	      /* Not a valid sequence of prefix bytes followed by
-		 an FPU instruction. */
-	      *Byte = byte;  /* Needed for error message. */
-	      return 0;
-	    }
+		      do_next_byte:
+			ip++;
+			RE_ENTRANT_CHECK_OFF;
+			FPU_code_access_ok(1);
+			FPU_get_user(byte, ip);
+			RE_ENTRANT_CHECK_ON;
+			break;
+		case FWAIT_OPCODE:
+			*Byte = byte;
+			return 1;
+		default:
+			if ((byte & 0xf8) == 0xd8) {
+				*Byte = byte;
+				*fpu_eip = ip;
+				return 1;
+			} else {
+				/* Not a valid sequence of prefix bytes followed by
+				   an FPU instruction. */
+				*Byte = byte;	/* Needed for error message. */
+				return 0;
+			}
+		}
 	}
-    }
 }
 
-
-void math_abort(struct info * info, unsigned int signal)
+void math_abort(struct info *info, unsigned int signal)
 {
 	FPU_EIP = FPU_ORIG_EIP;
 	current->thread.trap_no = 16;
 	current->thread.error_code = 0;
-	send_sig(signal,current,1);
+	send_sig(signal, current, 1);
 	RE_ENTRANT_CHECK_OFF;
-	__asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
+      __asm__("movl %0,%%esp ; ret": :"g"(((long)info) - 4));
 #ifdef PARANOID
-      printk("ERROR: wm-FPU-emu math_abort failed!\n");
+	printk("ERROR: wm-FPU-emu math_abort failed!\n");
 #endif /* PARANOID */
 }
 
-
-
 #define S387 ((struct i387_soft_struct *)s387)
 #define sstatus_word() \
   ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
 
-int restore_i387_soft(void *s387, struct _fpstate __user *buf)
+int restore_i387_soft(void *s387, struct _fpstate __user * buf)
 {
-  u_char __user *d = (u_char __user *)buf;
-  int offset, other, i, tags, regnr, tag, newtop;
+	u_char __user *d = (u_char __user *) buf;
+	int offset, other, i, tags, regnr, tag, newtop;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
-  if (__copy_from_user(&S387->cwd, d, 7*4))
-    return -1;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, d, 7 * 4 + 8 * 10);
+	if (__copy_from_user(&S387->cwd, d, 7 * 4))
+		return -1;
+	RE_ENTRANT_CHECK_ON;
 
-  d += 7*4;
+	d += 7 * 4;
 
-  S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
-  offset = (S387->ftop & 7) * 10;
-  other = 80 - offset;
+	S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
+	offset = (S387->ftop & 7) * 10;
+	other = 80 - offset;
 
-  RE_ENTRANT_CHECK_OFF;
-  /* Copy all registers in stack order. */
-  if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
-    return -1;
-  if ( offset )
-    if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
-      return -1;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	/* Copy all registers in stack order. */
+	if (__copy_from_user(((u_char *) & S387->st_space) + offset, d, other))
+		return -1;
+	if (offset)
+		if (__copy_from_user
+		    ((u_char *) & S387->st_space, d + other, offset))
+			return -1;
+	RE_ENTRANT_CHECK_ON;
 
-  /* The tags may need to be corrected now. */
-  tags = S387->twd;
-  newtop = S387->ftop;
-  for ( i = 0; i < 8; i++ )
-    {
-      regnr = (i+newtop) & 7;
-      if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
-	{
-	  /* The loaded data over-rides all other cases. */
-	  tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
-	  tags &= ~(3 << (regnr*2));
-	  tags |= (tag & 3) << (regnr*2);
+	/* The tags may need to be corrected now. */
+	tags = S387->twd;
+	newtop = S387->ftop;
+	for (i = 0; i < 8; i++) {
+		regnr = (i + newtop) & 7;
+		if (((tags >> ((regnr & 7) * 2)) & 3) != TAG_Empty) {
+			/* The loaded data over-rides all other cases. */
+			tag =
+			    FPU_tagof((FPU_REG *) ((u_char *) S387->st_space +
+						   10 * regnr));
+			tags &= ~(3 << (regnr * 2));
+			tags |= (tag & 3) << (regnr * 2);
+		}
 	}
-    }
-  S387->twd = tags;
+	S387->twd = tags;
 
-  return 0;
+	return 0;
 }
 
-
 int save_i387_soft(void *s387, struct _fpstate __user * buf)
 {
-  u_char __user *d = (u_char __user *)buf;
-  int offset = (S387->ftop & 7) * 10, other = 80 - offset;
+	u_char __user *d = (u_char __user *) buf;
+	int offset = (S387->ftop & 7) * 10, other = 80 - offset;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 7 * 4 + 8 * 10);
 #ifdef PECULIAR_486
-  S387->cwd &= ~0xe080;
-  /* An 80486 sets nearly all of the reserved bits to 1. */
-  S387->cwd |= 0xffff0040;
-  S387->swd = sstatus_word() | 0xffff0000;
-  S387->twd |= 0xffff0000;
-  S387->fcs &= ~0xf8000000;
-  S387->fos |= 0xffff0000;
+	S387->cwd &= ~0xe080;
+	/* An 80486 sets nearly all of the reserved bits to 1. */
+	S387->cwd |= 0xffff0040;
+	S387->swd = sstatus_word() | 0xffff0000;
+	S387->twd |= 0xffff0000;
+	S387->fcs &= ~0xf8000000;
+	S387->fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
-  if (__copy_to_user(d, &S387->cwd, 7*4))
-    return -1;
-  RE_ENTRANT_CHECK_ON;
+	if (__copy_to_user(d, &S387->cwd, 7 * 4))
+		return -1;
+	RE_ENTRANT_CHECK_ON;
 
-  d += 7*4;
+	d += 7 * 4;
 
-  RE_ENTRANT_CHECK_OFF;
-  /* Copy all registers in stack order. */
-  if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
-    return -1;
-  if ( offset )
-    if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
-      return -1;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	/* Copy all registers in stack order. */
+	if (__copy_to_user(d, ((u_char *) & S387->st_space) + offset, other))
+		return -1;
+	if (offset)
+		if (__copy_to_user
+		    (d + other, (u_char *) & S387->st_space, offset))
+			return -1;
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c
index e3b5d465587f..e73631e0cde9 100644
--- a/arch/x86/math-emu/fpu_etc.c
+++ b/arch/x86/math-emu/fpu_etc.c
@@ -16,128 +16,115 @@
 #include "status_w.h"
 #include "reg_constant.h"
 
-
-static void fchs(FPU_REG *st0_ptr, u_char st0tag)
+static void fchs(FPU_REG * st0_ptr, u_char st0tag)
 {
-  if ( st0tag ^ TAG_Empty )
-    {
-      signbyte(st0_ptr) ^= SIGN_NEG;
-      clear_C1();
-    }
-  else
-    FPU_stack_underflow();
+	if (st0tag ^ TAG_Empty) {
+		signbyte(st0_ptr) ^= SIGN_NEG;
+		clear_C1();
+	} else
+		FPU_stack_underflow();
 }
 
-
-static void fabs(FPU_REG *st0_ptr, u_char st0tag)
+static void fabs(FPU_REG * st0_ptr, u_char st0tag)
 {
-  if ( st0tag ^ TAG_Empty )
-    {
-      setpositive(st0_ptr);
-      clear_C1();
-    }
-  else
-    FPU_stack_underflow();
+	if (st0tag ^ TAG_Empty) {
+		setpositive(st0_ptr);
+		clear_C1();
+	} else
+		FPU_stack_underflow();
 }
 
-
-static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
+static void ftst_(FPU_REG * st0_ptr, u_char st0tag)
 {
-  switch (st0tag)
-    {
-    case TAG_Zero:
-      setcc(SW_C3);
-      break;
-    case TAG_Valid:
-      if (getsign(st0_ptr) == SIGN_POS)
-        setcc(0);
-      else
-        setcc(SW_C0);
-      break;
-    case TAG_Special:
-      switch ( FPU_Special(st0_ptr) )
-	{
-	case TW_Denormal:
-	  if (getsign(st0_ptr) == SIGN_POS)
-	    setcc(0);
-	  else
-	    setcc(SW_C0);
-	  if ( denormal_operand() < 0 )
-	    {
-#ifdef PECULIAR_486
-	      /* This is weird! */
-	      if (getsign(st0_ptr) == SIGN_POS)
+	switch (st0tag) {
+	case TAG_Zero:
 		setcc(SW_C3);
+		break;
+	case TAG_Valid:
+		if (getsign(st0_ptr) == SIGN_POS)
+			setcc(0);
+		else
+			setcc(SW_C0);
+		break;
+	case TAG_Special:
+		switch (FPU_Special(st0_ptr)) {
+		case TW_Denormal:
+			if (getsign(st0_ptr) == SIGN_POS)
+				setcc(0);
+			else
+				setcc(SW_C0);
+			if (denormal_operand() < 0) {
+#ifdef PECULIAR_486
+				/* This is weird! */
+				if (getsign(st0_ptr) == SIGN_POS)
+					setcc(SW_C3);
 #endif /* PECULIAR_486 */
-	      return;
-	    }
-	  break;
-	case TW_NaN:
-	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
-	  EXCEPTION(EX_Invalid);
-	  break;
-	case TW_Infinity:
-	  if (getsign(st0_ptr) == SIGN_POS)
-	    setcc(0);
-	  else
-	    setcc(SW_C0);
-	  break;
-	default:
-	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
-	  EXCEPTION(EX_INTERNAL|0x14);
-	  break;
+				return;
+			}
+			break;
+		case TW_NaN:
+			setcc(SW_C0 | SW_C2 | SW_C3);	/* Operand is not comparable */
+			EXCEPTION(EX_Invalid);
+			break;
+		case TW_Infinity:
+			if (getsign(st0_ptr) == SIGN_POS)
+				setcc(0);
+			else
+				setcc(SW_C0);
+			break;
+		default:
+			setcc(SW_C0 | SW_C2 | SW_C3);	/* Operand is not comparable */
+			EXCEPTION(EX_INTERNAL | 0x14);
+			break;
+		}
+		break;
+	case TAG_Empty:
+		setcc(SW_C0 | SW_C2 | SW_C3);
+		EXCEPTION(EX_StackUnder);
+		break;
 	}
-      break;
-    case TAG_Empty:
-      setcc(SW_C0|SW_C2|SW_C3);
-      EXCEPTION(EX_StackUnder);
-      break;
-    }
 }
 
-
-static void fxam(FPU_REG *st0_ptr, u_char st0tag)
+static void fxam(FPU_REG * st0_ptr, u_char st0tag)
 {
-  int c = 0;
-  switch (st0tag)
-    {
-    case TAG_Empty:
-      c = SW_C3|SW_C0;
-      break;
-    case TAG_Zero:
-      c = SW_C3;
-      break;
-    case TAG_Valid:
-      c = SW_C2;
-      break;
-    case TAG_Special:
-      switch ( FPU_Special(st0_ptr) )
-	{
-	case TW_Denormal:
-	  c = SW_C2|SW_C3;  /* Denormal */
-	  break;
-	case TW_NaN:
-	  /* We also use NaN for unsupported types. */
-	  if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
-	    c = SW_C0;
-	  break;
-	case TW_Infinity:
-	  c = SW_C2|SW_C0;
-	  break;
+	int c = 0;
+	switch (st0tag) {
+	case TAG_Empty:
+		c = SW_C3 | SW_C0;
+		break;
+	case TAG_Zero:
+		c = SW_C3;
+		break;
+	case TAG_Valid:
+		c = SW_C2;
+		break;
+	case TAG_Special:
+		switch (FPU_Special(st0_ptr)) {
+		case TW_Denormal:
+			c = SW_C2 | SW_C3;	/* Denormal */
+			break;
+		case TW_NaN:
+			/* We also use NaN for unsupported types. */
+			if ((st0_ptr->sigh & 0x80000000)
+			    && (exponent(st0_ptr) == EXP_OVER))
+				c = SW_C0;
+			break;
+		case TW_Infinity:
+			c = SW_C2 | SW_C0;
+			break;
+		}
 	}
-    }
-  if ( getsign(st0_ptr) == SIGN_NEG )
-    c |= SW_C1;
-  setcc(c);
+	if (getsign(st0_ptr) == SIGN_NEG)
+		c |= SW_C1;
+	setcc(c);
 }
 
-
 static FUNC_ST0 const fp_etc_table[] = {
-  fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
-  ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
+	fchs, fabs, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal,
+	ftst_, fxam, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal
 };
 
 void FPU_etc(void)
 {
-  (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
+	(fp_etc_table[FPU_rm]) (&st(0), FPU_gettag0());
 }
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h
index 37a8a7fe7e2b..0f6384102afd 100644
--- a/arch/x86/math-emu/fpu_proto.h
+++ b/arch/x86/math-emu/fpu_proto.h
@@ -5,7 +5,7 @@
 extern void FPU_illegal(void);
 extern void FPU_printall(void);
 asmlinkage void FPU_exception(int n);
-extern int real_1op_NaN(FPU_REG *a);
+extern int real_1op_NaN(FPU_REG * a);
 extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
 			FPU_REG const *defaultNaN);
 asmlinkage int arith_invalid(int deststnr);
@@ -14,8 +14,8 @@ extern int set_precision_flag(int flags);
 asmlinkage void set_precision_flag_up(void);
 asmlinkage void set_precision_flag_down(void);
 asmlinkage int denormal_operand(void);
-asmlinkage int arith_overflow(FPU_REG *dest);
-asmlinkage int arith_underflow(FPU_REG *dest);
+asmlinkage int arith_overflow(FPU_REG * dest);
+asmlinkage int arith_underflow(FPU_REG * dest);
 extern void FPU_stack_overflow(void);
 extern void FPU_stack_underflow(void);
 extern void FPU_stack_underflow_i(int i);
@@ -66,7 +66,7 @@ extern int FPU_Special(FPU_REG const *ptr);
 extern int isNaN(FPU_REG const *ptr);
 extern void FPU_pop(void);
 extern int FPU_empty_i(int stnr);
-extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
+extern int FPU_stackoverflow(FPU_REG ** st_new_ptr);
 extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
 extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
 extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
@@ -75,26 +75,28 @@ extern void FPU_triga(void);
 extern void FPU_trigb(void);
 /* get_address.c */
 extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
-			 struct address *addr, fpu_addr_modes addr_modes);
+				    struct address *addr,
+				    fpu_addr_modes addr_modes);
 extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
-			    struct address *addr, fpu_addr_modes addr_modes);
+				       struct address *addr,
+				       fpu_addr_modes addr_modes);
 /* load_store.c */
 extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
-			    void __user *data_address);
+			  void __user * data_address);
 /* poly_2xm1.c */
-extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
+extern int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG * result);
 /* poly_atan.c */
-extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
+extern void poly_atan(FPU_REG * st0_ptr, u_char st0_tag, FPU_REG * st1_ptr,
 		      u_char st1_tag);
 /* poly_l2.c */
-extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
-extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
-		     FPU_REG *d);
+extern void poly_l2(FPU_REG * st0_ptr, FPU_REG * st1_ptr, u_char st1_sign);
+extern int poly_l2p1(u_char s0, u_char s1, FPU_REG * r0, FPU_REG * r1,
+		     FPU_REG * d);
 /* poly_sin.c */
-extern void poly_sine(FPU_REG *st0_ptr);
-extern void poly_cos(FPU_REG *st0_ptr);
+extern void poly_sine(FPU_REG * st0_ptr);
+extern void poly_cos(FPU_REG * st0_ptr);
 /* poly_tan.c */
-extern void poly_tan(FPU_REG *st0_ptr);
+extern void poly_tan(FPU_REG * st0_ptr);
 /* reg_add_sub.c */
 extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
 extern int FPU_sub(int flags, int rm, int control_w);
@@ -109,32 +111,34 @@ extern void fucompp(void);
 /* reg_constant.c */
 extern void fconst(void);
 /* reg_ld_str.c */
-extern int FPU_load_extended(long double __user *s, int stnr);
-extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
-extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
-extern int FPU_load_int64(long long __user *_s);
-extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
-extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
-extern int FPU_load_bcd(u_char __user *s);
-extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
-			      long double __user *d);
-extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
-extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
-extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
-extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
-extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
-extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
-extern int FPU_round_to_int(FPU_REG *r, u_char tag);
-extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
-extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
-extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
-extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
-extern int FPU_tagof(FPU_REG *ptr);
+extern int FPU_load_extended(long double __user * s, int stnr);
+extern int FPU_load_double(double __user * dfloat, FPU_REG * loaded_data);
+extern int FPU_load_single(float __user * single, FPU_REG * loaded_data);
+extern int FPU_load_int64(long long __user * _s);
+extern int FPU_load_int32(long __user * _s, FPU_REG * loaded_data);
+extern int FPU_load_int16(short __user * _s, FPU_REG * loaded_data);
+extern int FPU_load_bcd(u_char __user * s);
+extern int FPU_store_extended(FPU_REG * st0_ptr, u_char st0_tag,
+			      long double __user * d);
+extern int FPU_store_double(FPU_REG * st0_ptr, u_char st0_tag,
+			    double __user * dfloat);
+extern int FPU_store_single(FPU_REG * st0_ptr, u_char st0_tag,
+			    float __user * single);
+extern int FPU_store_int64(FPU_REG * st0_ptr, u_char st0_tag,
+			   long long __user * d);
+extern int FPU_store_int32(FPU_REG * st0_ptr, u_char st0_tag, long __user * d);
+extern int FPU_store_int16(FPU_REG * st0_ptr, u_char st0_tag, short __user * d);
+extern int FPU_store_bcd(FPU_REG * st0_ptr, u_char st0_tag, u_char __user * d);
+extern int FPU_round_to_int(FPU_REG * r, u_char tag);
+extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user * s);
+extern void frstor(fpu_addr_modes addr_modes, u_char __user * data_address);
+extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user * d);
+extern void fsave(fpu_addr_modes addr_modes, u_char __user * data_address);
+extern int FPU_tagof(FPU_REG * ptr);
 /* reg_mul.c */
 extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
 
 extern int FPU_div(int flags, int regrm, int control_w);
 /* reg_convert.c */
-extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
+extern int FPU_to_exp16(FPU_REG const *a, FPU_REG * x);
 #endif /* _FPU_PROTO_H */
-
diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c
index cb436fe20e4c..d9c657cd7746 100644
--- a/arch/x86/math-emu/fpu_tags.c
+++ b/arch/x86/math-emu/fpu_tags.c
@@ -14,114 +14,102 @@
 #include "fpu_system.h"
 #include "exception.h"
 
-
 void FPU_pop(void)
 {
-  fpu_tag_word |= 3 << ((top & 7)*2);
-  top++;
+	fpu_tag_word |= 3 << ((top & 7) * 2);
+	top++;
 }
 
-
 int FPU_gettag0(void)
 {
-  return (fpu_tag_word >> ((top & 7)*2)) & 3;
+	return (fpu_tag_word >> ((top & 7) * 2)) & 3;
 }
 
-
 int FPU_gettagi(int stnr)
 {
-  return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
+	return (fpu_tag_word >> (((top + stnr) & 7) * 2)) & 3;
 }
 
-
 int FPU_gettag(int regnr)
 {
-  return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
+	return (fpu_tag_word >> ((regnr & 7) * 2)) & 3;
 }
 
-
 void FPU_settag0(int tag)
 {
-  int regnr = top;
-  regnr &= 7;
-  fpu_tag_word &= ~(3 << (regnr*2));
-  fpu_tag_word |= (tag & 3) << (regnr*2);
+	int regnr = top;
+	regnr &= 7;
+	fpu_tag_word &= ~(3 << (regnr * 2));
+	fpu_tag_word |= (tag & 3) << (regnr * 2);
 }
 
-
 void FPU_settagi(int stnr, int tag)
 {
-  int regnr = stnr+top;
-  regnr &= 7;
-  fpu_tag_word &= ~(3 << (regnr*2));
-  fpu_tag_word |= (tag & 3) << (regnr*2);
+	int regnr = stnr + top;
+	regnr &= 7;
+	fpu_tag_word &= ~(3 << (regnr * 2));
+	fpu_tag_word |= (tag & 3) << (regnr * 2);
 }
 
-
 void FPU_settag(int regnr, int tag)
 {
-  regnr &= 7;
-  fpu_tag_word &= ~(3 << (regnr*2));
-  fpu_tag_word |= (tag & 3) << (regnr*2);
+	regnr &= 7;
+	fpu_tag_word &= ~(3 << (regnr * 2));
+	fpu_tag_word |= (tag & 3) << (regnr * 2);
 }
 
-
 int FPU_Special(FPU_REG const *ptr)
 {
-  int exp = exponent(ptr);
+	int exp = exponent(ptr);
 
-  if ( exp == EXP_BIAS+EXP_UNDER )
-    return TW_Denormal;
-  else if ( exp != EXP_BIAS+EXP_OVER )
-    return TW_NaN;
-  else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
-    return TW_Infinity;
-  return TW_NaN;
+	if (exp == EXP_BIAS + EXP_UNDER)
+		return TW_Denormal;
+	else if (exp != EXP_BIAS + EXP_OVER)
+		return TW_NaN;
+	else if ((ptr->sigh == 0x80000000) && (ptr->sigl == 0))
+		return TW_Infinity;
+	return TW_NaN;
 }
 
-
 int isNaN(FPU_REG const *ptr)
 {
-  return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
-	   && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
+	return ((exponent(ptr) == EXP_BIAS + EXP_OVER)
+		&& !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)));
 }
 
-
 int FPU_empty_i(int stnr)
 {
-  int regnr = (top+stnr) & 7;
+	int regnr = (top + stnr) & 7;
 
-  return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
+	return ((fpu_tag_word >> (regnr * 2)) & 3) == TAG_Empty;
 }
 
-
-int FPU_stackoverflow(FPU_REG **st_new_ptr)
+int FPU_stackoverflow(FPU_REG ** st_new_ptr)
 {
-  *st_new_ptr = &st(-1);
+	*st_new_ptr = &st(-1);
 
-  return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
+	return ((fpu_tag_word >> (((top - 1) & 7) * 2)) & 3) != TAG_Empty;
 }
 
-
 void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
 {
-  reg_copy(r, &st(stnr));
-  FPU_settagi(stnr, tag);
+	reg_copy(r, &st(stnr));
+	FPU_settagi(stnr, tag);
 }
 
 void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
 {
-  reg_copy(r, &st(1));
-  FPU_settagi(1, tag);
+	reg_copy(r, &st(1));
+	FPU_settagi(1, tag);
 }
 
 void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
 {
-  int regnr = top;
-  regnr &= 7;
+	int regnr = top;
+	regnr &= 7;
 
-  reg_copy(r, &st(0));
+	reg_copy(r, &st(0));
 
-  fpu_tag_word &= ~(3 << (regnr*2));
-  fpu_tag_word |= (tag & 3) << (regnr*2);
+	fpu_tag_word &= ~(3 << (regnr * 2));
+	fpu_tag_word |= (tag & 3) << (regnr * 2);
 }
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 403cbde1d425..e5316a288a6e 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -15,11 +15,10 @@
 #include "fpu_emu.h"
 #include "status_w.h"
 #include "control_w.h"
-#include "reg_constant.h"	
+#include "reg_constant.h"
 
 static void rem_kernel(unsigned long long st0, unsigned long long *y,
-		       unsigned long long st1,
-		       unsigned long long q, int n);
+		       unsigned long long st1, unsigned long long q, int n);
 
 #define BETTER_THAN_486
 
@@ -31,791 +30,709 @@ static void rem_kernel(unsigned long long st0, unsigned long long *y,
 /* Limited measurements show no results worse than 64 bit precision
    except for the results for arguments close to 2^63, where the
    precision of the result sometimes degrades to about 63.9 bits */
-static int trig_arg(FPU_REG *st0_ptr, int even)
+static int trig_arg(FPU_REG * st0_ptr, int even)
 {
-  FPU_REG tmp;
-  u_char tmptag;
-  unsigned long long q;
-  int old_cw = control_word, saved_status = partial_status;
-  int tag, st0_tag = TAG_Valid;
+	FPU_REG tmp;
+	u_char tmptag;
+	unsigned long long q;
+	int old_cw = control_word, saved_status = partial_status;
+	int tag, st0_tag = TAG_Valid;
 
-  if ( exponent(st0_ptr) >= 63 )
-    {
-      partial_status |= SW_C2;     /* Reduction incomplete. */
-      return -1;
-    }
-
-  control_word &= ~CW_RC;
-  control_word |= RC_CHOP;
-
-  setpositive(st0_ptr);
-  tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
-		  SIGN_POS);
-
-  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't overflow
-				   to 2^64 */
-  q = significand(&tmp);
-  if ( q )
-    {
-      rem_kernel(significand(st0_ptr),
-		 &significand(&tmp),
-		 significand(&CONST_PI2),
-		 q, exponent(st0_ptr) - exponent(&CONST_PI2));
-      setexponent16(&tmp, exponent(&CONST_PI2));
-      st0_tag = FPU_normalize(&tmp);
-      FPU_copy_to_reg0(&tmp, st0_tag);
-    }
-
-  if ( (even && !(q & 1)) || (!even && (q & 1)) )
-    {
-      st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION);
-
-#ifdef BETTER_THAN_486
-      /* So far, the results are exact but based upon a 64 bit
-	 precision approximation to pi/2. The technique used
-	 now is equivalent to using an approximation to pi/2 which
-	 is accurate to about 128 bits. */
-      if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) )
-	{
-	  /* This code gives the effect of having pi/2 to better than
-	     128 bits precision. */
-
-	  significand(&tmp) = q + 1;
-	  setexponent16(&tmp, 63);
-	  FPU_normalize(&tmp);
-	  tmptag =
-	    FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS,
-		      exponent(&CONST_PI2extra) + exponent(&tmp));
-	  setsign(&tmp, getsign(&CONST_PI2extra));
-	  st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
-	  if ( signnegative(st0_ptr) )
-	    {
-	      /* CONST_PI2extra is negative, so the result of the addition
-		 can be negative. This means that the argument is actually
-		 in a different quadrant. The correction is always < pi/2,
-		 so it can't overflow into yet another quadrant. */
-	      setpositive(st0_ptr);
-	      q++;
-	    }
+	if (exponent(st0_ptr) >= 63) {
+		partial_status |= SW_C2;	/* Reduction incomplete. */
+		return -1;
 	}
-#endif /* BETTER_THAN_486 */
-    }
-#ifdef BETTER_THAN_486
-  else
-    {
-      /* So far, the results are exact but based upon a 64 bit
-	 precision approximation to pi/2. The technique used
-	 now is equivalent to using an approximation to pi/2 which
-	 is accurate to about 128 bits. */
-      if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
-	   || (q > 1) )
-	{
-	  /* This code gives the effect of having p/2 to better than
-	     128 bits precision. */
 
-	  significand(&tmp) = q;
-	  setexponent16(&tmp, 63);
-	  FPU_normalize(&tmp);         /* This must return TAG_Valid */
-	  tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION,
-			     SIGN_POS,
-			     exponent(&CONST_PI2extra) + exponent(&tmp));
-	  setsign(&tmp, getsign(&CONST_PI2extra));
-	  st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp,
+	control_word &= ~CW_RC;
+	control_word |= RC_CHOP;
+
+	setpositive(st0_ptr);
+	tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
+			SIGN_POS);
+
+	FPU_round_to_int(&tmp, tag);	/* Fortunately, this can't overflow
+					   to 2^64 */
+	q = significand(&tmp);
+	if (q) {
+		rem_kernel(significand(st0_ptr),
+			   &significand(&tmp),
+			   significand(&CONST_PI2),
+			   q, exponent(st0_ptr) - exponent(&CONST_PI2));
+		setexponent16(&tmp, exponent(&CONST_PI2));
+		st0_tag = FPU_normalize(&tmp);
+		FPU_copy_to_reg0(&tmp, st0_tag);
+	}
+
+	if ((even && !(q & 1)) || (!even && (q & 1))) {
+		st0_tag =
+		    FPU_sub(REV | LOADED | TAG_Valid, (int)&CONST_PI2,
 			    FULL_PRECISION);
-	  if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) &&
-	      ((st0_ptr->sigh > CONST_PI2.sigh)
-	       || ((st0_ptr->sigh == CONST_PI2.sigh)
-		   && (st0_ptr->sigl > CONST_PI2.sigl))) )
-	    {
-	      /* CONST_PI2extra is negative, so the result of the
-		 subtraction can be larger than pi/2. This means
-		 that the argument is actually in a different quadrant.
-		 The correction is always < pi/2, so it can't overflow
-		 into yet another quadrant. */
-	      st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2,
-				FULL_PRECISION);
-	      q++;
-	    }
+
+#ifdef BETTER_THAN_486
+		/* So far, the results are exact but based upon a 64 bit
+		   precision approximation to pi/2. The technique used
+		   now is equivalent to using an approximation to pi/2 which
+		   is accurate to about 128 bits. */
+		if ((exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)
+		    || (q > 1)) {
+			/* This code gives the effect of having pi/2 to better than
+			   128 bits precision. */
+
+			significand(&tmp) = q + 1;
+			setexponent16(&tmp, 63);
+			FPU_normalize(&tmp);
+			tmptag =
+			    FPU_u_mul(&CONST_PI2extra, &tmp, &tmp,
+				      FULL_PRECISION, SIGN_POS,
+				      exponent(&CONST_PI2extra) +
+				      exponent(&tmp));
+			setsign(&tmp, getsign(&CONST_PI2extra));
+			st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
+			if (signnegative(st0_ptr)) {
+				/* CONST_PI2extra is negative, so the result of the addition
+				   can be negative. This means that the argument is actually
+				   in a different quadrant. The correction is always < pi/2,
+				   so it can't overflow into yet another quadrant. */
+				setpositive(st0_ptr);
+				q++;
+			}
+		}
+#endif /* BETTER_THAN_486 */
+	}
+#ifdef BETTER_THAN_486
+	else {
+		/* So far, the results are exact but based upon a 64 bit
+		   precision approximation to pi/2. The technique used
+		   now is equivalent to using an approximation to pi/2 which
+		   is accurate to about 128 bits. */
+		if (((q > 0)
+		     && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
+		    || (q > 1)) {
+			/* This code gives the effect of having p/2 to better than
+			   128 bits precision. */
+
+			significand(&tmp) = q;
+			setexponent16(&tmp, 63);
+			FPU_normalize(&tmp);	/* This must return TAG_Valid */
+			tmptag =
+			    FPU_u_mul(&CONST_PI2extra, &tmp, &tmp,
+				      FULL_PRECISION, SIGN_POS,
+				      exponent(&CONST_PI2extra) +
+				      exponent(&tmp));
+			setsign(&tmp, getsign(&CONST_PI2extra));
+			st0_tag = FPU_sub(LOADED | (tmptag & 0x0f), (int)&tmp,
+					  FULL_PRECISION);
+			if ((exponent(st0_ptr) == exponent(&CONST_PI2)) &&
+			    ((st0_ptr->sigh > CONST_PI2.sigh)
+			     || ((st0_ptr->sigh == CONST_PI2.sigh)
+				 && (st0_ptr->sigl > CONST_PI2.sigl)))) {
+				/* CONST_PI2extra is negative, so the result of the
+				   subtraction can be larger than pi/2. This means
+				   that the argument is actually in a different quadrant.
+				   The correction is always < pi/2, so it can't overflow
+				   into yet another quadrant. */
+				st0_tag =
+				    FPU_sub(REV | LOADED | TAG_Valid,
+					    (int)&CONST_PI2, FULL_PRECISION);
+				q++;
+			}
+		}
 	}
-    }
 #endif /* BETTER_THAN_486 */
 
-  FPU_settag0(st0_tag);
-  control_word = old_cw;
-  partial_status = saved_status & ~SW_C2;     /* Reduction complete. */
+	FPU_settag0(st0_tag);
+	control_word = old_cw;
+	partial_status = saved_status & ~SW_C2;	/* Reduction complete. */
 
-  return (q & 3) | even;
+	return (q & 3) | even;
 }
 
-
 /* Convert a long to register */
 static void convert_l2reg(long const *arg, int deststnr)
 {
-  int tag;
-  long num = *arg;
-  u_char sign;
-  FPU_REG *dest = &st(deststnr);
+	int tag;
+	long num = *arg;
+	u_char sign;
+	FPU_REG *dest = &st(deststnr);
 
-  if (num == 0)
-    {
-      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-      return;
-    }
+	if (num == 0) {
+		FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+		return;
+	}
 
-  if (num > 0)
-    { sign = SIGN_POS; }
-  else
-    { num = -num; sign = SIGN_NEG; }
+	if (num > 0) {
+		sign = SIGN_POS;
+	} else {
+		num = -num;
+		sign = SIGN_NEG;
+	}
 
-  dest->sigh = num;
-  dest->sigl = 0;
-  setexponent16(dest, 31);
-  tag = FPU_normalize(dest);
-  FPU_settagi(deststnr, tag);
-  setsign(dest, sign);
-  return;
+	dest->sigh = num;
+	dest->sigl = 0;
+	setexponent16(dest, 31);
+	tag = FPU_normalize(dest);
+	FPU_settagi(deststnr, tag);
+	setsign(dest, sign);
+	return;
 }
 
-
-static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
+static void single_arg_error(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  if ( st0_tag == TAG_Empty )
-    FPU_stack_underflow();  /* Puts a QNaN in st(0) */
-  else if ( st0_tag == TW_NaN )
-    real_1op_NaN(st0_ptr);       /* return with a NaN in st(0) */
+	if (st0_tag == TAG_Empty)
+		FPU_stack_underflow();	/* Puts a QNaN in st(0) */
+	else if (st0_tag == TW_NaN)
+		real_1op_NaN(st0_ptr);	/* return with a NaN in st(0) */
 #ifdef PARANOID
-  else
-    EXCEPTION(EX_INTERNAL|0x0112);
+	else
+		EXCEPTION(EX_INTERNAL | 0x0112);
 #endif /* PARANOID */
 }
 
-
-static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
+static void single_arg_2_error(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  int isNaN;
+	int isNaN;
 
-  switch ( st0_tag )
-    {
-    case TW_NaN:
-      isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000);
-      if ( isNaN && !(st0_ptr->sigh & 0x40000000) )   /* Signaling ? */
-	{
-	  EXCEPTION(EX_Invalid);
-	  if ( control_word & CW_Invalid )
-	    {
-	      /* The masked response */
-	      /* Convert to a QNaN */
-	      st0_ptr->sigh |= 0x40000000;
-	      push();
-	      FPU_copy_to_reg0(st0_ptr, TAG_Special);
-	    }
-	}
-      else if ( isNaN )
-	{
-	  /* A QNaN */
-	  push();
-	  FPU_copy_to_reg0(st0_ptr, TAG_Special);
-	}
-      else
-	{
-	  /* pseudoNaN or other unsupported */
-	  EXCEPTION(EX_Invalid);
-	  if ( control_word & CW_Invalid )
-	    {
-	      /* The masked response */
-	      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-	      push();
-	      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-	    }
-	}
-      break;              /* return with a NaN in st(0) */
+	switch (st0_tag) {
+	case TW_NaN:
+		isNaN = (exponent(st0_ptr) == EXP_OVER)
+		    && (st0_ptr->sigh & 0x80000000);
+		if (isNaN && !(st0_ptr->sigh & 0x40000000)) {	/* Signaling ? */
+			EXCEPTION(EX_Invalid);
+			if (control_word & CW_Invalid) {
+				/* The masked response */
+				/* Convert to a QNaN */
+				st0_ptr->sigh |= 0x40000000;
+				push();
+				FPU_copy_to_reg0(st0_ptr, TAG_Special);
+			}
+		} else if (isNaN) {
+			/* A QNaN */
+			push();
+			FPU_copy_to_reg0(st0_ptr, TAG_Special);
+		} else {
+			/* pseudoNaN or other unsupported */
+			EXCEPTION(EX_Invalid);
+			if (control_word & CW_Invalid) {
+				/* The masked response */
+				FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+				push();
+				FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+			}
+		}
+		break;		/* return with a NaN in st(0) */
 #ifdef PARANOID
-    default:
-      EXCEPTION(EX_INTERNAL|0x0112);
+	default:
+		EXCEPTION(EX_INTERNAL | 0x0112);
 #endif /* PARANOID */
-    }
+	}
 }
 
-
 /*---------------------------------------------------------------------------*/
 
-static void f2xm1(FPU_REG *st0_ptr, u_char tag)
+static void f2xm1(FPU_REG * st0_ptr, u_char tag)
 {
-  FPU_REG a;
+	FPU_REG a;
 
-  clear_C1();
+	clear_C1();
 
-  if ( tag == TAG_Valid )
-    {
-      /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
-      if ( exponent(st0_ptr) < 0 )
-	{
-	denormal_arg:
+	if (tag == TAG_Valid) {
+		/* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
+		if (exponent(st0_ptr) < 0) {
+		      denormal_arg:
 
-	  FPU_to_exp16(st0_ptr, &a);
+			FPU_to_exp16(st0_ptr, &a);
 
-	  /* poly_2xm1(x) requires 0 < st(0) < 1. */
-	  poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
+			/* poly_2xm1(x) requires 0 < st(0) < 1. */
+			poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
+		}
+		set_precision_flag_up();	/* 80486 appears to always do this */
+		return;
 	}
-      set_precision_flag_up();   /* 80486 appears to always do this */
-      return;
-    }
 
-  if ( tag == TAG_Zero )
-    return;
+	if (tag == TAG_Zero)
+		return;
 
-  if ( tag == TAG_Special )
-    tag = FPU_Special(st0_ptr);
+	if (tag == TAG_Special)
+		tag = FPU_Special(st0_ptr);
 
-  switch ( tag )
-    {
-    case TW_Denormal:
-      if ( denormal_operand() < 0 )
-	return;
-      goto denormal_arg;
-    case TW_Infinity:
-      if ( signnegative(st0_ptr) )
-	{
-	  /* -infinity gives -1 (p16-10) */
-	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-	  setnegative(st0_ptr);
+	switch (tag) {
+	case TW_Denormal:
+		if (denormal_operand() < 0)
+			return;
+		goto denormal_arg;
+	case TW_Infinity:
+		if (signnegative(st0_ptr)) {
+			/* -infinity gives -1 (p16-10) */
+			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+			setnegative(st0_ptr);
+		}
+		return;
+	default:
+		single_arg_error(st0_ptr, tag);
 	}
-      return;
-    default:
-      single_arg_error(st0_ptr, tag);
-    }
 }
 
-
-static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
+static void fptan(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st_new_ptr;
-  int q;
-  u_char arg_sign = getsign(st0_ptr);
+	FPU_REG *st_new_ptr;
+	int q;
+	u_char arg_sign = getsign(st0_ptr);
 
-  /* Stack underflow has higher priority */
-  if ( st0_tag == TAG_Empty )
-    {
-      FPU_stack_underflow();  /* Puts a QNaN in st(0) */
-      if ( control_word & CW_Invalid )
-	{
-	  st_new_ptr = &st(-1);
-	  push();
-	  FPU_stack_underflow();  /* Puts a QNaN in the new st(0) */
+	/* Stack underflow has higher priority */
+	if (st0_tag == TAG_Empty) {
+		FPU_stack_underflow();	/* Puts a QNaN in st(0) */
+		if (control_word & CW_Invalid) {
+			st_new_ptr = &st(-1);
+			push();
+			FPU_stack_underflow();	/* Puts a QNaN in the new st(0) */
+		}
+		return;
 	}
-      return;
-    }
 
-  if ( STACK_OVERFLOW )
-    { FPU_stack_overflow(); return; }
-
-  if ( st0_tag == TAG_Valid )
-    {
-      if ( exponent(st0_ptr) > -40 )
-	{
-	  if ( (q = trig_arg(st0_ptr, 0)) == -1 )
-	    {
-	      /* Operand is out of range */
-	      return;
-	    }
-
-	  poly_tan(st0_ptr);
-	  setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
-	  set_precision_flag_up();  /* We do not really know if up or down */
+	if (STACK_OVERFLOW) {
+		FPU_stack_overflow();
+		return;
 	}
-      else
-	{
-	  /* For a small arg, the result == the argument */
-	  /* Underflow may happen */
 
-	denormal_arg:
+	if (st0_tag == TAG_Valid) {
+		if (exponent(st0_ptr) > -40) {
+			if ((q = trig_arg(st0_ptr, 0)) == -1) {
+				/* Operand is out of range */
+				return;
+			}
 
-	  FPU_to_exp16(st0_ptr, st0_ptr);
-      
-	  st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
-	  FPU_settag0(st0_tag);
+			poly_tan(st0_ptr);
+			setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
+			set_precision_flag_up();	/* We do not really know if up or down */
+		} else {
+			/* For a small arg, the result == the argument */
+			/* Underflow may happen */
+
+		      denormal_arg:
+
+			FPU_to_exp16(st0_ptr, st0_ptr);
+
+			st0_tag =
+			    FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+			FPU_settag0(st0_tag);
+		}
+		push();
+		FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+		return;
 	}
-      push();
-      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-      return;
-    }
 
-  if ( st0_tag == TAG_Zero )
-    {
-      push();
-      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-      setcc(0);
-      return;
-    }
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-
-  if ( st0_tag == TW_Denormal )
-    {
-      if ( denormal_operand() < 0 )
-	return;
-
-      goto denormal_arg;
-    }
-
-  if ( st0_tag == TW_Infinity )
-    {
-      /* The 80486 treats infinity as an invalid operand */
-      if ( arith_invalid(0) >= 0 )
-	{
-	  st_new_ptr = &st(-1);
-	  push();
-	  arith_invalid(0);
+	if (st0_tag == TAG_Zero) {
+		push();
+		FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+		setcc(0);
+		return;
 	}
-      return;
-    }
 
-  single_arg_2_error(st0_ptr, st0_tag);
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+
+	if (st0_tag == TW_Denormal) {
+		if (denormal_operand() < 0)
+			return;
+
+		goto denormal_arg;
+	}
+
+	if (st0_tag == TW_Infinity) {
+		/* The 80486 treats infinity as an invalid operand */
+		if (arith_invalid(0) >= 0) {
+			st_new_ptr = &st(-1);
+			push();
+			arith_invalid(0);
+		}
+		return;
+	}
+
+	single_arg_2_error(st0_ptr, st0_tag);
 }
 
-
-static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
+static void fxtract(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st_new_ptr;
-  u_char sign;
-  register FPU_REG *st1_ptr = st0_ptr;  /* anticipate */
+	FPU_REG *st_new_ptr;
+	u_char sign;
+	register FPU_REG *st1_ptr = st0_ptr;	/* anticipate */
 
-  if ( STACK_OVERFLOW )
-    {  FPU_stack_overflow(); return; }
-
-  clear_C1();
-
-  if ( st0_tag == TAG_Valid )
-    {
-      long e;
-
-      push();
-      sign = getsign(st1_ptr);
-      reg_copy(st1_ptr, st_new_ptr);
-      setexponent16(st_new_ptr, exponent(st_new_ptr));
-
-    denormal_arg:
-
-      e = exponent16(st_new_ptr);
-      convert_l2reg(&e, 1);
-      setexponentpos(st_new_ptr, 0);
-      setsign(st_new_ptr, sign);
-      FPU_settag0(TAG_Valid);       /* Needed if arg was a denormal */
-      return;
-    }
-  else if ( st0_tag == TAG_Zero )
-    {
-      sign = getsign(st0_ptr);
-
-      if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 )
-	return;
-
-      push();
-      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
-      setsign(st_new_ptr, sign);
-      return;
-    }
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-
-  if ( st0_tag == TW_Denormal )
-    {
-      if (denormal_operand() < 0 )
-	return;
-
-      push();
-      sign = getsign(st1_ptr);
-      FPU_to_exp16(st1_ptr, st_new_ptr);
-      goto denormal_arg;
-    }
-  else if ( st0_tag == TW_Infinity )
-    {
-      sign = getsign(st0_ptr);
-      setpositive(st0_ptr);
-      push();
-      FPU_copy_to_reg0(&CONST_INF, TAG_Special);
-      setsign(st_new_ptr, sign);
-      return;
-    }
-  else if ( st0_tag == TW_NaN )
-    {
-      if ( real_1op_NaN(st0_ptr) < 0 )
-	return;
-
-      push();
-      FPU_copy_to_reg0(st0_ptr, TAG_Special);
-      return;
-    }
-  else if ( st0_tag == TAG_Empty )
-    {
-      /* Is this the correct behaviour? */
-      if ( control_word & EX_Invalid )
-	{
-	  FPU_stack_underflow();
-	  push();
-	  FPU_stack_underflow();
+	if (STACK_OVERFLOW) {
+		FPU_stack_overflow();
+		return;
+	}
+
+	clear_C1();
+
+	if (st0_tag == TAG_Valid) {
+		long e;
+
+		push();
+		sign = getsign(st1_ptr);
+		reg_copy(st1_ptr, st_new_ptr);
+		setexponent16(st_new_ptr, exponent(st_new_ptr));
+
+	      denormal_arg:
+
+		e = exponent16(st_new_ptr);
+		convert_l2reg(&e, 1);
+		setexponentpos(st_new_ptr, 0);
+		setsign(st_new_ptr, sign);
+		FPU_settag0(TAG_Valid);	/* Needed if arg was a denormal */
+		return;
+	} else if (st0_tag == TAG_Zero) {
+		sign = getsign(st0_ptr);
+
+		if (FPU_divide_by_zero(0, SIGN_NEG) < 0)
+			return;
+
+		push();
+		FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+		setsign(st_new_ptr, sign);
+		return;
+	}
+
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+
+	if (st0_tag == TW_Denormal) {
+		if (denormal_operand() < 0)
+			return;
+
+		push();
+		sign = getsign(st1_ptr);
+		FPU_to_exp16(st1_ptr, st_new_ptr);
+		goto denormal_arg;
+	} else if (st0_tag == TW_Infinity) {
+		sign = getsign(st0_ptr);
+		setpositive(st0_ptr);
+		push();
+		FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+		setsign(st_new_ptr, sign);
+		return;
+	} else if (st0_tag == TW_NaN) {
+		if (real_1op_NaN(st0_ptr) < 0)
+			return;
+
+		push();
+		FPU_copy_to_reg0(st0_ptr, TAG_Special);
+		return;
+	} else if (st0_tag == TAG_Empty) {
+		/* Is this the correct behaviour? */
+		if (control_word & EX_Invalid) {
+			FPU_stack_underflow();
+			push();
+			FPU_stack_underflow();
+		} else
+			EXCEPTION(EX_StackUnder);
 	}
-      else
-	EXCEPTION(EX_StackUnder);
-    }
 #ifdef PARANOID
-  else
-    EXCEPTION(EX_INTERNAL | 0x119);
+	else
+		EXCEPTION(EX_INTERNAL | 0x119);
 #endif /* PARANOID */
 }
 
-
 static void fdecstp(void)
 {
-  clear_C1();
-  top--;
+	clear_C1();
+	top--;
 }
 
 static void fincstp(void)
 {
-  clear_C1();
-  top++;
+	clear_C1();
+	top++;
 }
 
-
-static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
+static void fsqrt_(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  int expon;
+	int expon;
 
-  clear_C1();
+	clear_C1();
 
-  if ( st0_tag == TAG_Valid )
-    {
-      u_char tag;
-      
-      if (signnegative(st0_ptr))
-	{
-	  arith_invalid(0);  /* sqrt(negative) is invalid */
-	  return;
+	if (st0_tag == TAG_Valid) {
+		u_char tag;
+
+		if (signnegative(st0_ptr)) {
+			arith_invalid(0);	/* sqrt(negative) is invalid */
+			return;
+		}
+
+		/* make st(0) in  [1.0 .. 4.0) */
+		expon = exponent(st0_ptr);
+
+	      denormal_arg:
+
+		setexponent16(st0_ptr, (expon & 1));
+
+		/* Do the computation, the sign of the result will be positive. */
+		tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
+		addexponent(st0_ptr, expon >> 1);
+		FPU_settag0(tag);
+		return;
 	}
 
-      /* make st(0) in  [1.0 .. 4.0) */
-      expon = exponent(st0_ptr);
+	if (st0_tag == TAG_Zero)
+		return;
 
-    denormal_arg:
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
 
-      setexponent16(st0_ptr, (expon & 1));
+	if (st0_tag == TW_Infinity) {
+		if (signnegative(st0_ptr))
+			arith_invalid(0);	/* sqrt(-Infinity) is invalid */
+		return;
+	} else if (st0_tag == TW_Denormal) {
+		if (signnegative(st0_ptr)) {
+			arith_invalid(0);	/* sqrt(negative) is invalid */
+			return;
+		}
 
-      /* Do the computation, the sign of the result will be positive. */
-      tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
-      addexponent(st0_ptr, expon >> 1);
-      FPU_settag0(tag);
-      return;
-    }
+		if (denormal_operand() < 0)
+			return;
 
-  if ( st0_tag == TAG_Zero )
-    return;
+		FPU_to_exp16(st0_ptr, st0_ptr);
 
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
+		expon = exponent16(st0_ptr);
 
-  if ( st0_tag == TW_Infinity )
-    {
-      if ( signnegative(st0_ptr) )
-	arith_invalid(0);  /* sqrt(-Infinity) is invalid */
-      return;
-    }
-  else if ( st0_tag == TW_Denormal )
-    {
-      if (signnegative(st0_ptr))
-	{
-	  arith_invalid(0);  /* sqrt(negative) is invalid */
-	  return;
+		goto denormal_arg;
 	}
 
-      if ( denormal_operand() < 0 )
-	return;
-
-      FPU_to_exp16(st0_ptr, st0_ptr);
-
-      expon = exponent16(st0_ptr);
-
-      goto denormal_arg;
-    }
-
-  single_arg_error(st0_ptr, st0_tag);
+	single_arg_error(st0_ptr, st0_tag);
 
 }
 
-
-static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
+static void frndint_(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  int flags, tag;
+	int flags, tag;
 
-  if ( st0_tag == TAG_Valid )
-    {
-      u_char sign;
+	if (st0_tag == TAG_Valid) {
+		u_char sign;
 
-    denormal_arg:
+	      denormal_arg:
 
-      sign = getsign(st0_ptr);
+		sign = getsign(st0_ptr);
 
-      if (exponent(st0_ptr) > 63)
-	return;
+		if (exponent(st0_ptr) > 63)
+			return;
 
-      if ( st0_tag == TW_Denormal )
-	{
-	  if (denormal_operand() < 0 )
-	    return;
+		if (st0_tag == TW_Denormal) {
+			if (denormal_operand() < 0)
+				return;
+		}
+
+		/* Fortunately, this can't overflow to 2^64 */
+		if ((flags = FPU_round_to_int(st0_ptr, st0_tag)))
+			set_precision_flag(flags);
+
+		setexponent16(st0_ptr, 63);
+		tag = FPU_normalize(st0_ptr);
+		setsign(st0_ptr, sign);
+		FPU_settag0(tag);
+		return;
 	}
 
-      /* Fortunately, this can't overflow to 2^64 */
-      if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) )
-	set_precision_flag(flags);
+	if (st0_tag == TAG_Zero)
+		return;
 
-      setexponent16(st0_ptr, 63);
-      tag = FPU_normalize(st0_ptr);
-      setsign(st0_ptr, sign);
-      FPU_settag0(tag);
-      return;
-    }
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
 
-  if ( st0_tag == TAG_Zero )
-    return;
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-
-  if ( st0_tag == TW_Denormal )
-    goto denormal_arg;
-  else if ( st0_tag == TW_Infinity )
-    return;
-  else
-    single_arg_error(st0_ptr, st0_tag);
+	if (st0_tag == TW_Denormal)
+		goto denormal_arg;
+	else if (st0_tag == TW_Infinity)
+		return;
+	else
+		single_arg_error(st0_ptr, st0_tag);
 }
 
-
-static int fsin(FPU_REG *st0_ptr, u_char tag)
+static int fsin(FPU_REG * st0_ptr, u_char tag)
 {
-  u_char arg_sign = getsign(st0_ptr);
+	u_char arg_sign = getsign(st0_ptr);
 
-  if ( tag == TAG_Valid )
-    {
-      int q;
+	if (tag == TAG_Valid) {
+		int q;
 
-      if ( exponent(st0_ptr) > -40 )
-	{
-	  if ( (q = trig_arg(st0_ptr, 0)) == -1 )
-	    {
-	      /* Operand is out of range */
-	      return 1;
-	    }
+		if (exponent(st0_ptr) > -40) {
+			if ((q = trig_arg(st0_ptr, 0)) == -1) {
+				/* Operand is out of range */
+				return 1;
+			}
 
-	  poly_sine(st0_ptr);
-	  
-	  if (q & 2)
-	    changesign(st0_ptr);
+			poly_sine(st0_ptr);
 
-	  setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
+			if (q & 2)
+				changesign(st0_ptr);
 
-	  /* We do not really know if up or down */
-	  set_precision_flag_up();
-	  return 0;
+			setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
+
+			/* We do not really know if up or down */
+			set_precision_flag_up();
+			return 0;
+		} else {
+			/* For a small arg, the result == the argument */
+			set_precision_flag_up();	/* Must be up. */
+			return 0;
+		}
 	}
-      else
-	{
-	  /* For a small arg, the result == the argument */
-	  set_precision_flag_up();  /* Must be up. */
-	  return 0;
+
+	if (tag == TAG_Zero) {
+		setcc(0);
+		return 0;
 	}
-    }
 
-  if ( tag == TAG_Zero )
-    {
-      setcc(0);
-      return 0;
-    }
+	if (tag == TAG_Special)
+		tag = FPU_Special(st0_ptr);
 
-  if ( tag == TAG_Special )
-    tag = FPU_Special(st0_ptr);
+	if (tag == TW_Denormal) {
+		if (denormal_operand() < 0)
+			return 1;
 
-  if ( tag == TW_Denormal )
-    {
-      if ( denormal_operand() < 0 )
-	return 1;
+		/* For a small arg, the result == the argument */
+		/* Underflow may happen */
+		FPU_to_exp16(st0_ptr, st0_ptr);
 
-      /* For a small arg, the result == the argument */
-      /* Underflow may happen */
-      FPU_to_exp16(st0_ptr, st0_ptr);
-      
-      tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+		tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
 
-      FPU_settag0(tag);
+		FPU_settag0(tag);
 
-      return 0;
-    }
-  else if ( tag == TW_Infinity )
-    {
-      /* The 80486 treats infinity as an invalid operand */
-      arith_invalid(0);
-      return 1;
-    }
-  else
-    {
-      single_arg_error(st0_ptr, tag);
-      return 1;
-    }
+		return 0;
+	} else if (tag == TW_Infinity) {
+		/* The 80486 treats infinity as an invalid operand */
+		arith_invalid(0);
+		return 1;
+	} else {
+		single_arg_error(st0_ptr, tag);
+		return 1;
+	}
 }
 
-
-static int f_cos(FPU_REG *st0_ptr, u_char tag)
+static int f_cos(FPU_REG * st0_ptr, u_char tag)
 {
-  u_char st0_sign;
+	u_char st0_sign;
 
-  st0_sign = getsign(st0_ptr);
+	st0_sign = getsign(st0_ptr);
 
-  if ( tag == TAG_Valid )
-    {
-      int q;
+	if (tag == TAG_Valid) {
+		int q;
 
-      if ( exponent(st0_ptr) > -40 )
-	{
-	  if ( (exponent(st0_ptr) < 0)
-	      || ((exponent(st0_ptr) == 0)
-		  && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) )
-	    {
-	      poly_cos(st0_ptr);
+		if (exponent(st0_ptr) > -40) {
+			if ((exponent(st0_ptr) < 0)
+			    || ((exponent(st0_ptr) == 0)
+				&& (significand(st0_ptr) <=
+				    0xc90fdaa22168c234LL))) {
+				poly_cos(st0_ptr);
 
-	      /* We do not really know if up or down */
-	      set_precision_flag_down();
-	  
-	      return 0;
-	    }
-	  else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 )
-	    {
-	      poly_sine(st0_ptr);
+				/* We do not really know if up or down */
+				set_precision_flag_down();
 
-	      if ((q+1) & 2)
-		changesign(st0_ptr);
+				return 0;
+			} else if ((q = trig_arg(st0_ptr, FCOS)) != -1) {
+				poly_sine(st0_ptr);
 
-	      /* We do not really know if up or down */
-	      set_precision_flag_down();
-	  
-	      return 0;
-	    }
-	  else
-	    {
-	      /* Operand is out of range */
-	      return 1;
-	    }
-	}
-      else
-	{
-	denormal_arg:
+				if ((q + 1) & 2)
+					changesign(st0_ptr);
 
-	  setcc(0);
-	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+				/* We do not really know if up or down */
+				set_precision_flag_down();
+
+				return 0;
+			} else {
+				/* Operand is out of range */
+				return 1;
+			}
+		} else {
+		      denormal_arg:
+
+			setcc(0);
+			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
 #ifdef PECULIAR_486
-	  set_precision_flag_down();  /* 80486 appears to do this. */
+			set_precision_flag_down();	/* 80486 appears to do this. */
 #else
-	  set_precision_flag_up();  /* Must be up. */
+			set_precision_flag_up();	/* Must be up. */
 #endif /* PECULIAR_486 */
-	  return 0;
+			return 0;
+		}
+	} else if (tag == TAG_Zero) {
+		FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+		setcc(0);
+		return 0;
 	}
-    }
-  else if ( tag == TAG_Zero )
-    {
-      FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-      setcc(0);
-      return 0;
-    }
 
-  if ( tag == TAG_Special )
-    tag = FPU_Special(st0_ptr);
+	if (tag == TAG_Special)
+		tag = FPU_Special(st0_ptr);
 
-  if ( tag == TW_Denormal )
-    {
-      if ( denormal_operand() < 0 )
-	return 1;
+	if (tag == TW_Denormal) {
+		if (denormal_operand() < 0)
+			return 1;
 
-      goto denormal_arg;
-    }
-  else if ( tag == TW_Infinity )
-    {
-      /* The 80486 treats infinity as an invalid operand */
-      arith_invalid(0);
-      return 1;
-    }
-  else
-    {
-      single_arg_error(st0_ptr, tag);  /* requires st0_ptr == &st(0) */
-      return 1;
-    }
+		goto denormal_arg;
+	} else if (tag == TW_Infinity) {
+		/* The 80486 treats infinity as an invalid operand */
+		arith_invalid(0);
+		return 1;
+	} else {
+		single_arg_error(st0_ptr, tag);	/* requires st0_ptr == &st(0) */
+		return 1;
+	}
 }
 
-
-static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
+static void fcos(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  f_cos(st0_ptr, st0_tag);
+	f_cos(st0_ptr, st0_tag);
 }
 
-
-static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
+static void fsincos(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st_new_ptr;
-  FPU_REG arg;
-  u_char tag;
+	FPU_REG *st_new_ptr;
+	FPU_REG arg;
+	u_char tag;
 
-  /* Stack underflow has higher priority */
-  if ( st0_tag == TAG_Empty )
-    {
-      FPU_stack_underflow();  /* Puts a QNaN in st(0) */
-      if ( control_word & CW_Invalid )
-	{
-	  st_new_ptr = &st(-1);
-	  push();
-	  FPU_stack_underflow();  /* Puts a QNaN in the new st(0) */
+	/* Stack underflow has higher priority */
+	if (st0_tag == TAG_Empty) {
+		FPU_stack_underflow();	/* Puts a QNaN in st(0) */
+		if (control_word & CW_Invalid) {
+			st_new_ptr = &st(-1);
+			push();
+			FPU_stack_underflow();	/* Puts a QNaN in the new st(0) */
+		}
+		return;
 	}
-      return;
-    }
 
-  if ( STACK_OVERFLOW )
-    { FPU_stack_overflow(); return; }
-
-  if ( st0_tag == TAG_Special )
-    tag = FPU_Special(st0_ptr);
-  else
-    tag = st0_tag;
-
-  if ( tag == TW_NaN )
-    {
-      single_arg_2_error(st0_ptr, TW_NaN);
-      return;
-    }
-  else if ( tag == TW_Infinity )
-    {
-      /* The 80486 treats infinity as an invalid operand */
-      if ( arith_invalid(0) >= 0 )
-	{
-	  /* Masked response */
-	  push();
-	  arith_invalid(0);
+	if (STACK_OVERFLOW) {
+		FPU_stack_overflow();
+		return;
 	}
-      return;
-    }
 
-  reg_copy(st0_ptr, &arg);
-  if ( !fsin(st0_ptr, st0_tag) )
-    {
-      push();
-      FPU_copy_to_reg0(&arg, st0_tag);
-      f_cos(&st(0), st0_tag);
-    }
-  else
-    {
-      /* An error, so restore st(0) */
-      FPU_copy_to_reg0(&arg, st0_tag);
-    }
+	if (st0_tag == TAG_Special)
+		tag = FPU_Special(st0_ptr);
+	else
+		tag = st0_tag;
+
+	if (tag == TW_NaN) {
+		single_arg_2_error(st0_ptr, TW_NaN);
+		return;
+	} else if (tag == TW_Infinity) {
+		/* The 80486 treats infinity as an invalid operand */
+		if (arith_invalid(0) >= 0) {
+			/* Masked response */
+			push();
+			arith_invalid(0);
+		}
+		return;
+	}
+
+	reg_copy(st0_ptr, &arg);
+	if (!fsin(st0_ptr, st0_tag)) {
+		push();
+		FPU_copy_to_reg0(&arg, st0_tag);
+		f_cos(&st(0), st0_tag);
+	} else {
+		/* An error, so restore st(0) */
+		FPU_copy_to_reg0(&arg, st0_tag);
+	}
 }
 
-
 /*---------------------------------------------------------------------------*/
 /* The following all require two arguments: st(0) and st(1) */
 
@@ -826,1020 +743,901 @@ static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
    result must be zero.
  */
 static void rem_kernel(unsigned long long st0, unsigned long long *y,
-		       unsigned long long st1,
-		       unsigned long long q, int n)
+		       unsigned long long st1, unsigned long long q, int n)
 {
-  int dummy;
-  unsigned long long x;
+	int dummy;
+	unsigned long long x;
 
-  x = st0 << n;
+	x = st0 << n;
 
-  /* Do the required multiplication and subtraction in the one operation */
+	/* Do the required multiplication and subtraction in the one operation */
 
-  /* lsw x -= lsw st1 * lsw q */
-  asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1"
-		:"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]),
-		"=a" (dummy)
-		:"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0])
-		:"%dx");
-  /* msw x -= msw st1 * lsw q */
-  asm volatile ("mull %3; subl %%eax,%0"
-		:"=m" (((unsigned *)&x)[1]), "=a" (dummy)
-		:"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0])
-		:"%dx");
-  /* msw x -= lsw st1 * msw q */
-  asm volatile ("mull %3; subl %%eax,%0"
-		:"=m" (((unsigned *)&x)[1]), "=a" (dummy)
-		:"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1])
-		:"%dx");
+	/* lsw x -= lsw st1 * lsw q */
+	asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1":"=m"
+		      (((unsigned *)&x)[0]), "=m"(((unsigned *)&x)[1]),
+		      "=a"(dummy)
+		      :"2"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[0])
+		      :"%dx");
+	/* msw x -= msw st1 * lsw q */
+	asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]),
+		      "=a"(dummy)
+		      :"1"(((unsigned *)&st1)[1]), "m"(((unsigned *)&q)[0])
+		      :"%dx");
+	/* msw x -= lsw st1 * msw q */
+	asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]),
+		      "=a"(dummy)
+		      :"1"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[1])
+		      :"%dx");
 
-  *y = x;
+	*y = x;
 }
 
-
 /* Remainder of st(0) / st(1) */
 /* This routine produces exact results, i.e. there is never any
    rounding or truncation, etc of the result. */
-static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
+static void do_fprem(FPU_REG * st0_ptr, u_char st0_tag, int round)
 {
-  FPU_REG *st1_ptr = &st(1);
-  u_char st1_tag = FPU_gettagi(1);
+	FPU_REG *st1_ptr = &st(1);
+	u_char st1_tag = FPU_gettagi(1);
 
-  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
-    {
-      FPU_REG tmp, st0, st1;
-      u_char st0_sign, st1_sign;
-      u_char tmptag;
-      int tag;
-      int old_cw;
-      int expdif;
-      long long q;
-      unsigned short saved_status;
-      int cc;
+	if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+		FPU_REG tmp, st0, st1;
+		u_char st0_sign, st1_sign;
+		u_char tmptag;
+		int tag;
+		int old_cw;
+		int expdif;
+		long long q;
+		unsigned short saved_status;
+		int cc;
 
-    fprem_valid:
-      /* Convert registers for internal use. */
-      st0_sign = FPU_to_exp16(st0_ptr, &st0);
-      st1_sign = FPU_to_exp16(st1_ptr, &st1);
-      expdif = exponent16(&st0) - exponent16(&st1);
+	      fprem_valid:
+		/* Convert registers for internal use. */
+		st0_sign = FPU_to_exp16(st0_ptr, &st0);
+		st1_sign = FPU_to_exp16(st1_ptr, &st1);
+		expdif = exponent16(&st0) - exponent16(&st1);
 
-      old_cw = control_word;
-      cc = 0;
+		old_cw = control_word;
+		cc = 0;
 
-      /* We want the status following the denorm tests, but don't want
-	 the status changed by the arithmetic operations. */
-      saved_status = partial_status;
-      control_word &= ~CW_RC;
-      control_word |= RC_CHOP;
+		/* We want the status following the denorm tests, but don't want
+		   the status changed by the arithmetic operations. */
+		saved_status = partial_status;
+		control_word &= ~CW_RC;
+		control_word |= RC_CHOP;
 
-      if ( expdif < 64 )
-	{
-	  /* This should be the most common case */
+		if (expdif < 64) {
+			/* This should be the most common case */
 
-	  if ( expdif > -2 )
-	    {
-	      u_char sign = st0_sign ^ st1_sign;
-	      tag = FPU_u_div(&st0, &st1, &tmp,
-			      PR_64_BITS | RC_CHOP | 0x3f,
-			      sign);
-	      setsign(&tmp, sign);
+			if (expdif > -2) {
+				u_char sign = st0_sign ^ st1_sign;
+				tag = FPU_u_div(&st0, &st1, &tmp,
+						PR_64_BITS | RC_CHOP | 0x3f,
+						sign);
+				setsign(&tmp, sign);
 
-	      if ( exponent(&tmp) >= 0 )
-		{
-		  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't
-						   overflow to 2^64 */
-		  q = significand(&tmp);
+				if (exponent(&tmp) >= 0) {
+					FPU_round_to_int(&tmp, tag);	/* Fortunately, this can't
+									   overflow to 2^64 */
+					q = significand(&tmp);
 
-		  rem_kernel(significand(&st0),
-			     &significand(&tmp),
-			     significand(&st1),
-			     q, expdif);
+					rem_kernel(significand(&st0),
+						   &significand(&tmp),
+						   significand(&st1),
+						   q, expdif);
 
-		  setexponent16(&tmp, exponent16(&st1));
-		}
-	      else
-		{
-		  reg_copy(&st0, &tmp);
-		  q = 0;
-		}
+					setexponent16(&tmp, exponent16(&st1));
+				} else {
+					reg_copy(&st0, &tmp);
+					q = 0;
+				}
 
-	      if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
-		{
-		  /* We may need to subtract st(1) once more,
-		     to get a result <= 1/2 of st(1). */
-		  unsigned long long x;
-		  expdif = exponent16(&st1) - exponent16(&tmp);
-		  if ( expdif <= 1 )
-		    {
-		      if ( expdif == 0 )
-			x = significand(&st1) - significand(&tmp);
-		      else /* expdif is 1 */
-			x = (significand(&st1) << 1) - significand(&tmp);
-		      if ( (x < significand(&tmp)) ||
-			  /* or equi-distant (from 0 & st(1)) and q is odd */
-			  ((x == significand(&tmp)) && (q & 1) ) )
-			{
-			  st0_sign = ! st0_sign;
-			  significand(&tmp) = x;
-			  q++;
+				if ((round == RC_RND)
+				    && (tmp.sigh & 0xc0000000)) {
+					/* We may need to subtract st(1) once more,
+					   to get a result <= 1/2 of st(1). */
+					unsigned long long x;
+					expdif =
+					    exponent16(&st1) - exponent16(&tmp);
+					if (expdif <= 1) {
+						if (expdif == 0)
+							x = significand(&st1) -
+							    significand(&tmp);
+						else	/* expdif is 1 */
+							x = (significand(&st1)
+							     << 1) -
+							    significand(&tmp);
+						if ((x < significand(&tmp)) ||
+						    /* or equi-distant (from 0 & st(1)) and q is odd */
+						    ((x == significand(&tmp))
+						     && (q & 1))) {
+							st0_sign = !st0_sign;
+							significand(&tmp) = x;
+							q++;
+						}
+					}
+				}
+
+				if (q & 4)
+					cc |= SW_C0;
+				if (q & 2)
+					cc |= SW_C3;
+				if (q & 1)
+					cc |= SW_C1;
+			} else {
+				control_word = old_cw;
+				setcc(0);
+				return;
 			}
-		    }
+		} else {
+			/* There is a large exponent difference ( >= 64 ) */
+			/* To make much sense, the code in this section should
+			   be done at high precision. */
+			int exp_1, N;
+			u_char sign;
+
+			/* prevent overflow here */
+			/* N is 'a number between 32 and 63' (p26-113) */
+			reg_copy(&st0, &tmp);
+			tmptag = st0_tag;
+			N = (expdif & 0x0000001f) + 32;	/* This choice gives results
+							   identical to an AMD 486 */
+			setexponent16(&tmp, N);
+			exp_1 = exponent16(&st1);
+			setexponent16(&st1, 0);
+			expdif -= N;
+
+			sign = getsign(&tmp) ^ st1_sign;
+			tag =
+			    FPU_u_div(&tmp, &st1, &tmp,
+				      PR_64_BITS | RC_CHOP | 0x3f, sign);
+			setsign(&tmp, sign);
+
+			FPU_round_to_int(&tmp, tag);	/* Fortunately, this can't
+							   overflow to 2^64 */
+
+			rem_kernel(significand(&st0),
+				   &significand(&tmp),
+				   significand(&st1),
+				   significand(&tmp), exponent(&tmp)
+			    );
+			setexponent16(&tmp, exp_1 + expdif);
+
+			/* It is possible for the operation to be complete here.
+			   What does the IEEE standard say? The Intel 80486 manual
+			   implies that the operation will never be completed at this
+			   point, and the behaviour of a real 80486 confirms this.
+			 */
+			if (!(tmp.sigh | tmp.sigl)) {
+				/* The result is zero */
+				control_word = old_cw;
+				partial_status = saved_status;
+				FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+				setsign(&st0, st0_sign);
+#ifdef PECULIAR_486
+				setcc(SW_C2);
+#else
+				setcc(0);
+#endif /* PECULIAR_486 */
+				return;
+			}
+			cc = SW_C2;
 		}
 
-	      if (q & 4) cc |= SW_C0;
-	      if (q & 2) cc |= SW_C3;
-	      if (q & 1) cc |= SW_C1;
-	    }
-	  else
-	    {
-	      control_word = old_cw;
-	      setcc(0);
-	      return;
-	    }
-	}
-      else
-	{
-	  /* There is a large exponent difference ( >= 64 ) */
-	  /* To make much sense, the code in this section should
-	     be done at high precision. */
-	  int exp_1, N;
-	  u_char sign;
+		control_word = old_cw;
+		partial_status = saved_status;
+		tag = FPU_normalize_nuo(&tmp);
+		reg_copy(&tmp, st0_ptr);
 
-	  /* prevent overflow here */
-	  /* N is 'a number between 32 and 63' (p26-113) */
-	  reg_copy(&st0, &tmp);
-	  tmptag = st0_tag;
-	  N = (expdif & 0x0000001f) + 32;  /* This choice gives results
-					      identical to an AMD 486 */
-	  setexponent16(&tmp, N);
-	  exp_1 = exponent16(&st1);
-	  setexponent16(&st1, 0);
-	  expdif -= N;
+		/* The only condition to be looked for is underflow,
+		   and it can occur here only if underflow is unmasked. */
+		if ((exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
+		    && !(control_word & CW_Underflow)) {
+			setcc(cc);
+			tag = arith_underflow(st0_ptr);
+			setsign(st0_ptr, st0_sign);
+			FPU_settag0(tag);
+			return;
+		} else if ((exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero)) {
+			stdexp(st0_ptr);
+			setsign(st0_ptr, st0_sign);
+		} else {
+			tag =
+			    FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
+		}
+		FPU_settag0(tag);
+		setcc(cc);
 
-	  sign = getsign(&tmp) ^ st1_sign;
-	  tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
-			  sign);
-	  setsign(&tmp, sign);
-
-	  FPU_round_to_int(&tmp, tag);  /* Fortunately, this can't
-					   overflow to 2^64 */
-
-	  rem_kernel(significand(&st0),
-		     &significand(&tmp),
-		     significand(&st1),
-		     significand(&tmp),
-		     exponent(&tmp)
-		     ); 
-	  setexponent16(&tmp, exp_1 + expdif);
-
-	  /* It is possible for the operation to be complete here.
-	     What does the IEEE standard say? The Intel 80486 manual
-	     implies that the operation will never be completed at this
-	     point, and the behaviour of a real 80486 confirms this.
-	   */
-	  if ( !(tmp.sigh | tmp.sigl) )
-	    {
-	      /* The result is zero */
-	      control_word = old_cw;
-	      partial_status = saved_status;
-	      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
-	      setsign(&st0, st0_sign);
-#ifdef PECULIAR_486
-	      setcc(SW_C2);
-#else
-	      setcc(0);
-#endif /* PECULIAR_486 */
-	      return;
-	    }
-	  cc = SW_C2;
+		return;
 	}
 
-      control_word = old_cw;
-      partial_status = saved_status;
-      tag = FPU_normalize_nuo(&tmp);
-      reg_copy(&tmp, st0_ptr);
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+	if (st1_tag == TAG_Special)
+		st1_tag = FPU_Special(st1_ptr);
 
-      /* The only condition to be looked for is underflow,
-	 and it can occur here only if underflow is unmasked. */
-      if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
-	  && !(control_word & CW_Underflow) )
-	{
-	  setcc(cc);
-	  tag = arith_underflow(st0_ptr);
-	  setsign(st0_ptr, st0_sign);
-	  FPU_settag0(tag);
-	  return;
-	}
-      else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) )
-	{
-	  stdexp(st0_ptr);
-	  setsign(st0_ptr, st0_sign);
-	}
-      else
-	{
-	  tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
-	}
-      FPU_settag0(tag);
-      setcc(cc);
-
-      return;
-    }
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-  if ( st1_tag == TAG_Special )
-    st1_tag = FPU_Special(st1_ptr);
-
-  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
 	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
-	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
-    {
-      if ( denormal_operand() < 0 )
-	return;
-      goto fprem_valid;
-    }
-  else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
-    {
-      FPU_stack_underflow();
-      return;
-    }
-  else if ( st0_tag == TAG_Zero )
-    {
-      if ( st1_tag == TAG_Valid )
-	{
-	  setcc(0); return;
-	}
-      else if ( st1_tag == TW_Denormal )
-	{
-	  if ( denormal_operand() < 0 )
-	    return;
-	  setcc(0); return;
-	}
-      else if ( st1_tag == TAG_Zero )
-	{ arith_invalid(0); return; } /* fprem(?,0) always invalid */
-      else if ( st1_tag == TW_Infinity )
-	{ setcc(0); return; }
-    }
-  else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
-    {
-      if ( st1_tag == TAG_Zero )
-	{
-	  arith_invalid(0); /* fprem(Valid,Zero) is invalid */
-	  return;
-	}
-      else if ( st1_tag != TW_NaN )
-	{
-	  if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal))
-	       && (denormal_operand() < 0) )
-	    return;
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+		if (denormal_operand() < 0)
+			return;
+		goto fprem_valid;
+	} else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+		FPU_stack_underflow();
+		return;
+	} else if (st0_tag == TAG_Zero) {
+		if (st1_tag == TAG_Valid) {
+			setcc(0);
+			return;
+		} else if (st1_tag == TW_Denormal) {
+			if (denormal_operand() < 0)
+				return;
+			setcc(0);
+			return;
+		} else if (st1_tag == TAG_Zero) {
+			arith_invalid(0);
+			return;
+		} /* fprem(?,0) always invalid */
+		else if (st1_tag == TW_Infinity) {
+			setcc(0);
+			return;
+		}
+	} else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+		if (st1_tag == TAG_Zero) {
+			arith_invalid(0);	/* fprem(Valid,Zero) is invalid */
+			return;
+		} else if (st1_tag != TW_NaN) {
+			if (((st0_tag == TW_Denormal)
+			     || (st1_tag == TW_Denormal))
+			    && (denormal_operand() < 0))
+				return;
 
-	  if ( st1_tag == TW_Infinity )
-	    {
-	      /* fprem(Valid,Infinity) is o.k. */
-	      setcc(0); return;
-	    }
+			if (st1_tag == TW_Infinity) {
+				/* fprem(Valid,Infinity) is o.k. */
+				setcc(0);
+				return;
+			}
+		}
+	} else if (st0_tag == TW_Infinity) {
+		if (st1_tag != TW_NaN) {
+			arith_invalid(0);	/* fprem(Infinity,?) is invalid */
+			return;
+		}
 	}
-    }
-  else if ( st0_tag == TW_Infinity )
-    {
-      if ( st1_tag != TW_NaN )
-	{
-	  arith_invalid(0); /* fprem(Infinity,?) is invalid */
-	  return;
-	}
-    }
 
-  /* One of the registers must contain a NaN if we got here. */
+	/* One of the registers must contain a NaN if we got here. */
 
 #ifdef PARANOID
-  if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
-      EXCEPTION(EX_INTERNAL | 0x118);
+	if ((st0_tag != TW_NaN) && (st1_tag != TW_NaN))
+		EXCEPTION(EX_INTERNAL | 0x118);
 #endif /* PARANOID */
 
-  real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
+	real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
 
 }
 
-
 /* ST(1) <- ST(1) * log ST;  pop ST */
-static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
+static void fyl2x(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st1_ptr = &st(1), exponent;
-  u_char st1_tag = FPU_gettagi(1);
-  u_char sign;
-  int e, tag;
+	FPU_REG *st1_ptr = &st(1), exponent;
+	u_char st1_tag = FPU_gettagi(1);
+	u_char sign;
+	int e, tag;
 
-  clear_C1();
+	clear_C1();
 
-  if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
-    {
-    both_valid:
-      /* Both regs are Valid or Denormal */
-      if ( signpositive(st0_ptr) )
-	{
-	  if ( st0_tag == TW_Denormal )
-	    FPU_to_exp16(st0_ptr, st0_ptr);
-	  else
-	    /* Convert st(0) for internal use. */
-	    setexponent16(st0_ptr, exponent(st0_ptr));
+	if ((st0_tag == TAG_Valid) && (st1_tag == TAG_Valid)) {
+	      both_valid:
+		/* Both regs are Valid or Denormal */
+		if (signpositive(st0_ptr)) {
+			if (st0_tag == TW_Denormal)
+				FPU_to_exp16(st0_ptr, st0_ptr);
+			else
+				/* Convert st(0) for internal use. */
+				setexponent16(st0_ptr, exponent(st0_ptr));
 
-	  if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
-	    {
-	      /* Special case. The result can be precise. */
-	      u_char esign;
-	      e = exponent16(st0_ptr);
-	      if ( e >= 0 )
-		{
-		  exponent.sigh = e;
-		  esign = SIGN_POS;
+			if ((st0_ptr->sigh == 0x80000000)
+			    && (st0_ptr->sigl == 0)) {
+				/* Special case. The result can be precise. */
+				u_char esign;
+				e = exponent16(st0_ptr);
+				if (e >= 0) {
+					exponent.sigh = e;
+					esign = SIGN_POS;
+				} else {
+					exponent.sigh = -e;
+					esign = SIGN_NEG;
+				}
+				exponent.sigl = 0;
+				setexponent16(&exponent, 31);
+				tag = FPU_normalize_nuo(&exponent);
+				stdexp(&exponent);
+				setsign(&exponent, esign);
+				tag =
+				    FPU_mul(&exponent, tag, 1, FULL_PRECISION);
+				if (tag >= 0)
+					FPU_settagi(1, tag);
+			} else {
+				/* The usual case */
+				sign = getsign(st1_ptr);
+				if (st1_tag == TW_Denormal)
+					FPU_to_exp16(st1_ptr, st1_ptr);
+				else
+					/* Convert st(1) for internal use. */
+					setexponent16(st1_ptr,
+						      exponent(st1_ptr));
+				poly_l2(st0_ptr, st1_ptr, sign);
+			}
+		} else {
+			/* negative */
+			if (arith_invalid(1) < 0)
+				return;
 		}
-	      else
-		{
-		  exponent.sigh = -e;
-		  esign = SIGN_NEG;
+
+		FPU_pop();
+
+		return;
+	}
+
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+	if (st1_tag == TAG_Special)
+		st1_tag = FPU_Special(st1_ptr);
+
+	if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+		FPU_stack_underflow_pop(1);
+		return;
+	} else if ((st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal)) {
+		if (st0_tag == TAG_Zero) {
+			if (st1_tag == TAG_Zero) {
+				/* Both args zero is invalid */
+				if (arith_invalid(1) < 0)
+					return;
+			} else {
+				u_char sign;
+				sign = getsign(st1_ptr) ^ SIGN_NEG;
+				if (FPU_divide_by_zero(1, sign) < 0)
+					return;
+
+				setsign(st1_ptr, sign);
+			}
+		} else if (st1_tag == TAG_Zero) {
+			/* st(1) contains zero, st(0) valid <> 0 */
+			/* Zero is the valid answer */
+			sign = getsign(st1_ptr);
+
+			if (signnegative(st0_ptr)) {
+				/* log(negative) */
+				if (arith_invalid(1) < 0)
+					return;
+			} else if ((st0_tag == TW_Denormal)
+				   && (denormal_operand() < 0))
+				return;
+			else {
+				if (exponent(st0_ptr) < 0)
+					sign ^= SIGN_NEG;
+
+				FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+				setsign(st1_ptr, sign);
+			}
+		} else {
+			/* One or both operands are denormals. */
+			if (denormal_operand() < 0)
+				return;
+			goto both_valid;
 		}
-	      exponent.sigl = 0;
-	      setexponent16(&exponent, 31);
-	      tag = FPU_normalize_nuo(&exponent);
-	      stdexp(&exponent);
-	      setsign(&exponent, esign);
-	      tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
-	      if ( tag >= 0 )
-		FPU_settagi(1, tag);
-	    }
-	  else
-	    {
-	      /* The usual case */
-	      sign = getsign(st1_ptr);
-	      if ( st1_tag == TW_Denormal )
-		FPU_to_exp16(st1_ptr, st1_ptr);
-	      else
-		/* Convert st(1) for internal use. */
-		setexponent16(st1_ptr, exponent(st1_ptr));
-	      poly_l2(st0_ptr, st1_ptr, sign);
-	    }
+	} else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) {
+		if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+			return;
 	}
-      else
-	{
-	  /* negative */
-	  if ( arith_invalid(1) < 0 )
-	    return;
+	/* One or both arg must be an infinity */
+	else if (st0_tag == TW_Infinity) {
+		if ((signnegative(st0_ptr)) || (st1_tag == TAG_Zero)) {
+			/* log(-infinity) or 0*log(infinity) */
+			if (arith_invalid(1) < 0)
+				return;
+		} else {
+			u_char sign = getsign(st1_ptr);
+
+			if ((st1_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
+
+			FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+			setsign(st1_ptr, sign);
+		}
 	}
+	/* st(1) must be infinity here */
+	else if (((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
+		 && (signpositive(st0_ptr))) {
+		if (exponent(st0_ptr) >= 0) {
+			if ((exponent(st0_ptr) == 0) &&
+			    (st0_ptr->sigh == 0x80000000) &&
+			    (st0_ptr->sigl == 0)) {
+				/* st(0) holds 1.0 */
+				/* infinity*log(1) */
+				if (arith_invalid(1) < 0)
+					return;
+			}
+			/* else st(0) is positive and > 1.0 */
+		} else {
+			/* st(0) is positive and < 1.0 */
 
-      FPU_pop();
+			if ((st0_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
 
-      return;
-    }
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-  if ( st1_tag == TAG_Special )
-    st1_tag = FPU_Special(st1_ptr);
-
-  if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
-    {
-      FPU_stack_underflow_pop(1);
-      return;
-    }
-  else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
-    {
-      if ( st0_tag == TAG_Zero )
-	{
-	  if ( st1_tag == TAG_Zero )
-	    {
-	      /* Both args zero is invalid */
-	      if ( arith_invalid(1) < 0 )
-		return;
-	    }
-	  else
-	    {
-	      u_char sign;
-	      sign = getsign(st1_ptr)^SIGN_NEG;
-	      if ( FPU_divide_by_zero(1, sign) < 0 )
-		return;
-
-	      setsign(st1_ptr, sign);
-	    }
-	}
-      else if ( st1_tag == TAG_Zero )
-	{
-	  /* st(1) contains zero, st(0) valid <> 0 */
-	  /* Zero is the valid answer */
-	  sign = getsign(st1_ptr);
-	  
-	  if ( signnegative(st0_ptr) )
-	    {
-	      /* log(negative) */
-	      if ( arith_invalid(1) < 0 )
-		return;
-	    }
-	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-	  else
-	    {
-	      if ( exponent(st0_ptr) < 0 )
-		sign ^= SIGN_NEG;
-
-	      FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
-	      setsign(st1_ptr, sign);
-	    }
-	}
-      else
-	{
-	  /* One or both operands are denormals. */
-	  if ( denormal_operand() < 0 )
-	    return;
-	  goto both_valid;
-	}
-    }
-  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
-    {
-      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
-	return;
-    }
-  /* One or both arg must be an infinity */
-  else if ( st0_tag == TW_Infinity )
-    {
-      if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
-	{
-	  /* log(-infinity) or 0*log(infinity) */
-	  if ( arith_invalid(1) < 0 )
-	    return;
-	}
-      else
-	{
-	  u_char sign = getsign(st1_ptr);
-
-	  if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-
-	  FPU_copy_to_reg1(&CONST_INF, TAG_Special);
-	  setsign(st1_ptr, sign);
-	}
-    }
-  /* st(1) must be infinity here */
-  else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
-	    && ( signpositive(st0_ptr) ) )
-    {
-      if ( exponent(st0_ptr) >= 0 )
-	{
-	  if ( (exponent(st0_ptr) == 0) &&
-	      (st0_ptr->sigh == 0x80000000) &&
-	      (st0_ptr->sigl == 0) )
-	    {
-	      /* st(0) holds 1.0 */
-	      /* infinity*log(1) */
-	      if ( arith_invalid(1) < 0 )
-		return;
-	    }
-	  /* else st(0) is positive and > 1.0 */
-	}
-      else
-	{
-	  /* st(0) is positive and < 1.0 */
-
-	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-
-	  changesign(st1_ptr);
-	}
-    }
-  else
-    {
-      /* st(0) must be zero or negative */
-      if ( st0_tag == TAG_Zero )
-	{
-	  /* This should be invalid, but a real 80486 is happy with it. */
+			changesign(st1_ptr);
+		}
+	} else {
+		/* st(0) must be zero or negative */
+		if (st0_tag == TAG_Zero) {
+			/* This should be invalid, but a real 80486 is happy with it. */
 
 #ifndef PECULIAR_486
-	  sign = getsign(st1_ptr);
-	  if ( FPU_divide_by_zero(1, sign) < 0 )
-	    return;
+			sign = getsign(st1_ptr);
+			if (FPU_divide_by_zero(1, sign) < 0)
+				return;
 #endif /* PECULIAR_486 */
 
-	  changesign(st1_ptr);
+			changesign(st1_ptr);
+		} else if (arith_invalid(1) < 0)	/* log(negative) */
+			return;
 	}
-      else if ( arith_invalid(1) < 0 )	  /* log(negative) */
-	return;
-    }
 
-  FPU_pop();
+	FPU_pop();
 }
 
-
-static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
+static void fpatan(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st1_ptr = &st(1);
-  u_char st1_tag = FPU_gettagi(1);
-  int tag;
+	FPU_REG *st1_ptr = &st(1);
+	u_char st1_tag = FPU_gettagi(1);
+	int tag;
 
-  clear_C1();
-  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
-    {
-    valid_atan:
+	clear_C1();
+	if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+	      valid_atan:
 
-      poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
+		poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
 
-      FPU_pop();
+		FPU_pop();
 
-      return;
-    }
-
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-  if ( st1_tag == TAG_Special )
-    st1_tag = FPU_Special(st1_ptr);
-
-  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
-	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
-	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
-    {
-      if ( denormal_operand() < 0 )
-	return;
-
-      goto valid_atan;
-    }
-  else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
-    {
-      FPU_stack_underflow_pop(1);
-      return;
-    }
-  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
-    {
-      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
-	  FPU_pop();
-      return;
-    }
-  else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
-    {
-      u_char sign = getsign(st1_ptr);
-      if ( st0_tag == TW_Infinity )
-	{
-	  if ( st1_tag == TW_Infinity )
-	    {
-	      if ( signpositive(st0_ptr) )
-		{
-		  FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
-		}
-	      else
-		{
-		  setpositive(st1_ptr);
-		  tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
-				  FULL_PRECISION, SIGN_POS,
-				  exponent(&CONST_PI4), exponent(&CONST_PI2));
-		  if ( tag >= 0 )
-		    FPU_settagi(1, tag);
-		}
-	    }
-	  else
-	    {
-	      if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
 		return;
+	}
 
-	      if ( signpositive(st0_ptr) )
-		{
-		  FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
-		  setsign(st1_ptr, sign);   /* An 80486 preserves the sign */
-		  FPU_pop();
-		  return;
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+	if (st1_tag == TAG_Special)
+		st1_tag = FPU_Special(st1_ptr);
+
+	if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+		if (denormal_operand() < 0)
+			return;
+
+		goto valid_atan;
+	} else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+		FPU_stack_underflow_pop(1);
+		return;
+	} else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) {
+		if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0)
+			FPU_pop();
+		return;
+	} else if ((st0_tag == TW_Infinity) || (st1_tag == TW_Infinity)) {
+		u_char sign = getsign(st1_ptr);
+		if (st0_tag == TW_Infinity) {
+			if (st1_tag == TW_Infinity) {
+				if (signpositive(st0_ptr)) {
+					FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
+				} else {
+					setpositive(st1_ptr);
+					tag =
+					    FPU_u_add(&CONST_PI4, &CONST_PI2,
+						      st1_ptr, FULL_PRECISION,
+						      SIGN_POS,
+						      exponent(&CONST_PI4),
+						      exponent(&CONST_PI2));
+					if (tag >= 0)
+						FPU_settagi(1, tag);
+				}
+			} else {
+				if ((st1_tag == TW_Denormal)
+				    && (denormal_operand() < 0))
+					return;
+
+				if (signpositive(st0_ptr)) {
+					FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+					setsign(st1_ptr, sign);	/* An 80486 preserves the sign */
+					FPU_pop();
+					return;
+				} else {
+					FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+				}
+			}
+		} else {
+			/* st(1) is infinity, st(0) not infinity */
+			if ((st0_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
+
+			FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
 		}
-	      else
-		{
-		  FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+		setsign(st1_ptr, sign);
+	} else if (st1_tag == TAG_Zero) {
+		/* st(0) must be valid or zero */
+		u_char sign = getsign(st1_ptr);
+
+		if ((st0_tag == TW_Denormal) && (denormal_operand() < 0))
+			return;
+
+		if (signpositive(st0_ptr)) {
+			/* An 80486 preserves the sign */
+			FPU_pop();
+			return;
 		}
-	    }
+
+		FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+		setsign(st1_ptr, sign);
+	} else if (st0_tag == TAG_Zero) {
+		/* st(1) must be TAG_Valid here */
+		u_char sign = getsign(st1_ptr);
+
+		if ((st1_tag == TW_Denormal) && (denormal_operand() < 0))
+			return;
+
+		FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
+		setsign(st1_ptr, sign);
 	}
-      else
-	{
-	  /* st(1) is infinity, st(0) not infinity */
-	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-
-	  FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
-	}
-      setsign(st1_ptr, sign);
-    }
-  else if ( st1_tag == TAG_Zero )
-    {
-      /* st(0) must be valid or zero */
-      u_char sign = getsign(st1_ptr);
-
-      if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	return;
-
-      if ( signpositive(st0_ptr) )
-	{
-	  /* An 80486 preserves the sign */
-	  FPU_pop();
-	  return;
-	}
-
-      FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
-      setsign(st1_ptr, sign);
-    }
-  else if ( st0_tag == TAG_Zero )
-    {
-      /* st(1) must be TAG_Valid here */
-      u_char sign = getsign(st1_ptr);
-
-      if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
-	return;
-
-      FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
-      setsign(st1_ptr, sign);
-    }
 #ifdef PARANOID
-  else
-    EXCEPTION(EX_INTERNAL | 0x125);
+	else
+		EXCEPTION(EX_INTERNAL | 0x125);
 #endif /* PARANOID */
 
-  FPU_pop();
-  set_precision_flag_up();  /* We do not really know if up or down */
+	FPU_pop();
+	set_precision_flag_up();	/* We do not really know if up or down */
 }
 
-
-static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
+static void fprem(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  do_fprem(st0_ptr, st0_tag, RC_CHOP);
+	do_fprem(st0_ptr, st0_tag, RC_CHOP);
 }
 
-
-static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
+static void fprem1(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  do_fprem(st0_ptr, st0_tag, RC_RND);
+	do_fprem(st0_ptr, st0_tag, RC_RND);
 }
 
-
-static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
+static void fyl2xp1(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  u_char sign, sign1;
-  FPU_REG *st1_ptr = &st(1), a, b;
-  u_char st1_tag = FPU_gettagi(1);
+	u_char sign, sign1;
+	FPU_REG *st1_ptr = &st(1), a, b;
+	u_char st1_tag = FPU_gettagi(1);
 
-  clear_C1();
-  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
-    {
-    valid_yl2xp1:
+	clear_C1();
+	if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+	      valid_yl2xp1:
 
-      sign = getsign(st0_ptr);
-      sign1 = getsign(st1_ptr);
+		sign = getsign(st0_ptr);
+		sign1 = getsign(st1_ptr);
 
-      FPU_to_exp16(st0_ptr, &a);
-      FPU_to_exp16(st1_ptr, &b);
+		FPU_to_exp16(st0_ptr, &a);
+		FPU_to_exp16(st1_ptr, &b);
 
-      if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
-	return;
+		if (poly_l2p1(sign, sign1, &a, &b, st1_ptr))
+			return;
 
-      FPU_pop();
-      return;
-    }
+		FPU_pop();
+		return;
+	}
 
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-  if ( st1_tag == TAG_Special )
-    st1_tag = FPU_Special(st1_ptr);
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+	if (st1_tag == TAG_Special)
+		st1_tag = FPU_Special(st1_ptr);
 
-  if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+	if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
 	    || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
-	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
-    {
-      if ( denormal_operand() < 0 )
-	return;
+	    || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+		if (denormal_operand() < 0)
+			return;
 
-      goto valid_yl2xp1;
-    }
-  else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) )
-    {
-      FPU_stack_underflow_pop(1);
-      return;
-    }
-  else if ( st0_tag == TAG_Zero )
-    {
-      switch ( st1_tag )
-	{
-	case TW_Denormal:
-	  if ( denormal_operand() < 0 )
-	    return;
+		goto valid_yl2xp1;
+	} else if ((st0_tag == TAG_Empty) | (st1_tag == TAG_Empty)) {
+		FPU_stack_underflow_pop(1);
+		return;
+	} else if (st0_tag == TAG_Zero) {
+		switch (st1_tag) {
+		case TW_Denormal:
+			if (denormal_operand() < 0)
+				return;
 
-	case TAG_Zero:
-	case TAG_Valid:
-	  setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
-	  FPU_copy_to_reg1(st0_ptr, st0_tag);
-	  break;
+		case TAG_Zero:
+		case TAG_Valid:
+			setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
+			FPU_copy_to_reg1(st0_ptr, st0_tag);
+			break;
 
-	case TW_Infinity:
-	  /* Infinity*log(1) */
-	  if ( arith_invalid(1) < 0 )
-	    return;
-	  break;
+		case TW_Infinity:
+			/* Infinity*log(1) */
+			if (arith_invalid(1) < 0)
+				return;
+			break;
 
-	case TW_NaN:
-	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
-	    return;
-	  break;
+		case TW_NaN:
+			if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+				return;
+			break;
 
-	default:
+		default:
 #ifdef PARANOID
-	  EXCEPTION(EX_INTERNAL | 0x116);
-	  return;
+			EXCEPTION(EX_INTERNAL | 0x116);
+			return;
 #endif /* PARANOID */
-	  break;
-	}
-    }
-  else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
-    {
-      switch ( st1_tag )
-	{
-	case TAG_Zero:
-	  if ( signnegative(st0_ptr) )
-	    {
-	      if ( exponent(st0_ptr) >= 0 )
-		{
-		  /* st(0) holds <= -1.0 */
-#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
-		  changesign(st1_ptr);
-#else
-		  if ( arith_invalid(1) < 0 )
-		    return;
-#endif /* PECULIAR_486 */
+			break;
 		}
-	      else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-		return;
-	      else
-		changesign(st1_ptr);
-	    }
-	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-	  break;
-
-	case TW_Infinity:
-	  if ( signnegative(st0_ptr) )
-	    {
-	      if ( (exponent(st0_ptr) >= 0) &&
-		  !((st0_ptr->sigh == 0x80000000) &&
-		    (st0_ptr->sigl == 0)) )
-		{
-		  /* st(0) holds < -1.0 */
-#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
-		  changesign(st1_ptr);
+	} else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+		switch (st1_tag) {
+		case TAG_Zero:
+			if (signnegative(st0_ptr)) {
+				if (exponent(st0_ptr) >= 0) {
+					/* st(0) holds <= -1.0 */
+#ifdef PECULIAR_486		/* Stupid 80486 doesn't worry about log(negative). */
+					changesign(st1_ptr);
 #else
-		  if ( arith_invalid(1) < 0 ) return;
+					if (arith_invalid(1) < 0)
+						return;
 #endif /* PECULIAR_486 */
+				} else if ((st0_tag == TW_Denormal)
+					   && (denormal_operand() < 0))
+					return;
+				else
+					changesign(st1_ptr);
+			} else if ((st0_tag == TW_Denormal)
+				   && (denormal_operand() < 0))
+				return;
+			break;
+
+		case TW_Infinity:
+			if (signnegative(st0_ptr)) {
+				if ((exponent(st0_ptr) >= 0) &&
+				    !((st0_ptr->sigh == 0x80000000) &&
+				      (st0_ptr->sigl == 0))) {
+					/* st(0) holds < -1.0 */
+#ifdef PECULIAR_486		/* Stupid 80486 doesn't worry about log(negative). */
+					changesign(st1_ptr);
+#else
+					if (arith_invalid(1) < 0)
+						return;
+#endif /* PECULIAR_486 */
+				} else if ((st0_tag == TW_Denormal)
+					   && (denormal_operand() < 0))
+					return;
+				else
+					changesign(st1_ptr);
+			} else if ((st0_tag == TW_Denormal)
+				   && (denormal_operand() < 0))
+				return;
+			break;
+
+		case TW_NaN:
+			if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+				return;
 		}
-	      else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-		return;
-	      else
-		changesign(st1_ptr);
-	    }
-	  else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
-	  break;
 
-	case TW_NaN:
-	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
-	    return;
-	}
-
-    }
-  else if ( st0_tag == TW_NaN )
-    {
-      if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
-	return;
-    }
-  else if ( st0_tag == TW_Infinity )
-    {
-      if ( st1_tag == TW_NaN )
-	{
-	  if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
-	    return;
-	}
-      else if ( signnegative(st0_ptr) )
-	{
+	} else if (st0_tag == TW_NaN) {
+		if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+			return;
+	} else if (st0_tag == TW_Infinity) {
+		if (st1_tag == TW_NaN) {
+			if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+				return;
+		} else if (signnegative(st0_ptr)) {
 #ifndef PECULIAR_486
-	  /* This should have higher priority than denormals, but... */
-	  if ( arith_invalid(1) < 0 )  /* log(-infinity) */
-	    return;
+			/* This should have higher priority than denormals, but... */
+			if (arith_invalid(1) < 0)	/* log(-infinity) */
+				return;
 #endif /* PECULIAR_486 */
-	  if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
+			if ((st1_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
 #ifdef PECULIAR_486
-	  /* Denormal operands actually get higher priority */
-	  if ( arith_invalid(1) < 0 )  /* log(-infinity) */
-	    return;
+			/* Denormal operands actually get higher priority */
+			if (arith_invalid(1) < 0)	/* log(-infinity) */
+				return;
 #endif /* PECULIAR_486 */
-	}
-      else if ( st1_tag == TAG_Zero )
-	{
-	  /* log(infinity) */
-	  if ( arith_invalid(1) < 0 )
-	    return;
-	}
-	
-      /* st(1) must be valid here. */
+		} else if (st1_tag == TAG_Zero) {
+			/* log(infinity) */
+			if (arith_invalid(1) < 0)
+				return;
+		}
 
-      else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
-	return;
+		/* st(1) must be valid here. */
 
-      /* The Manual says that log(Infinity) is invalid, but a real
-	 80486 sensibly says that it is o.k. */
-      else
-	{
-	  u_char sign = getsign(st1_ptr);
-	  FPU_copy_to_reg1(&CONST_INF, TAG_Special);
-	  setsign(st1_ptr, sign);
+		else if ((st1_tag == TW_Denormal) && (denormal_operand() < 0))
+			return;
+
+		/* The Manual says that log(Infinity) is invalid, but a real
+		   80486 sensibly says that it is o.k. */
+		else {
+			u_char sign = getsign(st1_ptr);
+			FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+			setsign(st1_ptr, sign);
+		}
 	}
-    }
 #ifdef PARANOID
-  else
-    {
-      EXCEPTION(EX_INTERNAL | 0x117);
-      return;
-    }
+	else {
+		EXCEPTION(EX_INTERNAL | 0x117);
+		return;
+	}
 #endif /* PARANOID */
 
-  FPU_pop();
-  return;
+	FPU_pop();
+	return;
 
 }
 
-
-static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
+static void fscale(FPU_REG * st0_ptr, u_char st0_tag)
 {
-  FPU_REG *st1_ptr = &st(1);
-  u_char st1_tag = FPU_gettagi(1);
-  int old_cw = control_word;
-  u_char sign = getsign(st0_ptr);
+	FPU_REG *st1_ptr = &st(1);
+	u_char st1_tag = FPU_gettagi(1);
+	int old_cw = control_word;
+	u_char sign = getsign(st0_ptr);
 
-  clear_C1();
-  if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
-    {
-      long scale;
-      FPU_REG tmp;
+	clear_C1();
+	if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+		long scale;
+		FPU_REG tmp;
 
-      /* Convert register for internal use. */
-      setexponent16(st0_ptr, exponent(st0_ptr));
+		/* Convert register for internal use. */
+		setexponent16(st0_ptr, exponent(st0_ptr));
 
-    valid_scale:
+	      valid_scale:
 
-      if ( exponent(st1_ptr) > 30 )
-	{
-	  /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
+		if (exponent(st1_ptr) > 30) {
+			/* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
 
-	  if ( signpositive(st1_ptr) )
-	    {
-	      EXCEPTION(EX_Overflow);
-	      FPU_copy_to_reg0(&CONST_INF, TAG_Special);
-	    }
-	  else
-	    {
-	      EXCEPTION(EX_Underflow);
-	      FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
-	    }
-	  setsign(st0_ptr, sign);
-	  return;
+			if (signpositive(st1_ptr)) {
+				EXCEPTION(EX_Overflow);
+				FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+			} else {
+				EXCEPTION(EX_Underflow);
+				FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+			}
+			setsign(st0_ptr, sign);
+			return;
+		}
+
+		control_word &= ~CW_RC;
+		control_word |= RC_CHOP;
+		reg_copy(st1_ptr, &tmp);
+		FPU_round_to_int(&tmp, st1_tag);	/* This can never overflow here */
+		control_word = old_cw;
+		scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
+		scale += exponent16(st0_ptr);
+
+		setexponent16(st0_ptr, scale);
+
+		/* Use FPU_round() to properly detect under/overflow etc */
+		FPU_round(st0_ptr, 0, 0, control_word, sign);
+
+		return;
 	}
 
-      control_word &= ~CW_RC;
-      control_word |= RC_CHOP;
-      reg_copy(st1_ptr, &tmp);
-      FPU_round_to_int(&tmp, st1_tag);      /* This can never overflow here */
-      control_word = old_cw;
-      scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
-      scale += exponent16(st0_ptr);
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
+	if (st1_tag == TAG_Special)
+		st1_tag = FPU_Special(st1_ptr);
 
-      setexponent16(st0_ptr, scale);
+	if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+		switch (st1_tag) {
+		case TAG_Valid:
+			/* st(0) must be a denormal */
+			if ((st0_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
 
-      /* Use FPU_round() to properly detect under/overflow etc */
-      FPU_round(st0_ptr, 0, 0, control_word, sign);
+			FPU_to_exp16(st0_ptr, st0_ptr);	/* Will not be left on stack */
+			goto valid_scale;
 
-      return;
-    }
+		case TAG_Zero:
+			if (st0_tag == TW_Denormal)
+				denormal_operand();
+			return;
 
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-  if ( st1_tag == TAG_Special )
-    st1_tag = FPU_Special(st1_ptr);
+		case TW_Denormal:
+			denormal_operand();
+			return;
 
-  if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
-    {
-      switch ( st1_tag )
-	{
-	case TAG_Valid:
-	  /* st(0) must be a denormal */
-	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
+		case TW_Infinity:
+			if ((st0_tag == TW_Denormal)
+			    && (denormal_operand() < 0))
+				return;
 
-	  FPU_to_exp16(st0_ptr, st0_ptr);  /* Will not be left on stack */
-	  goto valid_scale;
+			if (signpositive(st1_ptr))
+				FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+			else
+				FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+			setsign(st0_ptr, sign);
+			return;
 
-	case TAG_Zero:
-	  if ( st0_tag == TW_Denormal )
-	    denormal_operand();
-	  return;
+		case TW_NaN:
+			real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+			return;
+		}
+	} else if (st0_tag == TAG_Zero) {
+		switch (st1_tag) {
+		case TAG_Valid:
+		case TAG_Zero:
+			return;
 
-	case TW_Denormal:
-	  denormal_operand();
-	  return;
+		case TW_Denormal:
+			denormal_operand();
+			return;
 
-	case TW_Infinity:
-	  if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
-	    return;
+		case TW_Infinity:
+			if (signpositive(st1_ptr))
+				arith_invalid(0);	/* Zero scaled by +Infinity */
+			return;
 
-	  if ( signpositive(st1_ptr) )
-	    FPU_copy_to_reg0(&CONST_INF, TAG_Special);
-	  else
-	    FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
-	  setsign(st0_ptr, sign);
-	  return;
+		case TW_NaN:
+			real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+			return;
+		}
+	} else if (st0_tag == TW_Infinity) {
+		switch (st1_tag) {
+		case TAG_Valid:
+		case TAG_Zero:
+			return;
 
-	case TW_NaN:
-	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
-	  return;
+		case TW_Denormal:
+			denormal_operand();
+			return;
+
+		case TW_Infinity:
+			if (signnegative(st1_ptr))
+				arith_invalid(0);	/* Infinity scaled by -Infinity */
+			return;
+
+		case TW_NaN:
+			real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+			return;
+		}
+	} else if (st0_tag == TW_NaN) {
+		if (st1_tag != TAG_Empty) {
+			real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+			return;
+		}
 	}
-    }
-  else if ( st0_tag == TAG_Zero )
-    {
-      switch ( st1_tag )
-	{
-	case TAG_Valid:
-	case TAG_Zero:
-	  return;
-
-	case TW_Denormal:
-	  denormal_operand();
-	  return;
-
-	case TW_Infinity:
-	  if ( signpositive(st1_ptr) )
-	    arith_invalid(0); /* Zero scaled by +Infinity */
-	  return;
-
-	case TW_NaN:
-	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
-	  return;
-	}
-    }
-  else if ( st0_tag == TW_Infinity )
-    {
-      switch ( st1_tag )
-	{
-	case TAG_Valid:
-	case TAG_Zero:
-	  return;
-
-	case TW_Denormal:
-	  denormal_operand();
-	  return;
-
-	case TW_Infinity:
-	  if ( signnegative(st1_ptr) )
-	    arith_invalid(0); /* Infinity scaled by -Infinity */
-	  return;
-
-	case TW_NaN:
-	  real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
-	  return;
-	}
-    }
-  else if ( st0_tag == TW_NaN )
-    {
-      if ( st1_tag != TAG_Empty )
-	{ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
-    }
-
 #ifdef PARANOID
-  if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
-    {
-      EXCEPTION(EX_INTERNAL | 0x115);
-      return;
-    }
+	if (!((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty))) {
+		EXCEPTION(EX_INTERNAL | 0x115);
+		return;
+	}
 #endif
 
-  /* At least one of st(0), st(1) must be empty */
-  FPU_stack_underflow();
+	/* At least one of st(0), st(1) must be empty */
+	FPU_stack_underflow();
 
 }
 
-
 /*---------------------------------------------------------------------------*/
 
 static FUNC_ST0 const trig_table_a[] = {
-  f2xm1, fyl2x, fptan, fpatan,
-  fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp
+	f2xm1, fyl2x, fptan, fpatan,
+	fxtract, fprem1, (FUNC_ST0) fdecstp, (FUNC_ST0) fincstp
 };
 
 void FPU_triga(void)
 {
-  (trig_table_a[FPU_rm])(&st(0), FPU_gettag0());
+	(trig_table_a[FPU_rm]) (&st(0), FPU_gettag0());
 }
 
-
-static FUNC_ST0 const trig_table_b[] =
-  {
-    fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos
-  };
+static FUNC_ST0 const trig_table_b[] = {
+	fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0) fsin, fcos
+};
 
 void FPU_trigb(void)
 {
-  (trig_table_b[FPU_rm])(&st(0), FPU_gettag0());
+	(trig_table_b[FPU_rm]) (&st(0), FPU_gettag0());
 }
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 2e2c51a8bd3a..d701e2b39e44 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -17,7 +17,6 @@
  |    other processes using the emulator while swapping is in progress.      |
  +---------------------------------------------------------------------------*/
 
-
 #include <linux/stddef.h>
 
 #include <asm/uaccess.h>
@@ -27,31 +26,30 @@
 #include "exception.h"
 #include "fpu_emu.h"
 
-
 #define FPU_WRITE_BIT 0x10
 
 static int reg_offset[] = {
-	offsetof(struct info,___eax),
-	offsetof(struct info,___ecx),
-	offsetof(struct info,___edx),
-	offsetof(struct info,___ebx),
-	offsetof(struct info,___esp),
-	offsetof(struct info,___ebp),
-	offsetof(struct info,___esi),
-	offsetof(struct info,___edi)
+	offsetof(struct info, ___eax),
+	offsetof(struct info, ___ecx),
+	offsetof(struct info, ___edx),
+	offsetof(struct info, ___ebx),
+	offsetof(struct info, ___esp),
+	offsetof(struct info, ___ebp),
+	offsetof(struct info, ___esi),
+	offsetof(struct info, ___edi)
 };
 
 #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
 
 static int reg_offset_vm86[] = {
-	offsetof(struct info,___cs),
-	offsetof(struct info,___vm86_ds),
-	offsetof(struct info,___vm86_es),
-	offsetof(struct info,___vm86_fs),
-	offsetof(struct info,___vm86_gs),
-	offsetof(struct info,___ss),
-	offsetof(struct info,___vm86_ds)
-      };
+	offsetof(struct info, ___cs),
+	offsetof(struct info, ___vm86_ds),
+	offsetof(struct info, ___vm86_es),
+	offsetof(struct info, ___vm86_fs),
+	offsetof(struct info, ___vm86_gs),
+	offsetof(struct info, ___ss),
+	offsetof(struct info, ___vm86_ds)
+};
 
 #define VM86_REG_(x) (*(unsigned short *) \
 		      (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
@@ -60,158 +58,141 @@ static int reg_offset_vm86[] = {
 #define ___GS ___ds
 
 static int reg_offset_pm[] = {
-	offsetof(struct info,___cs),
-	offsetof(struct info,___ds),
-	offsetof(struct info,___es),
-	offsetof(struct info,___fs),
-	offsetof(struct info,___GS),
-	offsetof(struct info,___ss),
-	offsetof(struct info,___ds)
-      };
+	offsetof(struct info, ___cs),
+	offsetof(struct info, ___ds),
+	offsetof(struct info, ___es),
+	offsetof(struct info, ___fs),
+	offsetof(struct info, ___GS),
+	offsetof(struct info, ___ss),
+	offsetof(struct info, ___ds)
+};
 
 #define PM_REG_(x) (*(unsigned short *) \
 		      (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
 
-
 /* Decode the SIB byte. This function assumes mod != 0 */
 static int sib(int mod, unsigned long *fpu_eip)
 {
-  u_char ss,index,base;
-  long offset;
+	u_char ss, index, base;
+	long offset;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_code_access_ok(1);
-  FPU_get_user(base, (u_char __user *) (*fpu_eip));   /* The SIB byte */
-  RE_ENTRANT_CHECK_ON;
-  (*fpu_eip)++;
-  ss = base >> 6;
-  index = (base >> 3) & 7;
-  base &= 7;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_code_access_ok(1);
+	FPU_get_user(base, (u_char __user *) (*fpu_eip));	/* The SIB byte */
+	RE_ENTRANT_CHECK_ON;
+	(*fpu_eip)++;
+	ss = base >> 6;
+	index = (base >> 3) & 7;
+	base &= 7;
 
-  if ((mod == 0) && (base == 5))
-    offset = 0;              /* No base register */
-  else
-    offset = REG_(base);
+	if ((mod == 0) && (base == 5))
+		offset = 0;	/* No base register */
+	else
+		offset = REG_(base);
 
-  if (index == 4)
-    {
-      /* No index register */
-      /* A non-zero ss is illegal */
-      if ( ss )
-	EXCEPTION(EX_Invalid);
-    }
-  else
-    {
-      offset += (REG_(index)) << ss;
-    }
+	if (index == 4) {
+		/* No index register */
+		/* A non-zero ss is illegal */
+		if (ss)
+			EXCEPTION(EX_Invalid);
+	} else {
+		offset += (REG_(index)) << ss;
+	}
 
-  if (mod == 1)
-    {
-      /* 8 bit signed displacement */
-      long displacement;
-      RE_ENTRANT_CHECK_OFF;
-      FPU_code_access_ok(1);
-      FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
-      offset += displacement;
-      RE_ENTRANT_CHECK_ON;
-      (*fpu_eip)++;
-    }
-  else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
-    {
-      /* 32 bit displacement */
-      long displacement;
-      RE_ENTRANT_CHECK_OFF;
-      FPU_code_access_ok(4);
-      FPU_get_user(displacement, (long __user *) (*fpu_eip));
-      offset += displacement;
-      RE_ENTRANT_CHECK_ON;
-      (*fpu_eip) += 4;
-    }
+	if (mod == 1) {
+		/* 8 bit signed displacement */
+		long displacement;
+		RE_ENTRANT_CHECK_OFF;
+		FPU_code_access_ok(1);
+		FPU_get_user(displacement, (signed char __user *)(*fpu_eip));
+		offset += displacement;
+		RE_ENTRANT_CHECK_ON;
+		(*fpu_eip)++;
+	} else if (mod == 2 || base == 5) {	/* The second condition also has mod==0 */
+		/* 32 bit displacement */
+		long displacement;
+		RE_ENTRANT_CHECK_OFF;
+		FPU_code_access_ok(4);
+		FPU_get_user(displacement, (long __user *)(*fpu_eip));
+		offset += displacement;
+		RE_ENTRANT_CHECK_ON;
+		(*fpu_eip) += 4;
+	}
 
-  return offset;
+	return offset;
 }
 
-
-static unsigned long vm86_segment(u_char segment,
-				  struct address *addr)
+static unsigned long vm86_segment(u_char segment, struct address *addr)
 {
-  segment--;
+	segment--;
 #ifdef PARANOID
-  if ( segment > PREFIX_SS_ )
-    {
-      EXCEPTION(EX_INTERNAL|0x130);
-      math_abort(FPU_info,SIGSEGV);
-    }
+	if (segment > PREFIX_SS_) {
+		EXCEPTION(EX_INTERNAL | 0x130);
+		math_abort(FPU_info, SIGSEGV);
+	}
 #endif /* PARANOID */
-  addr->selector = VM86_REG_(segment);
-  return (unsigned long)VM86_REG_(segment) << 4;
+	addr->selector = VM86_REG_(segment);
+	return (unsigned long)VM86_REG_(segment) << 4;
 }
 
-
 /* This should work for 16 and 32 bit protected mode. */
 static long pm_address(u_char FPU_modrm, u_char segment,
 		       struct address *addr, long offset)
-{ 
-  struct desc_struct descriptor;
-  unsigned long base_address, limit, address, seg_top;
+{
+	struct desc_struct descriptor;
+	unsigned long base_address, limit, address, seg_top;
 
-  segment--;
+	segment--;
 
 #ifdef PARANOID
-  /* segment is unsigned, so this also detects if segment was 0: */
-  if ( segment > PREFIX_SS_ )
-    {
-      EXCEPTION(EX_INTERNAL|0x132);
-      math_abort(FPU_info,SIGSEGV);
-    }
+	/* segment is unsigned, so this also detects if segment was 0: */
+	if (segment > PREFIX_SS_) {
+		EXCEPTION(EX_INTERNAL | 0x132);
+		math_abort(FPU_info, SIGSEGV);
+	}
 #endif /* PARANOID */
 
-  switch ( segment )
-    {
-      /* gs isn't used by the kernel, so it still has its
-	 user-space value. */
-    case PREFIX_GS_-1:
-      /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
-      savesegment(gs, addr->selector);
-      break;
-    default:
-      addr->selector = PM_REG_(segment);
-    }
-
-  descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
-  base_address = SEG_BASE_ADDR(descriptor);
-  address = base_address + offset;
-  limit = base_address
-	+ (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
-  if ( limit < base_address ) limit = 0xffffffff;
-
-  if ( SEG_EXPAND_DOWN(descriptor) )
-    {
-      if ( SEG_G_BIT(descriptor) )
-	seg_top = 0xffffffff;
-      else
-	{
-	  seg_top = base_address + (1 << 20);
-	  if ( seg_top < base_address ) seg_top = 0xffffffff;
+	switch (segment) {
+		/* gs isn't used by the kernel, so it still has its
+		   user-space value. */
+	case PREFIX_GS_ - 1:
+		/* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
+		savesegment(gs, addr->selector);
+		break;
+	default:
+		addr->selector = PM_REG_(segment);
 	}
-      access_limit =
-	(address <= limit) || (address >= seg_top) ? 0 :
-	  ((seg_top-address) >= 255 ? 255 : seg_top-address);
-    }
-  else
-    {
-      access_limit =
-	(address > limit) || (address < base_address) ? 0 :
-	  ((limit-address) >= 254 ? 255 : limit-address+1);
-    }
-  if ( SEG_EXECUTE_ONLY(descriptor) ||
-      (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
-    {
-      access_limit = 0;
-    }
-  return address;
-}
 
+	descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+	base_address = SEG_BASE_ADDR(descriptor);
+	address = base_address + offset;
+	limit = base_address
+	    + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1;
+	if (limit < base_address)
+		limit = 0xffffffff;
+
+	if (SEG_EXPAND_DOWN(descriptor)) {
+		if (SEG_G_BIT(descriptor))
+			seg_top = 0xffffffff;
+		else {
+			seg_top = base_address + (1 << 20);
+			if (seg_top < base_address)
+				seg_top = 0xffffffff;
+		}
+		access_limit =
+		    (address <= limit) || (address >= seg_top) ? 0 :
+		    ((seg_top - address) >= 255 ? 255 : seg_top - address);
+	} else {
+		access_limit =
+		    (address > limit) || (address < base_address) ? 0 :
+		    ((limit - address) >= 254 ? 255 : limit - address + 1);
+	}
+	if (SEG_EXECUTE_ONLY(descriptor) ||
+	    (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
+		access_limit = 0;
+	}
+	return address;
+}
 
 /*
        MOD R/M byte:  MOD == 3 has a special use for the FPU
@@ -221,7 +202,6 @@ static long pm_address(u_char FPU_modrm, u_char segment,
        .....   .........   .........
         MOD    OPCODE(2)     R/M
 
-
        SIB byte
 
        7   6   5   4   3   2   1   0
@@ -231,208 +211,194 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 */
 
 void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
-		  struct address *addr,
-		  fpu_addr_modes addr_modes)
+			     struct address *addr, fpu_addr_modes addr_modes)
 {
-  u_char mod;
-  unsigned rm = FPU_modrm & 7;
-  long *cpu_reg_ptr;
-  int address = 0;     /* Initialized just to stop compiler warnings. */
+	u_char mod;
+	unsigned rm = FPU_modrm & 7;
+	long *cpu_reg_ptr;
+	int address = 0;	/* Initialized just to stop compiler warnings. */
 
-  /* Memory accessed via the cs selector is write protected
-     in `non-segmented' 32 bit protected mode. */
-  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
-      && (addr_modes.override.segment == PREFIX_CS_) )
-    {
-      math_abort(FPU_info,SIGSEGV);
-    }
-
-  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
-
-  mod = (FPU_modrm >> 6) & 3;
-
-  if (rm == 4 && mod != 3)
-    {
-      address = sib(mod, fpu_eip);
-    }
-  else
-    {
-      cpu_reg_ptr = & REG_(rm);
-      switch (mod)
-	{
-	case 0:
-	  if (rm == 5)
-	    {
-	      /* Special case: disp32 */
-	      RE_ENTRANT_CHECK_OFF;
-	      FPU_code_access_ok(4);
-	      FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
-	      (*fpu_eip) += 4;
-	      RE_ENTRANT_CHECK_ON;
-	      addr->offset = address;
-	      return (void __user *) address;
-	    }
-	  else
-	    {
-	      address = *cpu_reg_ptr;  /* Just return the contents
-					  of the cpu register */
-	      addr->offset = address;
-	      return (void __user *) address;
-	    }
-	case 1:
-	  /* 8 bit signed displacement */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_code_access_ok(1);
-	  FPU_get_user(address, (signed char __user *) (*fpu_eip));
-	  RE_ENTRANT_CHECK_ON;
-	  (*fpu_eip)++;
-	  break;
-	case 2:
-	  /* 32 bit displacement */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_code_access_ok(4);
-	  FPU_get_user(address, (long __user *) (*fpu_eip));
-	  (*fpu_eip) += 4;
-	  RE_ENTRANT_CHECK_ON;
-	  break;
-	case 3:
-	  /* Not legal for the FPU */
-	  EXCEPTION(EX_Invalid);
+	/* Memory accessed via the cs selector is write protected
+	   in `non-segmented' 32 bit protected mode. */
+	if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+	    && (addr_modes.override.segment == PREFIX_CS_)) {
+		math_abort(FPU_info, SIGSEGV);
 	}
-      address += *cpu_reg_ptr;
-    }
 
-  addr->offset = address;
+	addr->selector = FPU_DS;	/* Default, for 32 bit non-segmented mode. */
 
-  switch ( addr_modes.default_mode )
-    {
-    case 0:
-      break;
-    case VM86:
-      address += vm86_segment(addr_modes.override.segment, addr);
-      break;
-    case PM16:
-    case SEG32:
-      address = pm_address(FPU_modrm, addr_modes.override.segment,
-			   addr, address);
-      break;
-    default:
-      EXCEPTION(EX_INTERNAL|0x133);
-    }
+	mod = (FPU_modrm >> 6) & 3;
 
-  return (void __user *)address;
+	if (rm == 4 && mod != 3) {
+		address = sib(mod, fpu_eip);
+	} else {
+		cpu_reg_ptr = &REG_(rm);
+		switch (mod) {
+		case 0:
+			if (rm == 5) {
+				/* Special case: disp32 */
+				RE_ENTRANT_CHECK_OFF;
+				FPU_code_access_ok(4);
+				FPU_get_user(address,
+					     (unsigned long __user
+					      *)(*fpu_eip));
+				(*fpu_eip) += 4;
+				RE_ENTRANT_CHECK_ON;
+				addr->offset = address;
+				return (void __user *)address;
+			} else {
+				address = *cpu_reg_ptr;	/* Just return the contents
+							   of the cpu register */
+				addr->offset = address;
+				return (void __user *)address;
+			}
+		case 1:
+			/* 8 bit signed displacement */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_code_access_ok(1);
+			FPU_get_user(address, (signed char __user *)(*fpu_eip));
+			RE_ENTRANT_CHECK_ON;
+			(*fpu_eip)++;
+			break;
+		case 2:
+			/* 32 bit displacement */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_code_access_ok(4);
+			FPU_get_user(address, (long __user *)(*fpu_eip));
+			(*fpu_eip) += 4;
+			RE_ENTRANT_CHECK_ON;
+			break;
+		case 3:
+			/* Not legal for the FPU */
+			EXCEPTION(EX_Invalid);
+		}
+		address += *cpu_reg_ptr;
+	}
+
+	addr->offset = address;
+
+	switch (addr_modes.default_mode) {
+	case 0:
+		break;
+	case VM86:
+		address += vm86_segment(addr_modes.override.segment, addr);
+		break;
+	case PM16:
+	case SEG32:
+		address = pm_address(FPU_modrm, addr_modes.override.segment,
+				     addr, address);
+		break;
+	default:
+		EXCEPTION(EX_INTERNAL | 0x133);
+	}
+
+	return (void __user *)address;
 }
 
-
 void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
-		     struct address *addr,
-		     fpu_addr_modes addr_modes)
+				struct address *addr, fpu_addr_modes addr_modes)
 {
-  u_char mod;
-  unsigned rm = FPU_modrm & 7;
-  int address = 0;     /* Default used for mod == 0 */
+	u_char mod;
+	unsigned rm = FPU_modrm & 7;
+	int address = 0;	/* Default used for mod == 0 */
 
-  /* Memory accessed via the cs selector is write protected
-     in `non-segmented' 32 bit protected mode. */
-  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
-      && (addr_modes.override.segment == PREFIX_CS_) )
-    {
-      math_abort(FPU_info,SIGSEGV);
-    }
-
-  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
-
-  mod = (FPU_modrm >> 6) & 3;
-
-  switch (mod)
-    {
-    case 0:
-      if (rm == 6)
-	{
-	  /* Special case: disp16 */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_code_access_ok(2);
-	  FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
-	  (*fpu_eip) += 2;
-	  RE_ENTRANT_CHECK_ON;
-	  goto add_segment;
+	/* Memory accessed via the cs selector is write protected
+	   in `non-segmented' 32 bit protected mode. */
+	if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+	    && (addr_modes.override.segment == PREFIX_CS_)) {
+		math_abort(FPU_info, SIGSEGV);
 	}
-      break;
-    case 1:
-      /* 8 bit signed displacement */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_code_access_ok(1);
-      FPU_get_user(address, (signed char __user *) (*fpu_eip));
-      RE_ENTRANT_CHECK_ON;
-      (*fpu_eip)++;
-      break;
-    case 2:
-      /* 16 bit displacement */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_code_access_ok(2);
-      FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
-      (*fpu_eip) += 2;
-      RE_ENTRANT_CHECK_ON;
-      break;
-    case 3:
-      /* Not legal for the FPU */
-      EXCEPTION(EX_Invalid);
-      break;
-    }
-  switch ( rm )
-    {
-    case 0:
-      address += FPU_info->___ebx + FPU_info->___esi;
-      break;
-    case 1:
-      address += FPU_info->___ebx + FPU_info->___edi;
-      break;
-    case 2:
-      address += FPU_info->___ebp + FPU_info->___esi;
-      if ( addr_modes.override.segment == PREFIX_DEFAULT )
-	addr_modes.override.segment = PREFIX_SS_;
-      break;
-    case 3:
-      address += FPU_info->___ebp + FPU_info->___edi;
-      if ( addr_modes.override.segment == PREFIX_DEFAULT )
-	addr_modes.override.segment = PREFIX_SS_;
-      break;
-    case 4:
-      address += FPU_info->___esi;
-      break;
-    case 5:
-      address += FPU_info->___edi;
-      break;
-    case 6:
-      address += FPU_info->___ebp;
-      if ( addr_modes.override.segment == PREFIX_DEFAULT )
-	addr_modes.override.segment = PREFIX_SS_;
-      break;
-    case 7:
-      address += FPU_info->___ebx;
-      break;
-    }
 
- add_segment:
-  address &= 0xffff;
+	addr->selector = FPU_DS;	/* Default, for 32 bit non-segmented mode. */
 
-  addr->offset = address;
+	mod = (FPU_modrm >> 6) & 3;
 
-  switch ( addr_modes.default_mode )
-    {
-    case 0:
-      break;
-    case VM86:
-      address += vm86_segment(addr_modes.override.segment, addr);
-      break;
-    case PM16:
-    case SEG32:
-      address = pm_address(FPU_modrm, addr_modes.override.segment,
-			   addr, address);
-      break;
-    default:
-      EXCEPTION(EX_INTERNAL|0x131);
-    }
+	switch (mod) {
+	case 0:
+		if (rm == 6) {
+			/* Special case: disp16 */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_code_access_ok(2);
+			FPU_get_user(address,
+				     (unsigned short __user *)(*fpu_eip));
+			(*fpu_eip) += 2;
+			RE_ENTRANT_CHECK_ON;
+			goto add_segment;
+		}
+		break;
+	case 1:
+		/* 8 bit signed displacement */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_code_access_ok(1);
+		FPU_get_user(address, (signed char __user *)(*fpu_eip));
+		RE_ENTRANT_CHECK_ON;
+		(*fpu_eip)++;
+		break;
+	case 2:
+		/* 16 bit displacement */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_code_access_ok(2);
+		FPU_get_user(address, (unsigned short __user *)(*fpu_eip));
+		(*fpu_eip) += 2;
+		RE_ENTRANT_CHECK_ON;
+		break;
+	case 3:
+		/* Not legal for the FPU */
+		EXCEPTION(EX_Invalid);
+		break;
+	}
+	switch (rm) {
+	case 0:
+		address += FPU_info->___ebx + FPU_info->___esi;
+		break;
+	case 1:
+		address += FPU_info->___ebx + FPU_info->___edi;
+		break;
+	case 2:
+		address += FPU_info->___ebp + FPU_info->___esi;
+		if (addr_modes.override.segment == PREFIX_DEFAULT)
+			addr_modes.override.segment = PREFIX_SS_;
+		break;
+	case 3:
+		address += FPU_info->___ebp + FPU_info->___edi;
+		if (addr_modes.override.segment == PREFIX_DEFAULT)
+			addr_modes.override.segment = PREFIX_SS_;
+		break;
+	case 4:
+		address += FPU_info->___esi;
+		break;
+	case 5:
+		address += FPU_info->___edi;
+		break;
+	case 6:
+		address += FPU_info->___ebp;
+		if (addr_modes.override.segment == PREFIX_DEFAULT)
+			addr_modes.override.segment = PREFIX_SS_;
+		break;
+	case 7:
+		address += FPU_info->___ebx;
+		break;
+	}
 
-  return (void __user *)address ;
+      add_segment:
+	address &= 0xffff;
+
+	addr->offset = address;
+
+	switch (addr_modes.default_mode) {
+	case 0:
+		break;
+	case VM86:
+		address += vm86_segment(addr_modes.override.segment, addr);
+		break;
+	case PM16:
+	case SEG32:
+		address = pm_address(FPU_modrm, addr_modes.override.segment,
+				     addr, address);
+		break;
+	default:
+		EXCEPTION(EX_INTERNAL | 0x131);
+	}
+
+	return (void __user *)address;
 }
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c
index eebd6fb1c8a8..2931ff355218 100644
--- a/arch/x86/math-emu/load_store.c
+++ b/arch/x86/math-emu/load_store.c
@@ -26,247 +26,257 @@
 #include "status_w.h"
 #include "control_w.h"
 
-
-#define _NONE_ 0   /* st0_ptr etc not needed */
-#define _REG0_ 1   /* Will be storing st(0) */
-#define _PUSH_ 3   /* Need to check for space to push onto stack */
-#define _null_ 4   /* Function illegal or not implemented */
+#define _NONE_ 0		/* st0_ptr etc not needed */
+#define _REG0_ 1		/* Will be storing st(0) */
+#define _PUSH_ 3		/* Need to check for space to push onto stack */
+#define _null_ 4		/* Function illegal or not implemented */
 
 #define pop_0()	{ FPU_settag0(TAG_Empty); top++; }
 
-
 static u_char const type_table[32] = {
-  _PUSH_, _PUSH_, _PUSH_, _PUSH_,
-  _null_, _null_, _null_, _null_,
-  _REG0_, _REG0_, _REG0_, _REG0_,
-  _REG0_, _REG0_, _REG0_, _REG0_,
-  _NONE_, _null_, _NONE_, _PUSH_,
-  _NONE_, _PUSH_, _null_, _PUSH_,
-  _NONE_, _null_, _NONE_, _REG0_,
-  _NONE_, _REG0_, _NONE_, _REG0_
-  };
+	_PUSH_, _PUSH_, _PUSH_, _PUSH_,
+	_null_, _null_, _null_, _null_,
+	_REG0_, _REG0_, _REG0_, _REG0_,
+	_REG0_, _REG0_, _REG0_, _REG0_,
+	_NONE_, _null_, _NONE_, _PUSH_,
+	_NONE_, _PUSH_, _null_, _PUSH_,
+	_NONE_, _null_, _NONE_, _REG0_,
+	_NONE_, _REG0_, _NONE_, _REG0_
+};
 
 u_char const data_sizes_16[32] = {
-  4,  4,  8,  2,  0,  0,  0,  0,
-  4,  4,  8,  2,  4,  4,  8,  2,
-  14, 0, 94, 10,  2, 10,  0,  8,  
-  14, 0, 94, 10,  2, 10,  2,  8
+	4, 4, 8, 2, 0, 0, 0, 0,
+	4, 4, 8, 2, 4, 4, 8, 2,
+	14, 0, 94, 10, 2, 10, 0, 8,
+	14, 0, 94, 10, 2, 10, 2, 8
 };
 
 static u_char const data_sizes_32[32] = {
-  4,  4,  8,  2,  0,  0,  0,  0,
-  4,  4,  8,  2,  4,  4,  8,  2,
-  28, 0,108, 10,  2, 10,  0,  8,  
-  28, 0,108, 10,  2, 10,  2,  8
+	4, 4, 8, 2, 0, 0, 0, 0,
+	4, 4, 8, 2, 4, 4, 8, 2,
+	28, 0, 108, 10, 2, 10, 0, 8,
+	28, 0, 108, 10, 2, 10, 2, 8
 };
 
 int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
-		     void __user *data_address)
+		   void __user * data_address)
 {
-  FPU_REG loaded_data;
-  FPU_REG *st0_ptr;
-  u_char st0_tag = TAG_Empty;  /* This is just to stop a gcc warning. */
-  u_char loaded_tag;
+	FPU_REG loaded_data;
+	FPU_REG *st0_ptr;
+	u_char st0_tag = TAG_Empty;	/* This is just to stop a gcc warning. */
+	u_char loaded_tag;
 
-  st0_ptr = NULL;    /* Initialized just to stop compiler warnings. */
+	st0_ptr = NULL;		/* Initialized just to stop compiler warnings. */
 
-  if ( addr_modes.default_mode & PROTECTED )
-    {
-      if ( addr_modes.default_mode == SEG32 )
-	{
-	  if ( access_limit < data_sizes_32[type] )
-	    math_abort(FPU_info,SIGSEGV);
-	}
-      else if ( addr_modes.default_mode == PM16 )
-	{
-	  if ( access_limit < data_sizes_16[type] )
-	    math_abort(FPU_info,SIGSEGV);
-	}
+	if (addr_modes.default_mode & PROTECTED) {
+		if (addr_modes.default_mode == SEG32) {
+			if (access_limit < data_sizes_32[type])
+				math_abort(FPU_info, SIGSEGV);
+		} else if (addr_modes.default_mode == PM16) {
+			if (access_limit < data_sizes_16[type])
+				math_abort(FPU_info, SIGSEGV);
+		}
 #ifdef PARANOID
-      else
-	EXCEPTION(EX_INTERNAL|0x140);
+		else
+			EXCEPTION(EX_INTERNAL | 0x140);
 #endif /* PARANOID */
-    }
+	}
 
-  switch ( type_table[type] )
-    {
-    case _NONE_:
-      break;
-    case _REG0_:
-      st0_ptr = &st(0);       /* Some of these instructions pop after
-				 storing */
-      st0_tag = FPU_gettag0();
-      break;
-    case _PUSH_:
-      {
-	if ( FPU_gettagi(-1) != TAG_Empty )
-	  { FPU_stack_overflow(); return 0; }
-	top--;
-	st0_ptr = &st(0);
-      }
-      break;
-    case _null_:
-      FPU_illegal();
-      return 0;
+	switch (type_table[type]) {
+	case _NONE_:
+		break;
+	case _REG0_:
+		st0_ptr = &st(0);	/* Some of these instructions pop after
+					   storing */
+		st0_tag = FPU_gettag0();
+		break;
+	case _PUSH_:
+		{
+			if (FPU_gettagi(-1) != TAG_Empty) {
+				FPU_stack_overflow();
+				return 0;
+			}
+			top--;
+			st0_ptr = &st(0);
+		}
+		break;
+	case _null_:
+		FPU_illegal();
+		return 0;
 #ifdef PARANOID
-    default:
-      EXCEPTION(EX_INTERNAL|0x141);
-      return 0;
+	default:
+		EXCEPTION(EX_INTERNAL | 0x141);
+		return 0;
 #endif /* PARANOID */
-    }
+	}
 
-  switch ( type )
-    {
-    case 000:       /* fld m32real */
-      clear_C1();
-      loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
-      if ( (loaded_tag == TAG_Special)
-	   && isNaN(&loaded_data)
-	   && (real_1op_NaN(&loaded_data) < 0) )
-	{
-	  top++;
-	  break;
-	}
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 001:      /* fild m32int */
-      clear_C1();
-      loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 002:      /* fld m64real */
-      clear_C1();
-      loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
-      if ( (loaded_tag == TAG_Special)
-	   && isNaN(&loaded_data)
-	   && (real_1op_NaN(&loaded_data) < 0) )
-	{
-	  top++;
-	  break;
-	}
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 003:      /* fild m16int */
-      clear_C1();
-      loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 010:      /* fst m32real */
-      clear_C1();
-      FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
-      break;
-    case 011:      /* fist m32int */
-      clear_C1();
-      FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
-      break;
-    case 012:     /* fst m64real */
-      clear_C1();
-      FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
-      break;
-    case 013:     /* fist m16int */
-      clear_C1();
-      FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
-      break;
-    case 014:     /* fstp m32real */
-      clear_C1();
-      if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 015:     /* fistp m32int */
-      clear_C1();
-      if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 016:     /* fstp m64real */
-      clear_C1();
-      if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 017:     /* fistp m16int */
-      clear_C1();
-      if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 020:     /* fldenv  m14/28byte */
-      fldenv(addr_modes, (u_char __user *)data_address);
-      /* Ensure that the values just loaded are not changed by
-	 fix-up operations. */
-      return 1;
-    case 022:     /* frstor m94/108byte */
-      frstor(addr_modes, (u_char __user *)data_address);
-      /* Ensure that the values just loaded are not changed by
-	 fix-up operations. */
-      return 1;
-    case 023:     /* fbld m80dec */
-      clear_C1();
-      loaded_tag = FPU_load_bcd((u_char __user *)data_address);
-      FPU_settag0(loaded_tag);
-      break;
-    case 024:     /* fldcw */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_READ, data_address, 2);
-      FPU_get_user(control_word, (unsigned short __user *) data_address);
-      RE_ENTRANT_CHECK_ON;
-      if ( partial_status & ~control_word & CW_Exceptions )
-	partial_status |= (SW_Summary | SW_Backward);
-      else
-	partial_status &= ~(SW_Summary | SW_Backward);
+	switch (type) {
+	case 000:		/* fld m32real */
+		clear_C1();
+		loaded_tag =
+		    FPU_load_single((float __user *)data_address, &loaded_data);
+		if ((loaded_tag == TAG_Special)
+		    && isNaN(&loaded_data)
+		    && (real_1op_NaN(&loaded_data) < 0)) {
+			top++;
+			break;
+		}
+		FPU_copy_to_reg0(&loaded_data, loaded_tag);
+		break;
+	case 001:		/* fild m32int */
+		clear_C1();
+		loaded_tag =
+		    FPU_load_int32((long __user *)data_address, &loaded_data);
+		FPU_copy_to_reg0(&loaded_data, loaded_tag);
+		break;
+	case 002:		/* fld m64real */
+		clear_C1();
+		loaded_tag =
+		    FPU_load_double((double __user *)data_address,
+				    &loaded_data);
+		if ((loaded_tag == TAG_Special)
+		    && isNaN(&loaded_data)
+		    && (real_1op_NaN(&loaded_data) < 0)) {
+			top++;
+			break;
+		}
+		FPU_copy_to_reg0(&loaded_data, loaded_tag);
+		break;
+	case 003:		/* fild m16int */
+		clear_C1();
+		loaded_tag =
+		    FPU_load_int16((short __user *)data_address, &loaded_data);
+		FPU_copy_to_reg0(&loaded_data, loaded_tag);
+		break;
+	case 010:		/* fst m32real */
+		clear_C1();
+		FPU_store_single(st0_ptr, st0_tag,
+				 (float __user *)data_address);
+		break;
+	case 011:		/* fist m32int */
+		clear_C1();
+		FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
+		break;
+	case 012:		/* fst m64real */
+		clear_C1();
+		FPU_store_double(st0_ptr, st0_tag,
+				 (double __user *)data_address);
+		break;
+	case 013:		/* fist m16int */
+		clear_C1();
+		FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
+		break;
+	case 014:		/* fstp m32real */
+		clear_C1();
+		if (FPU_store_single
+		    (st0_ptr, st0_tag, (float __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 015:		/* fistp m32int */
+		clear_C1();
+		if (FPU_store_int32
+		    (st0_ptr, st0_tag, (long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 016:		/* fstp m64real */
+		clear_C1();
+		if (FPU_store_double
+		    (st0_ptr, st0_tag, (double __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 017:		/* fistp m16int */
+		clear_C1();
+		if (FPU_store_int16
+		    (st0_ptr, st0_tag, (short __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 020:		/* fldenv  m14/28byte */
+		fldenv(addr_modes, (u_char __user *) data_address);
+		/* Ensure that the values just loaded are not changed by
+		   fix-up operations. */
+		return 1;
+	case 022:		/* frstor m94/108byte */
+		frstor(addr_modes, (u_char __user *) data_address);
+		/* Ensure that the values just loaded are not changed by
+		   fix-up operations. */
+		return 1;
+	case 023:		/* fbld m80dec */
+		clear_C1();
+		loaded_tag = FPU_load_bcd((u_char __user *) data_address);
+		FPU_settag0(loaded_tag);
+		break;
+	case 024:		/* fldcw */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_READ, data_address, 2);
+		FPU_get_user(control_word,
+			     (unsigned short __user *)data_address);
+		RE_ENTRANT_CHECK_ON;
+		if (partial_status & ~control_word & CW_Exceptions)
+			partial_status |= (SW_Summary | SW_Backward);
+		else
+			partial_status &= ~(SW_Summary | SW_Backward);
 #ifdef PECULIAR_486
-      control_word |= 0x40;  /* An 80486 appears to always set this bit */
+		control_word |= 0x40;	/* An 80486 appears to always set this bit */
 #endif /* PECULIAR_486 */
-      return 1;
-    case 025:      /* fld m80real */
-      clear_C1();
-      loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
-      FPU_settag0(loaded_tag);
-      break;
-    case 027:      /* fild m64int */
-      clear_C1();
-      loaded_tag = FPU_load_int64((long long __user *)data_address);
-      if (loaded_tag == TAG_Error)
+		return 1;
+	case 025:		/* fld m80real */
+		clear_C1();
+		loaded_tag =
+		    FPU_load_extended((long double __user *)data_address, 0);
+		FPU_settag0(loaded_tag);
+		break;
+	case 027:		/* fild m64int */
+		clear_C1();
+		loaded_tag = FPU_load_int64((long long __user *)data_address);
+		if (loaded_tag == TAG_Error)
+			return 0;
+		FPU_settag0(loaded_tag);
+		break;
+	case 030:		/* fstenv  m14/28byte */
+		fstenv(addr_modes, (u_char __user *) data_address);
+		return 1;
+	case 032:		/* fsave */
+		fsave(addr_modes, (u_char __user *) data_address);
+		return 1;
+	case 033:		/* fbstp m80dec */
+		clear_C1();
+		if (FPU_store_bcd
+		    (st0_ptr, st0_tag, (u_char __user *) data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 034:		/* fstcw m16int */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, data_address, 2);
+		FPU_put_user(control_word,
+			     (unsigned short __user *)data_address);
+		RE_ENTRANT_CHECK_ON;
+		return 1;
+	case 035:		/* fstp m80real */
+		clear_C1();
+		if (FPU_store_extended
+		    (st0_ptr, st0_tag, (long double __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	case 036:		/* fstsw m2byte */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, data_address, 2);
+		FPU_put_user(status_word(),
+			     (unsigned short __user *)data_address);
+		RE_ENTRANT_CHECK_ON;
+		return 1;
+	case 037:		/* fistp m64int */
+		clear_C1();
+		if (FPU_store_int64
+		    (st0_ptr, st0_tag, (long long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		break;
+	}
 	return 0;
-      FPU_settag0(loaded_tag);
-      break;
-    case 030:     /* fstenv  m14/28byte */
-      fstenv(addr_modes, (u_char __user *)data_address);
-      return 1;
-    case 032:      /* fsave */
-      fsave(addr_modes, (u_char __user *)data_address);
-      return 1;
-    case 033:      /* fbstp m80dec */
-      clear_C1();
-      if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 034:      /* fstcw m16int */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE,data_address,2);
-      FPU_put_user(control_word, (unsigned short __user *) data_address);
-      RE_ENTRANT_CHECK_ON;
-      return 1;
-    case 035:      /* fstp m80real */
-      clear_C1();
-      if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 036:      /* fstsw m2byte */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE,data_address,2);
-      FPU_put_user(status_word(),(unsigned short __user *) data_address);
-      RE_ENTRANT_CHECK_ON;
-      return 1;
-    case 037:      /* fistp m64int */
-      clear_C1();
-      if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    }
-  return 0;
 }
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h
index 4db798114923..f317de7d8864 100644
--- a/arch/x86/math-emu/poly.h
+++ b/arch/x86/math-emu/poly.h
@@ -21,9 +21,9 @@
    allows. 9-byte would probably be sufficient.
    */
 typedef struct {
-  unsigned long lsw;
-  unsigned long midw;
-  unsigned long msw;
+	unsigned long lsw;
+	unsigned long midw;
+	unsigned long msw;
 } Xsig;
 
 asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
@@ -33,12 +33,12 @@ asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
 
 asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
 asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
-asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
+asmlinkage void mul_Xsig_Xsig(Xsig * dest, const Xsig * mult);
 
 asmlinkage void shr_Xsig(Xsig *, const int n);
 asmlinkage int round_Xsig(Xsig *);
 asmlinkage int norm_Xsig(Xsig *);
-asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
+asmlinkage void div_Xsig(Xsig * x1, const Xsig * x2, const Xsig * dest);
 
 /* Macro to extract the most significant 32 bits from a long long */
 #define LL_MSW(x)     (((unsigned long *)&x)[1])
@@ -49,7 +49,6 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
 /* Macro to access the 8 ms bytes of an Xsig as a long long */
 #define XSIG_LL(x)         (*(unsigned long long *)&x.midw)
 
-
 /*
    Need to run gcc with optimizations on to get these to
    actually be in-line.
@@ -63,59 +62,53 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
 static inline unsigned long mul_32_32(const unsigned long arg1,
 				      const unsigned long arg2)
 {
-  int retval;
-  asm volatile ("mull %2; movl %%edx,%%eax" \
-		:"=a" (retval) \
-		:"0" (arg1), "g" (arg2) \
-		:"dx");
-  return retval;
+	int retval;
+	asm volatile ("mull %2; movl %%edx,%%eax":"=a" (retval)
+		      :"0"(arg1), "g"(arg2)
+		      :"dx");
+	return retval;
 }
 
-
 /* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
-static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
+static inline void add_Xsig_Xsig(Xsig * dest, const Xsig * x2)
 {
-  asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
-                "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
-                "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
-                "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
-                 :"=g" (*dest):"g" (dest), "g" (x2)
-                 :"ax","si","di");
+	asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
+		      "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
+		      "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
+		      "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g"
+		      (*dest):"g"(dest), "g"(x2)
+		      :"ax", "si", "di");
 }
 
-
 /* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
 /* Note: the constraints in the asm statement didn't always work properly
    with gcc 2.5.8.  Changing from using edi to using ecx got around the
    problem, but keep fingers crossed! */
-static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
+static inline void add_two_Xsig(Xsig * dest, const Xsig * x2, long int *exp)
 {
-  asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
-                "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
-                "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
-                "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
-                "jnc 0f;\n"
-		"rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
-                "movl %4,%%ecx; incl (%%ecx)\n"
-                "movl $1,%%eax; jmp 1f;\n"
-                "0: xorl %%eax,%%eax;\n"
-                "1:\n"
-		:"=g" (*exp), "=g" (*dest)
-		:"g" (dest), "g" (x2), "g" (exp)
-		:"cx","si","ax");
+	asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
+		      "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
+		      "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
+		      "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
+		      "jnc 0f;\n"
+		      "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
+		      "movl %4,%%ecx; incl (%%ecx)\n"
+		      "movl $1,%%eax; jmp 1f;\n"
+		      "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest)
+		      :"g"(dest), "g"(x2), "g"(exp)
+		      :"cx", "si", "ax");
 }
 
-
 /* Negate (subtract from 1.0) the 12 byte Xsig */
 /* This is faster in a loop on my 386 than using the "neg" instruction. */
-static inline void negate_Xsig(Xsig *x)
+static inline void negate_Xsig(Xsig * x)
 {
-  asm volatile("movl %1,%%esi;\n"
-               "xorl %%ecx,%%ecx;\n"
-               "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
-               "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
-               "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
-               :"=g" (*x):"g" (x):"si","ax","cx");
+	asm volatile ("movl %1,%%esi;\n"
+		      "xorl %%ecx,%%ecx;\n"
+		      "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
+		      "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
+		      "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g"
+		      (*x):"g"(x):"si", "ax", "cx");
 }
 
 #endif /* _POLY_H */
diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c
index 9766ad5e9743..d8f2be3c8383 100644
--- a/arch/x86/math-emu/poly_2xm1.c
+++ b/arch/x86/math-emu/poly_2xm1.c
@@ -17,21 +17,19 @@
 #include "control_w.h"
 #include "poly.h"
 
-
 #define	HIPOWER	11
-static const unsigned long long lterms[HIPOWER] =
-{
-  0x0000000000000000LL,  /* This term done separately as 12 bytes */
-  0xf5fdeffc162c7543LL,
-  0x1c6b08d704a0bfa6LL,
-  0x0276556df749cc21LL,
-  0x002bb0ffcf14f6b8LL,
-  0x0002861225ef751cLL,
-  0x00001ffcbfcd5422LL,
-  0x00000162c005d5f1LL,
-  0x0000000da96ccb1bLL,
-  0x0000000078d1b897LL,
-  0x000000000422b029LL
+static const unsigned long long lterms[HIPOWER] = {
+	0x0000000000000000LL,	/* This term done separately as 12 bytes */
+	0xf5fdeffc162c7543LL,
+	0x1c6b08d704a0bfa6LL,
+	0x0276556df749cc21LL,
+	0x002bb0ffcf14f6b8LL,
+	0x0002861225ef751cLL,
+	0x00001ffcbfcd5422LL,
+	0x00000162c005d5f1LL,
+	0x0000000da96ccb1bLL,
+	0x0000000078d1b897LL,
+	0x000000000422b029LL
 };
 
 static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
@@ -45,112 +43,103 @@ static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
 static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
 
 static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
-				     &shiftterm2, &shiftterm3 };
-
+	&shiftterm2, &shiftterm3
+};
 
 /*--- poly_2xm1() -----------------------------------------------------------+
  | Requires st(0) which is TAG_Valid and < 1.                                |
  +---------------------------------------------------------------------------*/
-int	poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
+int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG * result)
 {
-  long int              exponent, shift;
-  unsigned long long    Xll;
-  Xsig                  accumulator, Denom, argSignif;
-  u_char                tag;
+	long int exponent, shift;
+	unsigned long long Xll;
+	Xsig accumulator, Denom, argSignif;
+	u_char tag;
 
-  exponent = exponent16(arg);
+	exponent = exponent16(arg);
 
 #ifdef PARANOID
-  if ( exponent >= 0 )    	/* Don't want a |number| >= 1.0 */
-    {
-      /* Number negative, too large, or not Valid. */
-      EXCEPTION(EX_INTERNAL|0x127);
-      return 1;
-    }
+	if (exponent >= 0) {	/* Don't want a |number| >= 1.0 */
+		/* Number negative, too large, or not Valid. */
+		EXCEPTION(EX_INTERNAL | 0x127);
+		return 1;
+	}
 #endif /* PARANOID */
 
-  argSignif.lsw = 0;
-  XSIG_LL(argSignif) = Xll = significand(arg);
+	argSignif.lsw = 0;
+	XSIG_LL(argSignif) = Xll = significand(arg);
 
-  if ( exponent == -1 )
-    {
-      shift = (argSignif.msw & 0x40000000) ? 3 : 2;
-      /* subtract 0.5 or 0.75 */
-      exponent -= 2;
-      XSIG_LL(argSignif) <<= 2;
-      Xll <<= 2;
-    }
-  else if ( exponent == -2 )
-    {
-      shift = 1;
-      /* subtract 0.25 */
-      exponent--;
-      XSIG_LL(argSignif) <<= 1;
-      Xll <<= 1;
-    }
-  else
-    shift = 0;
+	if (exponent == -1) {
+		shift = (argSignif.msw & 0x40000000) ? 3 : 2;
+		/* subtract 0.5 or 0.75 */
+		exponent -= 2;
+		XSIG_LL(argSignif) <<= 2;
+		Xll <<= 2;
+	} else if (exponent == -2) {
+		shift = 1;
+		/* subtract 0.25 */
+		exponent--;
+		XSIG_LL(argSignif) <<= 1;
+		Xll <<= 1;
+	} else
+		shift = 0;
 
-  if ( exponent < -2 )
-    {
-      /* Shift the argument right by the required places. */
-      if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
-	Xll++;	/* round up */
-    }
-
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
-  polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  shr_Xsig(&accumulator, 3);
-
-  mul_Xsig_Xsig(&argSignif, &hiterm);   /* The leading term */
-  add_two_Xsig(&accumulator, &argSignif, &exponent);
-
-  if ( shift )
-    {
-      /* The argument is large, use the identity:
-	 f(x+a) = f(a) * (f(x) + 1) - 1;
-	 */
-      shr_Xsig(&accumulator, - exponent);
-      accumulator.msw |= 0x80000000;      /* add 1.0 */
-      mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
-      accumulator.msw &= 0x3fffffff;      /* subtract 1.0 */
-      exponent = 1;
-    }
-
-  if ( sign != SIGN_POS )
-    {
-      /* The argument is negative, use the identity:
-	     f(-x) = -f(x) / (1 + f(x))
-	 */
-      Denom.lsw = accumulator.lsw;
-      XSIG_LL(Denom) = XSIG_LL(accumulator);
-      if ( exponent < 0 )
-	shr_Xsig(&Denom, - exponent);
-      else if ( exponent > 0 )
-	{
-	  /* exponent must be 1 here */
-	  XSIG_LL(Denom) <<= 1;
-	  if ( Denom.lsw & 0x80000000 )
-	    XSIG_LL(Denom) |= 1;
-	  (Denom.lsw) <<= 1;
+	if (exponent < -2) {
+		/* Shift the argument right by the required places. */
+		if (FPU_shrx(&Xll, -2 - exponent) >= 0x80000000U)
+			Xll++;	/* round up */
 	}
-      Denom.msw |= 0x80000000;      /* add 1.0 */
-      div_Xsig(&accumulator, &Denom, &accumulator);
-    }
 
-  /* Convert to 64 bit signed-compatible */
-  exponent += round_Xsig(&accumulator);
+	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+	polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER - 1);
+	mul_Xsig_Xsig(&accumulator, &argSignif);
+	shr_Xsig(&accumulator, 3);
 
-  result = &st(0);
-  significand(result) = XSIG_LL(accumulator);
-  setexponent16(result, exponent);
+	mul_Xsig_Xsig(&argSignif, &hiterm);	/* The leading term */
+	add_two_Xsig(&accumulator, &argSignif, &exponent);
 
-  tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
+	if (shift) {
+		/* The argument is large, use the identity:
+		   f(x+a) = f(a) * (f(x) + 1) - 1;
+		 */
+		shr_Xsig(&accumulator, -exponent);
+		accumulator.msw |= 0x80000000;	/* add 1.0 */
+		mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
+		accumulator.msw &= 0x3fffffff;	/* subtract 1.0 */
+		exponent = 1;
+	}
 
-  setsign(result, sign);
-  FPU_settag0(tag);
+	if (sign != SIGN_POS) {
+		/* The argument is negative, use the identity:
+		   f(-x) = -f(x) / (1 + f(x))
+		 */
+		Denom.lsw = accumulator.lsw;
+		XSIG_LL(Denom) = XSIG_LL(accumulator);
+		if (exponent < 0)
+			shr_Xsig(&Denom, -exponent);
+		else if (exponent > 0) {
+			/* exponent must be 1 here */
+			XSIG_LL(Denom) <<= 1;
+			if (Denom.lsw & 0x80000000)
+				XSIG_LL(Denom) |= 1;
+			(Denom.lsw) <<= 1;
+		}
+		Denom.msw |= 0x80000000;	/* add 1.0 */
+		div_Xsig(&accumulator, &Denom, &accumulator);
+	}
 
-  return 0;
+	/* Convert to 64 bit signed-compatible */
+	exponent += round_Xsig(&accumulator);
+
+	result = &st(0);
+	significand(result) = XSIG_LL(accumulator);
+	setexponent16(result, exponent);
+
+	tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
+
+	setsign(result, sign);
+	FPU_settag0(tag);
+
+	return 0;
 
 }
diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c
index 82f702952f69..2f4ac8143fc3 100644
--- a/arch/x86/math-emu/poly_atan.c
+++ b/arch/x86/math-emu/poly_atan.c
@@ -18,28 +18,25 @@
 #include "control_w.h"
 #include "poly.h"
 
-
 #define	HIPOWERon	6	/* odd poly, negative terms */
-static const unsigned long long oddnegterms[HIPOWERon] =
-{
-  0x0000000000000000LL, /* Dummy (not for - 1.0) */
-  0x015328437f756467LL,
-  0x0005dda27b73dec6LL,
-  0x0000226bf2bfb91aLL,
-  0x000000ccc439c5f7LL,
-  0x0000000355438407LL
-} ;
+static const unsigned long long oddnegterms[HIPOWERon] = {
+	0x0000000000000000LL,	/* Dummy (not for - 1.0) */
+	0x015328437f756467LL,
+	0x0005dda27b73dec6LL,
+	0x0000226bf2bfb91aLL,
+	0x000000ccc439c5f7LL,
+	0x0000000355438407LL
+};
 
 #define	HIPOWERop	6	/* odd poly, positive terms */
-static const unsigned long long oddplterms[HIPOWERop] =
-{
+static const unsigned long long oddplterms[HIPOWERop] = {
 /*  0xaaaaaaaaaaaaaaabLL,  transferred to fixedpterm[] */
-  0x0db55a71875c9ac2LL,
-  0x0029fce2d67880b0LL,
-  0x0000dfd3908b4596LL,
-  0x00000550fd61dab4LL,
-  0x0000001c9422b3f9LL,
-  0x000000003e3301e1LL
+	0x0db55a71875c9ac2LL,
+	0x0029fce2d67880b0LL,
+	0x0000dfd3908b4596LL,
+	0x00000550fd61dab4LL,
+	0x0000001c9422b3f9LL,
+	0x000000003e3301e1LL
 };
 
 static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
@@ -48,182 +45,164 @@ static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
 
 static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
 
-
 /*--- poly_atan() -----------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void	poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
-		  FPU_REG *st1_ptr, u_char st1_tag)
+void poly_atan(FPU_REG * st0_ptr, u_char st0_tag,
+	       FPU_REG * st1_ptr, u_char st1_tag)
 {
-  u_char	transformed, inverted,
-                sign1, sign2;
-  int           exponent;
-  long int   	dummy_exp;
-  Xsig          accumulator, Numer, Denom, accumulatore, argSignif,
-                argSq, argSqSq;
-  u_char        tag;
-  
-  sign1 = getsign(st0_ptr);
-  sign2 = getsign(st1_ptr);
-  if ( st0_tag == TAG_Valid )
-    {
-      exponent = exponent(st0_ptr);
-    }
-  else
-    {
-      /* This gives non-compatible stack contents... */
-      FPU_to_exp16(st0_ptr, st0_ptr);
-      exponent = exponent16(st0_ptr);
-    }
-  if ( st1_tag == TAG_Valid )
-    {
-      exponent -= exponent(st1_ptr);
-    }
-  else
-    {
-      /* This gives non-compatible stack contents... */
-      FPU_to_exp16(st1_ptr, st1_ptr);
-      exponent -= exponent16(st1_ptr);
-    }
+	u_char transformed, inverted, sign1, sign2;
+	int exponent;
+	long int dummy_exp;
+	Xsig accumulator, Numer, Denom, accumulatore, argSignif, argSq, argSqSq;
+	u_char tag;
 
-  if ( (exponent < 0) || ((exponent == 0) &&
-			  ((st0_ptr->sigh < st1_ptr->sigh) ||
-			   ((st0_ptr->sigh == st1_ptr->sigh) &&
-			    (st0_ptr->sigl < st1_ptr->sigl))) ) )
-    {
-      inverted = 1;
-      Numer.lsw = Denom.lsw = 0;
-      XSIG_LL(Numer) = significand(st0_ptr);
-      XSIG_LL(Denom) = significand(st1_ptr);
-    }
-  else
-    {
-      inverted = 0;
-      exponent = -exponent;
-      Numer.lsw = Denom.lsw = 0;
-      XSIG_LL(Numer) = significand(st1_ptr);
-      XSIG_LL(Denom) = significand(st0_ptr);
-     }
-  div_Xsig(&Numer, &Denom, &argSignif);
-  exponent += norm_Xsig(&argSignif);
+	sign1 = getsign(st0_ptr);
+	sign2 = getsign(st1_ptr);
+	if (st0_tag == TAG_Valid) {
+		exponent = exponent(st0_ptr);
+	} else {
+		/* This gives non-compatible stack contents... */
+		FPU_to_exp16(st0_ptr, st0_ptr);
+		exponent = exponent16(st0_ptr);
+	}
+	if (st1_tag == TAG_Valid) {
+		exponent -= exponent(st1_ptr);
+	} else {
+		/* This gives non-compatible stack contents... */
+		FPU_to_exp16(st1_ptr, st1_ptr);
+		exponent -= exponent16(st1_ptr);
+	}
 
-  if ( (exponent >= -1)
-      || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
-    {
-      /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
-      /* Convert the argument by an identity for atan */
-      transformed = 1;
+	if ((exponent < 0) || ((exponent == 0) &&
+			       ((st0_ptr->sigh < st1_ptr->sigh) ||
+				((st0_ptr->sigh == st1_ptr->sigh) &&
+				 (st0_ptr->sigl < st1_ptr->sigl))))) {
+		inverted = 1;
+		Numer.lsw = Denom.lsw = 0;
+		XSIG_LL(Numer) = significand(st0_ptr);
+		XSIG_LL(Denom) = significand(st1_ptr);
+	} else {
+		inverted = 0;
+		exponent = -exponent;
+		Numer.lsw = Denom.lsw = 0;
+		XSIG_LL(Numer) = significand(st1_ptr);
+		XSIG_LL(Denom) = significand(st0_ptr);
+	}
+	div_Xsig(&Numer, &Denom, &argSignif);
+	exponent += norm_Xsig(&argSignif);
 
-      if ( exponent >= 0 )
-	{
+	if ((exponent >= -1)
+	    || ((exponent == -2) && (argSignif.msw > 0xd413ccd0))) {
+		/* The argument is greater than sqrt(2)-1 (=0.414213562...) */
+		/* Convert the argument by an identity for atan */
+		transformed = 1;
+
+		if (exponent >= 0) {
 #ifdef PARANOID
-	  if ( !( (exponent == 0) && 
-		 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
-		 (argSignif.msw == 0x80000000) ) )
-	    {
-	      EXCEPTION(EX_INTERNAL|0x104);  /* There must be a logic error */
-	      return;
-	    }
+			if (!((exponent == 0) &&
+			      (argSignif.lsw == 0) && (argSignif.midw == 0) &&
+			      (argSignif.msw == 0x80000000))) {
+				EXCEPTION(EX_INTERNAL | 0x104);	/* There must be a logic error */
+				return;
+			}
 #endif /* PARANOID */
-	  argSignif.msw = 0;   /* Make the transformed arg -> 0.0 */
+			argSignif.msw = 0;	/* Make the transformed arg -> 0.0 */
+		} else {
+			Numer.lsw = Denom.lsw = argSignif.lsw;
+			XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
+
+			if (exponent < -1)
+				shr_Xsig(&Numer, -1 - exponent);
+			negate_Xsig(&Numer);
+
+			shr_Xsig(&Denom, -exponent);
+			Denom.msw |= 0x80000000;
+
+			div_Xsig(&Numer, &Denom, &argSignif);
+
+			exponent = -1 + norm_Xsig(&argSignif);
+		}
+	} else {
+		transformed = 0;
 	}
-      else
-	{
-	  Numer.lsw = Denom.lsw = argSignif.lsw;
-	  XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
 
-	  if ( exponent < -1 )
-	    shr_Xsig(&Numer, -1-exponent);
-	  negate_Xsig(&Numer);
-      
-	  shr_Xsig(&Denom, -exponent);
-	  Denom.msw |= 0x80000000;
-      
-	  div_Xsig(&Numer, &Denom, &argSignif);
+	argSq.lsw = argSignif.lsw;
+	argSq.midw = argSignif.midw;
+	argSq.msw = argSignif.msw;
+	mul_Xsig_Xsig(&argSq, &argSq);
 
-	  exponent = -1 + norm_Xsig(&argSignif);
+	argSqSq.lsw = argSq.lsw;
+	argSqSq.midw = argSq.midw;
+	argSqSq.msw = argSq.msw;
+	mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+	accumulatore.lsw = argSq.lsw;
+	XSIG_LL(accumulatore) = XSIG_LL(argSq);
+
+	shr_Xsig(&argSq, 2 * (-1 - exponent - 1));
+	shr_Xsig(&argSqSq, 4 * (-1 - exponent - 1));
+
+	/* Now have argSq etc with binary point at the left
+	   .1xxxxxxxx */
+
+	/* Do the basic fixed point polynomial evaluation */
+	accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+	polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
+			oddplterms, HIPOWERop - 1);
+	mul64_Xsig(&accumulator, &XSIG_LL(argSq));
+	negate_Xsig(&accumulator);
+	polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms,
+			HIPOWERon - 1);
+	negate_Xsig(&accumulator);
+	add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
+
+	mul64_Xsig(&accumulatore, &denomterm);
+	shr_Xsig(&accumulatore, 1 + 2 * (-1 - exponent));
+	accumulatore.msw |= 0x80000000;
+
+	div_Xsig(&accumulator, &accumulatore, &accumulator);
+
+	mul_Xsig_Xsig(&accumulator, &argSignif);
+	mul_Xsig_Xsig(&accumulator, &argSq);
+
+	shr_Xsig(&accumulator, 3);
+	negate_Xsig(&accumulator);
+	add_Xsig_Xsig(&accumulator, &argSignif);
+
+	if (transformed) {
+		/* compute pi/4 - accumulator */
+		shr_Xsig(&accumulator, -1 - exponent);
+		negate_Xsig(&accumulator);
+		add_Xsig_Xsig(&accumulator, &pi_signif);
+		exponent = -1;
 	}
-    }
-  else
-    {
-      transformed = 0;
-    }
 
-  argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
-  argSq.msw = argSignif.msw;
-  mul_Xsig_Xsig(&argSq, &argSq);
-  
-  argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
-  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+	if (inverted) {
+		/* compute pi/2 - accumulator */
+		shr_Xsig(&accumulator, -exponent);
+		negate_Xsig(&accumulator);
+		add_Xsig_Xsig(&accumulator, &pi_signif);
+		exponent = 0;
+	}
 
-  accumulatore.lsw = argSq.lsw;
-  XSIG_LL(accumulatore) = XSIG_LL(argSq);
+	if (sign1) {
+		/* compute pi - accumulator */
+		shr_Xsig(&accumulator, 1 - exponent);
+		negate_Xsig(&accumulator);
+		add_Xsig_Xsig(&accumulator, &pi_signif);
+		exponent = 1;
+	}
 
-  shr_Xsig(&argSq, 2*(-1-exponent-1));
-  shr_Xsig(&argSqSq, 4*(-1-exponent-1));
+	exponent += round_Xsig(&accumulator);
 
-  /* Now have argSq etc with binary point at the left
-     .1xxxxxxxx */
+	significand(st1_ptr) = XSIG_LL(accumulator);
+	setexponent16(st1_ptr, exponent);
 
-  /* Do the basic fixed point polynomial evaluation */
-  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
-		   oddplterms, HIPOWERop-1);
-  mul64_Xsig(&accumulator, &XSIG_LL(argSq));
-  negate_Xsig(&accumulator);
-  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
-  negate_Xsig(&accumulator);
-  add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
+	tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
+	FPU_settagi(1, tag);
 
-  mul64_Xsig(&accumulatore, &denomterm);
-  shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
-  accumulatore.msw |= 0x80000000;
-
-  div_Xsig(&accumulator, &accumulatore, &accumulator);
-
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  mul_Xsig_Xsig(&accumulator, &argSq);
-
-  shr_Xsig(&accumulator, 3);
-  negate_Xsig(&accumulator);
-  add_Xsig_Xsig(&accumulator, &argSignif);
-
-  if ( transformed )
-    {
-      /* compute pi/4 - accumulator */
-      shr_Xsig(&accumulator, -1-exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = -1;
-    }
-
-  if ( inverted )
-    {
-      /* compute pi/2 - accumulator */
-      shr_Xsig(&accumulator, -exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = 0;
-    }
-
-  if ( sign1 )
-    {
-      /* compute pi - accumulator */
-      shr_Xsig(&accumulator, 1 - exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = 1;
-    }
-
-  exponent += round_Xsig(&accumulator);
-
-  significand(st1_ptr) = XSIG_LL(accumulator);
-  setexponent16(st1_ptr, exponent);
-
-  tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
-  FPU_settagi(1, tag);
-
-  set_precision_flag_up();  /* We do not really know if up or down,
-			       use this as the default. */
+	set_precision_flag_up();	/* We do not really know if up or down,
+					   use this as the default. */
 
 }
diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c
index dd00e1d5b074..c0102ae87511 100644
--- a/arch/x86/math-emu/poly_l2.c
+++ b/arch/x86/math-emu/poly_l2.c
@@ -10,7 +10,6 @@
  |                                                                           |
  +---------------------------------------------------------------------------*/
 
-
 #include "exception.h"
 #include "reg_constant.h"
 #include "fpu_emu.h"
@@ -18,255 +17,228 @@
 #include "control_w.h"
 #include "poly.h"
 
-
 static void log2_kernel(FPU_REG const *arg, u_char argsign,
-			Xsig *accum_result, long int *expon);
-
+			Xsig * accum_result, long int *expon);
 
 /*--- poly_l2() -------------------------------------------------------------+
  |   Base 2 logarithm by a polynomial approximation.                         |
  +---------------------------------------------------------------------------*/
-void	poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
+void poly_l2(FPU_REG * st0_ptr, FPU_REG * st1_ptr, u_char st1_sign)
 {
-  long int	       exponent, expon, expon_expon;
-  Xsig                 accumulator, expon_accum, yaccum;
-  u_char		       sign, argsign;
-  FPU_REG              x;
-  int                  tag;
+	long int exponent, expon, expon_expon;
+	Xsig accumulator, expon_accum, yaccum;
+	u_char sign, argsign;
+	FPU_REG x;
+	int tag;
 
-  exponent = exponent16(st0_ptr);
+	exponent = exponent16(st0_ptr);
 
-  /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
-  if ( st0_ptr->sigh > (unsigned)0xb504f334 )
-    {
-      /* Treat as  sqrt(2)/2 < st0_ptr < 1 */
-      significand(&x) = - significand(st0_ptr);
-      setexponent16(&x, -1);
-      exponent++;
-      argsign = SIGN_NEG;
-    }
-  else
-    {
-      /* Treat as  1 <= st0_ptr < sqrt(2) */
-      x.sigh = st0_ptr->sigh - 0x80000000;
-      x.sigl = st0_ptr->sigl;
-      setexponent16(&x, 0);
-      argsign = SIGN_POS;
-    }
-  tag = FPU_normalize_nuo(&x);
+	/* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
+	if (st0_ptr->sigh > (unsigned)0xb504f334) {
+		/* Treat as  sqrt(2)/2 < st0_ptr < 1 */
+		significand(&x) = -significand(st0_ptr);
+		setexponent16(&x, -1);
+		exponent++;
+		argsign = SIGN_NEG;
+	} else {
+		/* Treat as  1 <= st0_ptr < sqrt(2) */
+		x.sigh = st0_ptr->sigh - 0x80000000;
+		x.sigl = st0_ptr->sigl;
+		setexponent16(&x, 0);
+		argsign = SIGN_POS;
+	}
+	tag = FPU_normalize_nuo(&x);
 
-  if ( tag == TAG_Zero )
-    {
-      expon = 0;
-      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-    }
-  else
-    {
-      log2_kernel(&x, argsign, &accumulator, &expon);
-    }
+	if (tag == TAG_Zero) {
+		expon = 0;
+		accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+	} else {
+		log2_kernel(&x, argsign, &accumulator, &expon);
+	}
 
-  if ( exponent < 0 )
-    {
-      sign = SIGN_NEG;
-      exponent = -exponent;
-    }
-  else
-    sign = SIGN_POS;
-  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
-  if ( exponent )
-    {
-      expon_expon = 31 + norm_Xsig(&expon_accum);
-      shr_Xsig(&accumulator, expon_expon - expon);
+	if (exponent < 0) {
+		sign = SIGN_NEG;
+		exponent = -exponent;
+	} else
+		sign = SIGN_POS;
+	expon_accum.msw = exponent;
+	expon_accum.midw = expon_accum.lsw = 0;
+	if (exponent) {
+		expon_expon = 31 + norm_Xsig(&expon_accum);
+		shr_Xsig(&accumulator, expon_expon - expon);
 
-      if ( sign ^ argsign )
-	negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &expon_accum);
-    }
-  else
-    {
-      expon_expon = expon;
-      sign = argsign;
-    }
+		if (sign ^ argsign)
+			negate_Xsig(&accumulator);
+		add_Xsig_Xsig(&accumulator, &expon_accum);
+	} else {
+		expon_expon = expon;
+		sign = argsign;
+	}
 
-  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
-  mul_Xsig_Xsig(&accumulator, &yaccum);
+	yaccum.lsw = 0;
+	XSIG_LL(yaccum) = significand(st1_ptr);
+	mul_Xsig_Xsig(&accumulator, &yaccum);
 
-  expon_expon += round_Xsig(&accumulator);
+	expon_expon += round_Xsig(&accumulator);
 
-  if ( accumulator.msw == 0 )
-    {
-      FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
-      return;
-    }
+	if (accumulator.msw == 0) {
+		FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+		return;
+	}
 
-  significand(st1_ptr) = XSIG_LL(accumulator);
-  setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
+	significand(st1_ptr) = XSIG_LL(accumulator);
+	setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
 
-  tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
-  FPU_settagi(1, tag);
+	tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
+	FPU_settagi(1, tag);
 
-  set_precision_flag_up();  /* 80486 appears to always do this */
+	set_precision_flag_up();	/* 80486 appears to always do this */
 
-  return;
+	return;
 
 }
 
-
 /*--- poly_l2p1() -----------------------------------------------------------+
  |   Base 2 logarithm by a polynomial approximation.                         |
  |   log2(x+1)                                                               |
  +---------------------------------------------------------------------------*/
-int	poly_l2p1(u_char sign0, u_char sign1,
-		  FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
+int poly_l2p1(u_char sign0, u_char sign1,
+	      FPU_REG * st0_ptr, FPU_REG * st1_ptr, FPU_REG * dest)
 {
-  u_char             	tag;
-  long int        	exponent;
-  Xsig              	accumulator, yaccum;
+	u_char tag;
+	long int exponent;
+	Xsig accumulator, yaccum;
 
-  if ( exponent16(st0_ptr) < 0 )
-    {
-      log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
+	if (exponent16(st0_ptr) < 0) {
+		log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
 
-      yaccum.lsw = 0;
-      XSIG_LL(yaccum) = significand(st1_ptr);
-      mul_Xsig_Xsig(&accumulator, &yaccum);
+		yaccum.lsw = 0;
+		XSIG_LL(yaccum) = significand(st1_ptr);
+		mul_Xsig_Xsig(&accumulator, &yaccum);
 
-      exponent += round_Xsig(&accumulator);
+		exponent += round_Xsig(&accumulator);
 
-      exponent += exponent16(st1_ptr) + 1;
-      if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
+		exponent += exponent16(st1_ptr) + 1;
+		if (exponent < EXP_WAY_UNDER)
+			exponent = EXP_WAY_UNDER;
 
-      significand(dest) = XSIG_LL(accumulator);
-      setexponent16(dest, exponent);
+		significand(dest) = XSIG_LL(accumulator);
+		setexponent16(dest, exponent);
 
-      tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
-      FPU_settagi(1, tag);
+		tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
+		FPU_settagi(1, tag);
 
-      if ( tag == TAG_Valid )
-	set_precision_flag_up();   /* 80486 appears to always do this */
-    }
-  else
-    {
-      /* The magnitude of st0_ptr is far too large. */
+		if (tag == TAG_Valid)
+			set_precision_flag_up();	/* 80486 appears to always do this */
+	} else {
+		/* The magnitude of st0_ptr is far too large. */
 
-      if ( sign0 != SIGN_POS )
-	{
-	  /* Trying to get the log of a negative number. */
-#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
-	  changesign(st1_ptr);
+		if (sign0 != SIGN_POS) {
+			/* Trying to get the log of a negative number. */
+#ifdef PECULIAR_486		/* Stupid 80486 doesn't worry about log(negative). */
+			changesign(st1_ptr);
 #else
-	  if ( arith_invalid(1) < 0 )
-	    return 1;
+			if (arith_invalid(1) < 0)
+				return 1;
 #endif /* PECULIAR_486 */
+		}
+
+		/* 80486 appears to do this */
+		if (sign0 == SIGN_NEG)
+			set_precision_flag_down();
+		else
+			set_precision_flag_up();
 	}
 
-      /* 80486 appears to do this */
-      if ( sign0 == SIGN_NEG )
-	set_precision_flag_down();
-      else
-	set_precision_flag_up();
-    }
+	if (exponent(dest) <= EXP_UNDER)
+		EXCEPTION(EX_Underflow);
 
-  if ( exponent(dest) <= EXP_UNDER )
-    EXCEPTION(EX_Underflow);
-
-  return 0;
+	return 0;
 
 }
 
-
-
-
 #undef HIPOWER
 #define	HIPOWER	10
-static const unsigned long long logterms[HIPOWER] =
-{
-  0x2a8eca5705fc2ef0LL,
-  0xf6384ee1d01febceLL,
-  0x093bb62877cdf642LL,
-  0x006985d8a9ec439bLL,
-  0x0005212c4f55a9c8LL,
-  0x00004326a16927f0LL,
-  0x0000038d1d80a0e7LL,
-  0x0000003141cc80c6LL,
-  0x00000002b1668c9fLL,
-  0x000000002c7a46aaLL
+static const unsigned long long logterms[HIPOWER] = {
+	0x2a8eca5705fc2ef0LL,
+	0xf6384ee1d01febceLL,
+	0x093bb62877cdf642LL,
+	0x006985d8a9ec439bLL,
+	0x0005212c4f55a9c8LL,
+	0x00004326a16927f0LL,
+	0x0000038d1d80a0e7LL,
+	0x0000003141cc80c6LL,
+	0x00000002b1668c9fLL,
+	0x000000002c7a46aaLL
 };
 
 static const unsigned long leadterm = 0xb8000000;
 
-
 /*--- log2_kernel() ---------------------------------------------------------+
  |   Base 2 logarithm by a polynomial approximation.                         |
  |   log2(x+1)                                                               |
  +---------------------------------------------------------------------------*/
-static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
+static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig * accum_result,
 			long int *expon)
 {
-  long int             exponent, adj;
-  unsigned long long   Xsq;
-  Xsig                 accumulator, Numer, Denom, argSignif, arg_signif;
+	long int exponent, adj;
+	unsigned long long Xsq;
+	Xsig accumulator, Numer, Denom, argSignif, arg_signif;
 
-  exponent = exponent16(arg);
-  Numer.lsw = Denom.lsw = 0;
-  XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
-  if ( argsign == SIGN_POS )
-    {
-      shr_Xsig(&Denom, 2 - (1 + exponent));
-      Denom.msw |= 0x80000000;
-      div_Xsig(&Numer, &Denom, &argSignif);
-    }
-  else
-    {
-      shr_Xsig(&Denom, 1 - (1 + exponent));
-      negate_Xsig(&Denom);
-      if ( Denom.msw & 0x80000000 )
-	{
-	  div_Xsig(&Numer, &Denom, &argSignif);
-	  exponent ++;
+	exponent = exponent16(arg);
+	Numer.lsw = Denom.lsw = 0;
+	XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
+	if (argsign == SIGN_POS) {
+		shr_Xsig(&Denom, 2 - (1 + exponent));
+		Denom.msw |= 0x80000000;
+		div_Xsig(&Numer, &Denom, &argSignif);
+	} else {
+		shr_Xsig(&Denom, 1 - (1 + exponent));
+		negate_Xsig(&Denom);
+		if (Denom.msw & 0x80000000) {
+			div_Xsig(&Numer, &Denom, &argSignif);
+			exponent++;
+		} else {
+			/* Denom must be 1.0 */
+			argSignif.lsw = Numer.lsw;
+			argSignif.midw = Numer.midw;
+			argSignif.msw = Numer.msw;
+		}
 	}
-      else
-	{
-	  /* Denom must be 1.0 */
-	  argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
-	  argSignif.msw = Numer.msw;
-	}
-    }
 
 #ifndef PECULIAR_486
-  /* Should check here that  |local_arg|  is within the valid range */
-  if ( exponent >= -2 )
-    {
-      if ( (exponent > -2) ||
-	  (argSignif.msw > (unsigned)0xafb0ccc0) )
-	{
-	  /* The argument is too large */
+	/* Should check here that  |local_arg|  is within the valid range */
+	if (exponent >= -2) {
+		if ((exponent > -2) || (argSignif.msw > (unsigned)0xafb0ccc0)) {
+			/* The argument is too large */
+		}
 	}
-    }
 #endif /* PECULIAR_486 */
 
-  arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
-  adj = norm_Xsig(&argSignif);
-  accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
-  mul_Xsig_Xsig(&accumulator, &accumulator);
-  shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
-  Xsq = XSIG_LL(accumulator);
-  if ( accumulator.lsw & 0x80000000 )
-    Xsq++;
+	arg_signif.lsw = argSignif.lsw;
+	XSIG_LL(arg_signif) = XSIG_LL(argSignif);
+	adj = norm_Xsig(&argSignif);
+	accumulator.lsw = argSignif.lsw;
+	XSIG_LL(accumulator) = XSIG_LL(argSignif);
+	mul_Xsig_Xsig(&accumulator, &accumulator);
+	shr_Xsig(&accumulator, 2 * (-1 - (1 + exponent + adj)));
+	Xsq = XSIG_LL(accumulator);
+	if (accumulator.lsw & 0x80000000)
+		Xsq++;
 
-  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-  /* Do the basic fixed point polynomial evaluation */
-  polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
+	accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+	/* Do the basic fixed point polynomial evaluation */
+	polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER - 1);
 
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  shr_Xsig(&accumulator, 6 - adj);
+	mul_Xsig_Xsig(&accumulator, &argSignif);
+	shr_Xsig(&accumulator, 6 - adj);
 
-  mul32_Xsig(&arg_signif, leadterm);
-  add_two_Xsig(&accumulator, &arg_signif, &exponent);
+	mul32_Xsig(&arg_signif, leadterm);
+	add_two_Xsig(&accumulator, &arg_signif, &exponent);
 
-  *expon = exponent + 1;
-  accum_result->lsw = accumulator.lsw;
-  accum_result->midw = accumulator.midw;
-  accum_result->msw = accumulator.msw;
+	*expon = exponent + 1;
+	accum_result->lsw = accumulator.lsw;
+	accum_result->midw = accumulator.midw;
+	accum_result->msw = accumulator.msw;
 
 }
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c
index a36313fb06f1..7273ae0c7692 100644
--- a/arch/x86/math-emu/poly_sin.c
+++ b/arch/x86/math-emu/poly_sin.c
@@ -11,7 +11,6 @@
  |                                                                           |
  +---------------------------------------------------------------------------*/
 
-
 #include "exception.h"
 #include "reg_constant.h"
 #include "fpu_emu.h"
@@ -19,379 +18,361 @@
 #include "control_w.h"
 #include "poly.h"
 
-
 #define	N_COEFF_P	4
 #define	N_COEFF_N	4
 
-static const unsigned long long pos_terms_l[N_COEFF_P] =
-{
-  0xaaaaaaaaaaaaaaabLL,
-  0x00d00d00d00cf906LL,
-  0x000006b99159a8bbLL,
-  0x000000000d7392e6LL
+static const unsigned long long pos_terms_l[N_COEFF_P] = {
+	0xaaaaaaaaaaaaaaabLL,
+	0x00d00d00d00cf906LL,
+	0x000006b99159a8bbLL,
+	0x000000000d7392e6LL
 };
 
-static const unsigned long long neg_terms_l[N_COEFF_N] =
-{
-  0x2222222222222167LL,
-  0x0002e3bc74aab624LL,
-  0x0000000b09229062LL,
-  0x00000000000c7973LL
+static const unsigned long long neg_terms_l[N_COEFF_N] = {
+	0x2222222222222167LL,
+	0x0002e3bc74aab624LL,
+	0x0000000b09229062LL,
+	0x00000000000c7973LL
 };
 
-
-
 #define	N_COEFF_PH	4
 #define	N_COEFF_NH	4
-static const unsigned long long pos_terms_h[N_COEFF_PH] =
-{
-  0x0000000000000000LL,
-  0x05b05b05b05b0406LL,
-  0x000049f93edd91a9LL,
-  0x00000000c9c9ed62LL
+static const unsigned long long pos_terms_h[N_COEFF_PH] = {
+	0x0000000000000000LL,
+	0x05b05b05b05b0406LL,
+	0x000049f93edd91a9LL,
+	0x00000000c9c9ed62LL
 };
 
-static const unsigned long long neg_terms_h[N_COEFF_NH] =
-{
-  0xaaaaaaaaaaaaaa98LL,
-  0x001a01a01a019064LL,
-  0x0000008f76c68a77LL,
-  0x0000000000d58f5eLL
+static const unsigned long long neg_terms_h[N_COEFF_NH] = {
+	0xaaaaaaaaaaaaaa98LL,
+	0x001a01a01a019064LL,
+	0x0000008f76c68a77LL,
+	0x0000000000d58f5eLL
 };
 
-
 /*--- poly_sine() -----------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void	poly_sine(FPU_REG *st0_ptr)
+void poly_sine(FPU_REG * st0_ptr)
 {
-  int                 exponent, echange;
-  Xsig                accumulator, argSqrd, argTo4;
-  unsigned long       fix_up, adj;
-  unsigned long long  fixed_arg;
-  FPU_REG	      result;
+	int exponent, echange;
+	Xsig accumulator, argSqrd, argTo4;
+	unsigned long fix_up, adj;
+	unsigned long long fixed_arg;
+	FPU_REG result;
 
-  exponent = exponent(st0_ptr);
+	exponent = exponent(st0_ptr);
 
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
 
-  /* Split into two ranges, for arguments below and above 1.0 */
-  /* The boundary between upper and lower is approx 0.88309101259 */
-  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
-    {
-      /* The argument is <= 0.88309101259 */
+	/* Split into two ranges, for arguments below and above 1.0 */
+	/* The boundary between upper and lower is approx 0.88309101259 */
+	if ((exponent < -1)
+	    || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
+		/* The argument is <= 0.88309101259 */
 
-      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &significand(st0_ptr));
-      shr_Xsig(&argSqrd, 2*(-1-exponent));
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
+		argSqrd.msw = st0_ptr->sigh;
+		argSqrd.midw = st0_ptr->sigl;
+		argSqrd.lsw = 0;
+		mul64_Xsig(&argSqrd, &significand(st0_ptr));
+		shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+		argTo4.msw = argSqrd.msw;
+		argTo4.midw = argSqrd.midw;
+		argTo4.lsw = argSqrd.lsw;
+		mul_Xsig_Xsig(&argTo4, &argTo4);
 
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
-		      N_COEFF_N-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+				N_COEFF_N - 1);
+		mul_Xsig_Xsig(&accumulator, &argSqrd);
+		negate_Xsig(&accumulator);
 
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
-		      N_COEFF_P-1);
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+				N_COEFF_P - 1);
 
-      shr_Xsig(&accumulator, 2);    /* Divide by four */
-      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+		shr_Xsig(&accumulator, 2);	/* Divide by four */
+		accumulator.msw |= 0x80000000;	/* Add 1.0 */
 
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
+		mul64_Xsig(&accumulator, &significand(st0_ptr));
+		mul64_Xsig(&accumulator, &significand(st0_ptr));
+		mul64_Xsig(&accumulator, &significand(st0_ptr));
 
-      /* Divide by four, FPU_REG compatible, etc */
-      exponent = 3*exponent;
+		/* Divide by four, FPU_REG compatible, etc */
+		exponent = 3 * exponent;
 
-      /* The minimum exponent difference is 3 */
-      shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
+		/* The minimum exponent difference is 3 */
+		shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
 
-      negate_Xsig(&accumulator);
-      XSIG_LL(accumulator) += significand(st0_ptr);
+		negate_Xsig(&accumulator);
+		XSIG_LL(accumulator) += significand(st0_ptr);
 
-      echange = round_Xsig(&accumulator);
+		echange = round_Xsig(&accumulator);
 
-      setexponentpos(&result, exponent(st0_ptr) + echange);
-    }
-  else
-    {
-      /* The argument is > 0.88309101259 */
-      /* We use sin(st(0)) = cos(pi/2-st(0)) */
+		setexponentpos(&result, exponent(st0_ptr) + echange);
+	} else {
+		/* The argument is > 0.88309101259 */
+		/* We use sin(st(0)) = cos(pi/2-st(0)) */
 
-      fixed_arg = significand(st0_ptr);
+		fixed_arg = significand(st0_ptr);
 
-      if ( exponent == 0 )
-	{
-	  /* The argument is >= 1.0 */
+		if (exponent == 0) {
+			/* The argument is >= 1.0 */
 
-	  /* Put the binary point at the left. */
-	  fixed_arg <<= 1;
+			/* Put the binary point at the left. */
+			fixed_arg <<= 1;
+		}
+		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+		/* There is a special case which arises due to rounding, to fix here. */
+		if (fixed_arg == 0xffffffffffffffffLL)
+			fixed_arg = 0;
+
+		XSIG_LL(argSqrd) = fixed_arg;
+		argSqrd.lsw = 0;
+		mul64_Xsig(&argSqrd, &fixed_arg);
+
+		XSIG_LL(argTo4) = XSIG_LL(argSqrd);
+		argTo4.lsw = argSqrd.lsw;
+		mul_Xsig_Xsig(&argTo4, &argTo4);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+				N_COEFF_NH - 1);
+		mul_Xsig_Xsig(&accumulator, &argSqrd);
+		negate_Xsig(&accumulator);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+				N_COEFF_PH - 1);
+		negate_Xsig(&accumulator);
+
+		mul64_Xsig(&accumulator, &fixed_arg);
+		mul64_Xsig(&accumulator, &fixed_arg);
+
+		shr_Xsig(&accumulator, 3);
+		negate_Xsig(&accumulator);
+
+		add_Xsig_Xsig(&accumulator, &argSqrd);
+
+		shr_Xsig(&accumulator, 1);
+
+		accumulator.lsw |= 1;	/* A zero accumulator here would cause problems */
+		negate_Xsig(&accumulator);
+
+		/* The basic computation is complete. Now fix the answer to
+		   compensate for the error due to the approximation used for
+		   pi/2
+		 */
+
+		/* This has an exponent of -65 */
+		fix_up = 0x898cc517;
+		/* The fix-up needs to be improved for larger args */
+		if (argSqrd.msw & 0xffc00000) {
+			/* Get about 32 bit precision in these: */
+			fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
+		}
+		fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
+
+		adj = accumulator.lsw;	/* temp save */
+		accumulator.lsw -= fix_up;
+		if (accumulator.lsw > adj)
+			XSIG_LL(accumulator)--;
+
+		echange = round_Xsig(&accumulator);
+
+		setexponentpos(&result, echange - 1);
 	}
-      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
-      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
-      /* There is a special case which arises due to rounding, to fix here. */
-      if ( fixed_arg == 0xffffffffffffffffLL )
-	fixed_arg = 0;
 
-      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &fixed_arg);
-
-      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
-		      N_COEFF_NH-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
-		      N_COEFF_PH-1);
-      negate_Xsig(&accumulator);
-
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-
-      shr_Xsig(&accumulator, 3);
-      negate_Xsig(&accumulator);
-
-      add_Xsig_Xsig(&accumulator, &argSqrd);
-
-      shr_Xsig(&accumulator, 1);
-
-      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
-      negate_Xsig(&accumulator);
-
-      /* The basic computation is complete. Now fix the answer to
-	 compensate for the error due to the approximation used for
-	 pi/2
-	 */
-
-      /* This has an exponent of -65 */
-      fix_up = 0x898cc517;
-      /* The fix-up needs to be improved for larger args */
-      if ( argSqrd.msw & 0xffc00000 )
-	{
-	  /* Get about 32 bit precision in these: */
-	  fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
-	}
-      fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
-
-      adj = accumulator.lsw;    /* temp save */
-      accumulator.lsw -= fix_up;
-      if ( accumulator.lsw > adj )
-	XSIG_LL(accumulator) --;
-
-      echange = round_Xsig(&accumulator);
-
-      setexponentpos(&result, echange - 1);
-    }
-
-  significand(&result) = XSIG_LL(accumulator);
-  setsign(&result, getsign(st0_ptr));
-  FPU_copy_to_reg0(&result, TAG_Valid);
+	significand(&result) = XSIG_LL(accumulator);
+	setsign(&result, getsign(st0_ptr));
+	FPU_copy_to_reg0(&result, TAG_Valid);
 
 #ifdef PARANOID
-  if ( (exponent(&result) >= 0)
-      && (significand(&result) > 0x8000000000000000LL) )
-    {
-      EXCEPTION(EX_INTERNAL|0x150);
-    }
+	if ((exponent(&result) >= 0)
+	    && (significand(&result) > 0x8000000000000000LL)) {
+		EXCEPTION(EX_INTERNAL | 0x150);
+	}
 #endif /* PARANOID */
 
 }
 
-
-
 /*--- poly_cos() ------------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void	poly_cos(FPU_REG *st0_ptr)
+void poly_cos(FPU_REG * st0_ptr)
 {
-  FPU_REG	      result;
-  long int            exponent, exp2, echange;
-  Xsig                accumulator, argSqrd, fix_up, argTo4;
-  unsigned long long  fixed_arg;
+	FPU_REG result;
+	long int exponent, exp2, echange;
+	Xsig accumulator, argSqrd, fix_up, argTo4;
+	unsigned long long fixed_arg;
 
 #ifdef PARANOID
-  if ( (exponent(st0_ptr) > 0)
-      || ((exponent(st0_ptr) == 0)
-	  && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
-    {
-      EXCEPTION(EX_Invalid);
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-      return;
-    }
+	if ((exponent(st0_ptr) > 0)
+	    || ((exponent(st0_ptr) == 0)
+		&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
+		EXCEPTION(EX_Invalid);
+		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+		return;
+	}
 #endif /* PARANOID */
 
-  exponent = exponent(st0_ptr);
+	exponent = exponent(st0_ptr);
 
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
 
-  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
-    {
-      /* arg is < 0.687705 */
+	if ((exponent < -1)
+	    || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
+		/* arg is < 0.687705 */
 
-      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
-      argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &significand(st0_ptr));
+		argSqrd.msw = st0_ptr->sigh;
+		argSqrd.midw = st0_ptr->sigl;
+		argSqrd.lsw = 0;
+		mul64_Xsig(&argSqrd, &significand(st0_ptr));
 
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  shr_Xsig(&argSqrd, 2*(-1-exponent));
+		if (exponent < -1) {
+			/* shift the argument right by the required places */
+			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+		}
+
+		argTo4.msw = argSqrd.msw;
+		argTo4.midw = argSqrd.midw;
+		argTo4.lsw = argSqrd.lsw;
+		mul_Xsig_Xsig(&argTo4, &argTo4);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+				N_COEFF_NH - 1);
+		mul_Xsig_Xsig(&accumulator, &argSqrd);
+		negate_Xsig(&accumulator);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+				N_COEFF_PH - 1);
+		negate_Xsig(&accumulator);
+
+		mul64_Xsig(&accumulator, &significand(st0_ptr));
+		mul64_Xsig(&accumulator, &significand(st0_ptr));
+		shr_Xsig(&accumulator, -2 * (1 + exponent));
+
+		shr_Xsig(&accumulator, 3);
+		negate_Xsig(&accumulator);
+
+		add_Xsig_Xsig(&accumulator, &argSqrd);
+
+		shr_Xsig(&accumulator, 1);
+
+		/* It doesn't matter if accumulator is all zero here, the
+		   following code will work ok */
+		negate_Xsig(&accumulator);
+
+		if (accumulator.lsw & 0x80000000)
+			XSIG_LL(accumulator)++;
+		if (accumulator.msw == 0) {
+			/* The result is 1.0 */
+			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+			return;
+		} else {
+			significand(&result) = XSIG_LL(accumulator);
+
+			/* will be a valid positive nr with expon = -1 */
+			setexponentpos(&result, -1);
+		}
+	} else {
+		fixed_arg = significand(st0_ptr);
+
+		if (exponent == 0) {
+			/* The argument is >= 1.0 */
+
+			/* Put the binary point at the left. */
+			fixed_arg <<= 1;
+		}
+		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+		/* There is a special case which arises due to rounding, to fix here. */
+		if (fixed_arg == 0xffffffffffffffffLL)
+			fixed_arg = 0;
+
+		exponent = -1;
+		exp2 = -1;
+
+		/* A shift is needed here only for a narrow range of arguments,
+		   i.e. for fixed_arg approx 2^-32, but we pick up more... */
+		if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
+			fixed_arg <<= 16;
+			exponent -= 16;
+			exp2 -= 16;
+		}
+
+		XSIG_LL(argSqrd) = fixed_arg;
+		argSqrd.lsw = 0;
+		mul64_Xsig(&argSqrd, &fixed_arg);
+
+		if (exponent < -1) {
+			/* shift the argument right by the required places */
+			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+		}
+
+		argTo4.msw = argSqrd.msw;
+		argTo4.midw = argSqrd.midw;
+		argTo4.lsw = argSqrd.lsw;
+		mul_Xsig_Xsig(&argTo4, &argTo4);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+				N_COEFF_N - 1);
+		mul_Xsig_Xsig(&accumulator, &argSqrd);
+		negate_Xsig(&accumulator);
+
+		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+				N_COEFF_P - 1);
+
+		shr_Xsig(&accumulator, 2);	/* Divide by four */
+		accumulator.msw |= 0x80000000;	/* Add 1.0 */
+
+		mul64_Xsig(&accumulator, &fixed_arg);
+		mul64_Xsig(&accumulator, &fixed_arg);
+		mul64_Xsig(&accumulator, &fixed_arg);
+
+		/* Divide by four, FPU_REG compatible, etc */
+		exponent = 3 * exponent;
+
+		/* The minimum exponent difference is 3 */
+		shr_Xsig(&accumulator, exp2 - exponent);
+
+		negate_Xsig(&accumulator);
+		XSIG_LL(accumulator) += fixed_arg;
+
+		/* The basic computation is complete. Now fix the answer to
+		   compensate for the error due to the approximation used for
+		   pi/2
+		 */
+
+		/* This has an exponent of -65 */
+		XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
+		fix_up.lsw = 0;
+
+		/* The fix-up needs to be improved for larger args */
+		if (argSqrd.msw & 0xffc00000) {
+			/* Get about 32 bit precision in these: */
+			fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
+			fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
+		}
+
+		exp2 += norm_Xsig(&accumulator);
+		shr_Xsig(&accumulator, 1);	/* Prevent overflow */
+		exp2++;
+		shr_Xsig(&fix_up, 65 + exp2);
+
+		add_Xsig_Xsig(&accumulator, &fix_up);
+
+		echange = round_Xsig(&accumulator);
+
+		setexponentpos(&result, exp2 + echange);
+		significand(&result) = XSIG_LL(accumulator);
 	}
 
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
-		      N_COEFF_NH-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
-		      N_COEFF_PH-1);
-      negate_Xsig(&accumulator);
-
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      shr_Xsig(&accumulator, -2*(1+exponent));
-
-      shr_Xsig(&accumulator, 3);
-      negate_Xsig(&accumulator);
-
-      add_Xsig_Xsig(&accumulator, &argSqrd);
-
-      shr_Xsig(&accumulator, 1);
-
-      /* It doesn't matter if accumulator is all zero here, the
-	 following code will work ok */
-      negate_Xsig(&accumulator);
-
-      if ( accumulator.lsw & 0x80000000 )
-	XSIG_LL(accumulator) ++;
-      if ( accumulator.msw == 0 )
-	{
-	  /* The result is 1.0 */
-	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-	  return;
-	}
-      else
-	{
-	  significand(&result) = XSIG_LL(accumulator);
-      
-	  /* will be a valid positive nr with expon = -1 */
-	  setexponentpos(&result, -1);
-	}
-    }
-  else
-    {
-      fixed_arg = significand(st0_ptr);
-
-      if ( exponent == 0 )
-	{
-	  /* The argument is >= 1.0 */
-
-	  /* Put the binary point at the left. */
-	  fixed_arg <<= 1;
-	}
-      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
-      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
-      /* There is a special case which arises due to rounding, to fix here. */
-      if ( fixed_arg == 0xffffffffffffffffLL )
-	fixed_arg = 0;
-
-      exponent = -1;
-      exp2 = -1;
-
-      /* A shift is needed here only for a narrow range of arguments,
-	 i.e. for fixed_arg approx 2^-32, but we pick up more... */
-      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
-	{
-	  fixed_arg <<= 16;
-	  exponent -= 16;
-	  exp2 -= 16;
-	}
-
-      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &fixed_arg);
-
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  shr_Xsig(&argSqrd, 2*(-1-exponent));
-	}
-
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
-		      N_COEFF_N-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
-		      N_COEFF_P-1);
-
-      shr_Xsig(&accumulator, 2);    /* Divide by four */
-      accumulator.msw |= 0x80000000;  /* Add 1.0 */
-
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-
-      /* Divide by four, FPU_REG compatible, etc */
-      exponent = 3*exponent;
-
-      /* The minimum exponent difference is 3 */
-      shr_Xsig(&accumulator, exp2 - exponent);
-
-      negate_Xsig(&accumulator);
-      XSIG_LL(accumulator) += fixed_arg;
-
-      /* The basic computation is complete. Now fix the answer to
-	 compensate for the error due to the approximation used for
-	 pi/2
-	 */
-
-      /* This has an exponent of -65 */
-      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
-      fix_up.lsw = 0;
-
-      /* The fix-up needs to be improved for larger args */
-      if ( argSqrd.msw & 0xffc00000 )
-	{
-	  /* Get about 32 bit precision in these: */
-	  fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
-	  fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
-	}
-
-      exp2 += norm_Xsig(&accumulator);
-      shr_Xsig(&accumulator, 1); /* Prevent overflow */
-      exp2++;
-      shr_Xsig(&fix_up, 65 + exp2);
-
-      add_Xsig_Xsig(&accumulator, &fix_up);
-
-      echange = round_Xsig(&accumulator);
-
-      setexponentpos(&result, exp2 + echange);
-      significand(&result) = XSIG_LL(accumulator);
-    }
-
-  FPU_copy_to_reg0(&result, TAG_Valid);
+	FPU_copy_to_reg0(&result, TAG_Valid);
 
 #ifdef PARANOID
-  if ( (exponent(&result) >= 0)
-      && (significand(&result) > 0x8000000000000000LL) )
-    {
-      EXCEPTION(EX_INTERNAL|0x151);
-    }
+	if ((exponent(&result) >= 0)
+	    && (significand(&result) > 0x8000000000000000LL)) {
+		EXCEPTION(EX_INTERNAL | 0x151);
+	}
 #endif /* PARANOID */
 
 }
diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c
index 8df3e03b6e6f..c0d181e39229 100644
--- a/arch/x86/math-emu/poly_tan.c
+++ b/arch/x86/math-emu/poly_tan.c
@@ -17,206 +17,196 @@
 #include "control_w.h"
 #include "poly.h"
 
-
 #define	HiPOWERop	3	/* odd poly, positive terms */
-static const unsigned long long oddplterm[HiPOWERop] =
-{
-  0x0000000000000000LL,
-  0x0051a1cf08fca228LL,
-  0x0000000071284ff7LL
+static const unsigned long long oddplterm[HiPOWERop] = {
+	0x0000000000000000LL,
+	0x0051a1cf08fca228LL,
+	0x0000000071284ff7LL
 };
 
 #define	HiPOWERon	2	/* odd poly, negative terms */
-static const unsigned long long oddnegterm[HiPOWERon] =
-{
-   0x1291a9a184244e80LL,
-   0x0000583245819c21LL
+static const unsigned long long oddnegterm[HiPOWERon] = {
+	0x1291a9a184244e80LL,
+	0x0000583245819c21LL
 };
 
 #define	HiPOWERep	2	/* even poly, positive terms */
-static const unsigned long long evenplterm[HiPOWERep] =
-{
-  0x0e848884b539e888LL,
-  0x00003c7f18b887daLL
+static const unsigned long long evenplterm[HiPOWERep] = {
+	0x0e848884b539e888LL,
+	0x00003c7f18b887daLL
 };
 
 #define	HiPOWERen	2	/* even poly, negative terms */
-static const unsigned long long evennegterm[HiPOWERen] =
-{
-  0xf1f0200fd51569ccLL,
-  0x003afb46105c4432LL
+static const unsigned long long evennegterm[HiPOWERen] = {
+	0xf1f0200fd51569ccLL,
+	0x003afb46105c4432LL
 };
 
 static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
 
-
 /*--- poly_tan() ------------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void	poly_tan(FPU_REG *st0_ptr)
+void poly_tan(FPU_REG * st0_ptr)
 {
-  long int    		exponent;
-  int                   invert;
-  Xsig                  argSq, argSqSq, accumulatoro, accumulatore, accum,
-                        argSignif, fix_up;
-  unsigned long         adj;
+	long int exponent;
+	int invert;
+	Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
+	    argSignif, fix_up;
+	unsigned long adj;
 
-  exponent = exponent(st0_ptr);
+	exponent = exponent(st0_ptr);
 
 #ifdef PARANOID
-  if ( signnegative(st0_ptr) )	/* Can't hack a number < 0.0 */
-    { arith_invalid(0); return; }  /* Need a positive number */
+	if (signnegative(st0_ptr)) {	/* Can't hack a number < 0.0 */
+		arith_invalid(0);
+		return;
+	}			/* Need a positive number */
 #endif /* PARANOID */
 
-  /* Split the problem into two domains, smaller and larger than pi/4 */
-  if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
-    {
-      /* The argument is greater than (approx) pi/4 */
-      invert = 1;
-      accum.lsw = 0;
-      XSIG_LL(accum) = significand(st0_ptr);
- 
-      if ( exponent == 0 )
-	{
-	  /* The argument is >= 1.0 */
-	  /* Put the binary point at the left. */
-	  XSIG_LL(accum) <<= 1;
-	}
-      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
-      XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
-      /* This is a special case which arises due to rounding. */
-      if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
-	{
-	  FPU_settag0(TAG_Valid);
-	  significand(st0_ptr) = 0x8a51e04daabda360LL;
-	  setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
-	  return;
+	/* Split the problem into two domains, smaller and larger than pi/4 */
+	if ((exponent == 0)
+	    || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2))) {
+		/* The argument is greater than (approx) pi/4 */
+		invert = 1;
+		accum.lsw = 0;
+		XSIG_LL(accum) = significand(st0_ptr);
+
+		if (exponent == 0) {
+			/* The argument is >= 1.0 */
+			/* Put the binary point at the left. */
+			XSIG_LL(accum) <<= 1;
+		}
+		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+		XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
+		/* This is a special case which arises due to rounding. */
+		if (XSIG_LL(accum) == 0xffffffffffffffffLL) {
+			FPU_settag0(TAG_Valid);
+			significand(st0_ptr) = 0x8a51e04daabda360LL;
+			setexponent16(st0_ptr,
+				      (0x41 + EXTENDED_Ebias) | SIGN_Negative);
+			return;
+		}
+
+		argSignif.lsw = accum.lsw;
+		XSIG_LL(argSignif) = XSIG_LL(accum);
+		exponent = -1 + norm_Xsig(&argSignif);
+	} else {
+		invert = 0;
+		argSignif.lsw = 0;
+		XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
+
+		if (exponent < -1) {
+			/* shift the argument right by the required places */
+			if (FPU_shrx(&XSIG_LL(accum), -1 - exponent) >=
+			    0x80000000U)
+				XSIG_LL(accum)++;	/* round up */
+		}
 	}
 
-      argSignif.lsw = accum.lsw;
-      XSIG_LL(argSignif) = XSIG_LL(accum);
-      exponent = -1 + norm_Xsig(&argSignif);
-    }
-  else
-    {
-      invert = 0;
-      argSignif.lsw = 0;
-      XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
- 
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
-	    XSIG_LL(accum) ++;	/* round up */
+	XSIG_LL(argSq) = XSIG_LL(accum);
+	argSq.lsw = accum.lsw;
+	mul_Xsig_Xsig(&argSq, &argSq);
+	XSIG_LL(argSqSq) = XSIG_LL(argSq);
+	argSqSq.lsw = argSq.lsw;
+	mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+	/* Compute the negative terms for the numerator polynomial */
+	accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
+	polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm,
+			HiPOWERon - 1);
+	mul_Xsig_Xsig(&accumulatoro, &argSq);
+	negate_Xsig(&accumulatoro);
+	/* Add the positive terms */
+	polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm,
+			HiPOWERop - 1);
+
+	/* Compute the positive terms for the denominator polynomial */
+	accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
+	polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm,
+			HiPOWERep - 1);
+	mul_Xsig_Xsig(&accumulatore, &argSq);
+	negate_Xsig(&accumulatore);
+	/* Add the negative terms */
+	polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm,
+			HiPOWERen - 1);
+	/* Multiply by arg^2 */
+	mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+	mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+	/* de-normalize and divide by 2 */
+	shr_Xsig(&accumulatore, -2 * (1 + exponent) + 1);
+	negate_Xsig(&accumulatore);	/* This does 1 - accumulator */
+
+	/* Now find the ratio. */
+	if (accumulatore.msw == 0) {
+		/* accumulatoro must contain 1.0 here, (actually, 0) but it
+		   really doesn't matter what value we use because it will
+		   have negligible effect in later calculations
+		 */
+		XSIG_LL(accum) = 0x8000000000000000LL;
+		accum.lsw = 0;
+	} else {
+		div_Xsig(&accumulatoro, &accumulatore, &accum);
 	}
-    }
 
-  XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
-  mul_Xsig_Xsig(&argSq, &argSq);
-  XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
-  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+	/* Multiply by 1/3 * arg^3 */
+	mul64_Xsig(&accum, &XSIG_LL(argSignif));
+	mul64_Xsig(&accum, &XSIG_LL(argSignif));
+	mul64_Xsig(&accum, &XSIG_LL(argSignif));
+	mul64_Xsig(&accum, &twothirds);
+	shr_Xsig(&accum, -2 * (exponent + 1));
 
-  /* Compute the negative terms for the numerator polynomial */
-  accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
-  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
-  mul_Xsig_Xsig(&accumulatoro, &argSq);
-  negate_Xsig(&accumulatoro);
-  /* Add the positive terms */
-  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
+	/* tan(arg) = arg + accum */
+	add_two_Xsig(&accum, &argSignif, &exponent);
 
-  
-  /* Compute the positive terms for the denominator polynomial */
-  accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
-  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
-  mul_Xsig_Xsig(&accumulatore, &argSq);
-  negate_Xsig(&accumulatore);
-  /* Add the negative terms */
-  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
-  /* Multiply by arg^2 */
-  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
-  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
-  /* de-normalize and divide by 2 */
-  shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
-  negate_Xsig(&accumulatore);      /* This does 1 - accumulator */
+	if (invert) {
+		/* We now have the value of tan(pi_2 - arg) where pi_2 is an
+		   approximation for pi/2
+		 */
+		/* The next step is to fix the answer to compensate for the
+		   error due to the approximation used for pi/2
+		 */
 
-  /* Now find the ratio. */
-  if ( accumulatore.msw == 0 )
-    {
-      /* accumulatoro must contain 1.0 here, (actually, 0) but it
-	 really doesn't matter what value we use because it will
-	 have negligible effect in later calculations
-	 */
-      XSIG_LL(accum) = 0x8000000000000000LL;
-      accum.lsw = 0;
-    }
-  else
-    {
-      div_Xsig(&accumulatoro, &accumulatore, &accum);
-    }
+		/* This is (approx) delta, the error in our approx for pi/2
+		   (see above). It has an exponent of -65
+		 */
+		XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
+		fix_up.lsw = 0;
 
-  /* Multiply by 1/3 * arg^3 */
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &twothirds);
-  shr_Xsig(&accum, -2*(exponent+1));
+		if (exponent == 0)
+			adj = 0xffffffff;	/* We want approx 1.0 here, but
+						   this is close enough. */
+		else if (exponent > -30) {
+			adj = accum.msw >> -(exponent + 1);	/* tan */
+			adj = mul_32_32(adj, adj);	/* tan^2 */
+		} else
+			adj = 0;
+		adj = mul_32_32(0x898cc517, adj);	/* delta * tan^2 */
 
-  /* tan(arg) = arg + accum */
-  add_two_Xsig(&accum, &argSignif, &exponent);
+		fix_up.msw += adj;
+		if (!(fix_up.msw & 0x80000000)) {	/* did fix_up overflow ? */
+			/* Yes, we need to add an msb */
+			shr_Xsig(&fix_up, 1);
+			fix_up.msw |= 0x80000000;
+			shr_Xsig(&fix_up, 64 + exponent);
+		} else
+			shr_Xsig(&fix_up, 65 + exponent);
 
-  if ( invert )
-    {
-      /* We now have the value of tan(pi_2 - arg) where pi_2 is an
-	 approximation for pi/2
-	 */
-      /* The next step is to fix the answer to compensate for the
-	 error due to the approximation used for pi/2
-	 */
+		add_two_Xsig(&accum, &fix_up, &exponent);
 
-      /* This is (approx) delta, the error in our approx for pi/2
-	 (see above). It has an exponent of -65
-	 */
-      XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
-      fix_up.lsw = 0;
-
-      if ( exponent == 0 )
-	adj = 0xffffffff;   /* We want approx 1.0 here, but
-			       this is close enough. */
-      else if ( exponent > -30 )
-	{
-	  adj = accum.msw >> -(exponent+1);      /* tan */
-	  adj = mul_32_32(adj, adj);             /* tan^2 */
+		/* accum now contains tan(pi/2 - arg).
+		   Use tan(arg) = 1.0 / tan(pi/2 - arg)
+		 */
+		accumulatoro.lsw = accumulatoro.midw = 0;
+		accumulatoro.msw = 0x80000000;
+		div_Xsig(&accumulatoro, &accum, &accum);
+		exponent = -exponent - 1;
 	}
-      else
-	adj = 0;
-      adj = mul_32_32(0x898cc517, adj);          /* delta * tan^2 */
 
-      fix_up.msw += adj;
-      if ( !(fix_up.msw & 0x80000000) )   /* did fix_up overflow ? */
-	{
-	  /* Yes, we need to add an msb */
-	  shr_Xsig(&fix_up, 1);
-	  fix_up.msw |= 0x80000000;
-	  shr_Xsig(&fix_up, 64 + exponent);
-	}
-      else
-	shr_Xsig(&fix_up, 65 + exponent);
-
-      add_two_Xsig(&accum, &fix_up, &exponent);
-
-      /* accum now contains tan(pi/2 - arg).
-	 Use tan(arg) = 1.0 / tan(pi/2 - arg)
-	 */
-      accumulatoro.lsw = accumulatoro.midw = 0;
-      accumulatoro.msw = 0x80000000;
-      div_Xsig(&accumulatoro, &accum, &accum);
-      exponent = - exponent - 1;
-    }
-
-  /* Transfer the result */
-  round_Xsig(&accum);
-  FPU_settag0(TAG_Valid);
-  significand(st0_ptr) = XSIG_LL(accum);
-  setexponent16(st0_ptr, exponent + EXTENDED_Ebias);  /* Result is positive. */
+	/* Transfer the result */
+	round_Xsig(&accum);
+	FPU_settag0(TAG_Valid);
+	significand(st0_ptr) = XSIG_LL(accum);
+	setexponent16(st0_ptr, exponent + EXTENDED_Ebias);	/* Result is positive. */
 
 }
diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c
index 7cd3b37ac084..deea48b9f13a 100644
--- a/arch/x86/math-emu/reg_add_sub.c
+++ b/arch/x86/math-emu/reg_add_sub.c
@@ -27,7 +27,7 @@
 static
 int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
 		     FPU_REG const *b, u_char tagb, u_char signb,
-		     FPU_REG *dest, int deststnr, int control_w);
+		     FPU_REG * dest, int deststnr, int control_w);
 
 /*
   Operates on st(0) and st(n), or on st(0) and temporary data.
@@ -35,340 +35,299 @@ int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
   */
 int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
 {
-  FPU_REG *a = &st(0);
-  FPU_REG *dest = &st(deststnr);
-  u_char signb = getsign(b);
-  u_char taga = FPU_gettag0();
-  u_char signa = getsign(a);
-  u_char saved_sign = getsign(dest);
-  int diff, tag, expa, expb;
-  
-  if ( !(taga | tagb) )
-    {
-      expa = exponent(a);
-      expb = exponent(b);
+	FPU_REG *a = &st(0);
+	FPU_REG *dest = &st(deststnr);
+	u_char signb = getsign(b);
+	u_char taga = FPU_gettag0();
+	u_char signa = getsign(a);
+	u_char saved_sign = getsign(dest);
+	int diff, tag, expa, expb;
 
-    valid_add:
-      /* Both registers are valid */
-      if (!(signa ^ signb))
-	{
-	  /* signs are the same */
-	  tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
-	}
-      else
-	{
-	  /* The signs are different, so do a subtraction */
-	  diff = expa - expb;
-	  if (!diff)
-	    {
-	      diff = a->sigh - b->sigh;  /* This works only if the ms bits
-					    are identical. */
-	      if (!diff)
-		{
-		  diff = a->sigl > b->sigl;
-		  if (!diff)
-		    diff = -(a->sigl < b->sigl);
+	if (!(taga | tagb)) {
+		expa = exponent(a);
+		expb = exponent(b);
+
+	      valid_add:
+		/* Both registers are valid */
+		if (!(signa ^ signb)) {
+			/* signs are the same */
+			tag =
+			    FPU_u_add(a, b, dest, control_w, signa, expa, expb);
+		} else {
+			/* The signs are different, so do a subtraction */
+			diff = expa - expb;
+			if (!diff) {
+				diff = a->sigh - b->sigh;	/* This works only if the ms bits
+								   are identical. */
+				if (!diff) {
+					diff = a->sigl > b->sigl;
+					if (!diff)
+						diff = -(a->sigl < b->sigl);
+				}
+			}
+
+			if (diff > 0) {
+				tag =
+				    FPU_u_sub(a, b, dest, control_w, signa,
+					      expa, expb);
+			} else if (diff < 0) {
+				tag =
+				    FPU_u_sub(b, a, dest, control_w, signb,
+					      expb, expa);
+			} else {
+				FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+				/* sign depends upon rounding mode */
+				setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+					? SIGN_POS : SIGN_NEG);
+				return TAG_Zero;
+			}
 		}
-	    }
-      
-	  if (diff > 0)
-	    {
-	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
-	    }
-	  else if ( diff < 0 )
-	    {
-	      tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
-	    }
-	  else
-	    {
-	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-	      /* sign depends upon rounding mode */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		      ? SIGN_POS : SIGN_NEG);
-	      return TAG_Zero;
-	    }
+
+		if (tag < 0) {
+			setsign(dest, saved_sign);
+			return tag;
+		}
+		FPU_settagi(deststnr, tag);
+		return tag;
 	}
 
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
+	if (taga == TAG_Special)
+		taga = FPU_Special(a);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
 
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	if (((taga == TAG_Valid) && (tagb == TW_Denormal))
 	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      FPU_REG x, y;
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+		FPU_REG x, y;
 
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
+		if (denormal_operand() < 0)
+			return FPU_Exception;
 
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      a = &x;
-      b = &y;
-      expa = exponent16(a);
-      expb = exponent16(b);
-      goto valid_add;
-    }
+		FPU_to_exp16(a, &x);
+		FPU_to_exp16(b, &y);
+		a = &x;
+		b = &y;
+		expa = exponent16(a);
+		expb = exponent16(b);
+		goto valid_add;
+	}
 
-  if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      if ( deststnr == 0 )
-	return real_2op_NaN(b, tagb, deststnr, a);
-      else
-	return real_2op_NaN(a, taga, deststnr, a);
-    }
+	if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+		if (deststnr == 0)
+			return real_2op_NaN(b, tagb, deststnr, a);
+		else
+			return real_2op_NaN(a, taga, deststnr, a);
+	}
 
-  return add_sub_specials(a, taga, signa, b, tagb, signb,
-			  dest, deststnr, control_w);
+	return add_sub_specials(a, taga, signa, b, tagb, signb,
+				dest, deststnr, control_w);
 }
 
-
 /* Subtract b from a.  (a-b) -> dest */
 int FPU_sub(int flags, int rm, int control_w)
 {
-  FPU_REG const *a, *b;
-  FPU_REG *dest;
-  u_char taga, tagb, signa, signb, saved_sign, sign;
-  int diff, tag = 0, expa, expb, deststnr;
+	FPU_REG const *a, *b;
+	FPU_REG *dest;
+	u_char taga, tagb, signa, signb, saved_sign, sign;
+	int diff, tag = 0, expa, expb, deststnr;
 
-  a = &st(0);
-  taga = FPU_gettag0();
+	a = &st(0);
+	taga = FPU_gettag0();
 
-  deststnr = 0;
-  if ( flags & LOADED )
-    {
-      b = (FPU_REG *)rm;
-      tagb = flags & 0x0f;
-    }
-  else
-    {
-      b = &st(rm);
-      tagb = FPU_gettagi(rm);
+	deststnr = 0;
+	if (flags & LOADED) {
+		b = (FPU_REG *) rm;
+		tagb = flags & 0x0f;
+	} else {
+		b = &st(rm);
+		tagb = FPU_gettagi(rm);
 
-      if ( flags & DEST_RM )
-	deststnr = rm;
-    }
-
-  signa = getsign(a);
-  signb = getsign(b);
-
-  if ( flags & REV )
-    {
-      signa ^= SIGN_NEG;
-      signb ^= SIGN_NEG;
-    }
-
-  dest = &st(deststnr);
-  saved_sign = getsign(dest);
-
-  if ( !(taga | tagb) )
-    {
-      expa = exponent(a);
-      expb = exponent(b);
-
-    valid_subtract:
-      /* Both registers are valid */
-
-      diff = expa - expb;
-
-      if (!diff)
-	{
-	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
-	  if (!diff)
-	    {
-	      diff = a->sigl > b->sigl;
-	      if (!diff)
-		diff = -(a->sigl < b->sigl);
-	    }
+		if (flags & DEST_RM)
+			deststnr = rm;
 	}
 
-      switch ( (((int)signa)*2 + signb) / SIGN_NEG )
-	{
-	case 0: /* P - P */
-	case 3: /* N - N */
-	  if (diff > 0)
-	    {
-	      /* |a| > |b| */
-	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
-	    }
-	  else if ( diff == 0 )
-	    {
-	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+	signa = getsign(a);
+	signb = getsign(b);
 
-	      /* sign depends upon rounding mode */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		? SIGN_POS : SIGN_NEG);
-	      return TAG_Zero;
-	    }
-	  else
-	    {
-	      sign = signa ^ SIGN_NEG;
-	      tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
-	    }
-	  break;
-	case 1: /* P - N */
-	  tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
-	  break;
-	case 2: /* N - P */
-	  tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
-	  break;
+	if (flags & REV) {
+		signa ^= SIGN_NEG;
+		signb ^= SIGN_NEG;
+	}
+
+	dest = &st(deststnr);
+	saved_sign = getsign(dest);
+
+	if (!(taga | tagb)) {
+		expa = exponent(a);
+		expb = exponent(b);
+
+	      valid_subtract:
+		/* Both registers are valid */
+
+		diff = expa - expb;
+
+		if (!diff) {
+			diff = a->sigh - b->sigh;	/* Works only if ms bits are identical */
+			if (!diff) {
+				diff = a->sigl > b->sigl;
+				if (!diff)
+					diff = -(a->sigl < b->sigl);
+			}
+		}
+
+		switch ((((int)signa) * 2 + signb) / SIGN_NEG) {
+		case 0:	/* P - P */
+		case 3:	/* N - N */
+			if (diff > 0) {
+				/* |a| > |b| */
+				tag =
+				    FPU_u_sub(a, b, dest, control_w, signa,
+					      expa, expb);
+			} else if (diff == 0) {
+				FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+
+				/* sign depends upon rounding mode */
+				setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+					? SIGN_POS : SIGN_NEG);
+				return TAG_Zero;
+			} else {
+				sign = signa ^ SIGN_NEG;
+				tag =
+				    FPU_u_sub(b, a, dest, control_w, sign, expb,
+					      expa);
+			}
+			break;
+		case 1:	/* P - N */
+			tag =
+			    FPU_u_add(a, b, dest, control_w, SIGN_POS, expa,
+				      expb);
+			break;
+		case 2:	/* N - P */
+			tag =
+			    FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa,
+				      expb);
+			break;
 #ifdef PARANOID
-	default:
-	  EXCEPTION(EX_INTERNAL|0x111);
-	  return -1;
+		default:
+			EXCEPTION(EX_INTERNAL | 0x111);
+			return -1;
 #endif
+		}
+		if (tag < 0) {
+			setsign(dest, saved_sign);
+			return tag;
+		}
+		FPU_settagi(deststnr, tag);
+		return tag;
 	}
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
 
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
+	if (taga == TAG_Special)
+		taga = FPU_Special(a);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
 
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	if (((taga == TAG_Valid) && (tagb == TW_Denormal))
 	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      FPU_REG x, y;
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+		FPU_REG x, y;
 
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
+		if (denormal_operand() < 0)
+			return FPU_Exception;
 
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      a = &x;
-      b = &y;
-      expa = exponent16(a);
-      expb = exponent16(b);
+		FPU_to_exp16(a, &x);
+		FPU_to_exp16(b, &y);
+		a = &x;
+		b = &y;
+		expa = exponent16(a);
+		expb = exponent16(b);
 
-      goto valid_subtract;
-    }
-
-  if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      FPU_REG const *d1, *d2;
-      if ( flags & REV )
-	{
-	  d1 = b;
-	  d2 = a;
+		goto valid_subtract;
 	}
-      else
-	{
-	  d1 = a;
-	  d2 = b;
-	}
-      if ( flags & LOADED )
-	return real_2op_NaN(b, tagb, deststnr, d1);
-      if ( flags & DEST_RM )
-	return real_2op_NaN(a, taga, deststnr, d2);
-      else
-	return real_2op_NaN(b, tagb, deststnr, d2);
-    }
 
-    return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
-			    dest, deststnr, control_w);
+	if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+		FPU_REG const *d1, *d2;
+		if (flags & REV) {
+			d1 = b;
+			d2 = a;
+		} else {
+			d1 = a;
+			d2 = b;
+		}
+		if (flags & LOADED)
+			return real_2op_NaN(b, tagb, deststnr, d1);
+		if (flags & DEST_RM)
+			return real_2op_NaN(a, taga, deststnr, d2);
+		else
+			return real_2op_NaN(b, tagb, deststnr, d2);
+	}
+
+	return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
+				dest, deststnr, control_w);
 }
 
-
 static
 int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
 		     FPU_REG const *b, u_char tagb, u_char signb,
-		     FPU_REG *dest, int deststnr, int control_w)
+		     FPU_REG * dest, int deststnr, int control_w)
 {
-  if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
-       && (denormal_operand() < 0) )
-    return FPU_Exception;
+	if (((taga == TW_Denormal) || (tagb == TW_Denormal))
+	    && (denormal_operand() < 0))
+		return FPU_Exception;
 
-  if (taga == TAG_Zero)
-    {
-      if (tagb == TAG_Zero)
-	{
-	  /* Both are zero, result will be zero. */
-	  u_char different_signs = signa ^ signb;
+	if (taga == TAG_Zero) {
+		if (tagb == TAG_Zero) {
+			/* Both are zero, result will be zero. */
+			u_char different_signs = signa ^ signb;
 
-	  FPU_copy_to_regi(a, TAG_Zero, deststnr);
-	  if ( different_signs )
-	    {
-	      /* Signs are different. */
-	      /* Sign of answer depends upon rounding mode. */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		      ? SIGN_POS : SIGN_NEG);
-	    }
-	  else
-	    setsign(dest, signa);  /* signa may differ from the sign of a. */
-	  return TAG_Zero;
+			FPU_copy_to_regi(a, TAG_Zero, deststnr);
+			if (different_signs) {
+				/* Signs are different. */
+				/* Sign of answer depends upon rounding mode. */
+				setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+					? SIGN_POS : SIGN_NEG);
+			} else
+				setsign(dest, signa);	/* signa may differ from the sign of a. */
+			return TAG_Zero;
+		} else {
+			reg_copy(b, dest);
+			if ((tagb == TW_Denormal) && (b->sigh & 0x80000000)) {
+				/* A pseudoDenormal, convert it. */
+				addexponent(dest, 1);
+				tagb = TAG_Valid;
+			} else if (tagb > TAG_Empty)
+				tagb = TAG_Special;
+			setsign(dest, signb);	/* signb may differ from the sign of b. */
+			FPU_settagi(deststnr, tagb);
+			return tagb;
+		}
+	} else if (tagb == TAG_Zero) {
+		reg_copy(a, dest);
+		if ((taga == TW_Denormal) && (a->sigh & 0x80000000)) {
+			/* A pseudoDenormal */
+			addexponent(dest, 1);
+			taga = TAG_Valid;
+		} else if (taga > TAG_Empty)
+			taga = TAG_Special;
+		setsign(dest, signa);	/* signa may differ from the sign of a. */
+		FPU_settagi(deststnr, taga);
+		return taga;
+	} else if (taga == TW_Infinity) {
+		if ((tagb != TW_Infinity) || (signa == signb)) {
+			FPU_copy_to_regi(a, TAG_Special, deststnr);
+			setsign(dest, signa);	/* signa may differ from the sign of a. */
+			return taga;
+		}
+		/* Infinity-Infinity is undefined. */
+		return arith_invalid(deststnr);
+	} else if (tagb == TW_Infinity) {
+		FPU_copy_to_regi(b, TAG_Special, deststnr);
+		setsign(dest, signb);	/* signb may differ from the sign of b. */
+		return tagb;
 	}
-      else
-	{
-	  reg_copy(b, dest);
-	  if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
-	    {
-	      /* A pseudoDenormal, convert it. */
-	      addexponent(dest, 1);
-	      tagb = TAG_Valid;
-	    }
-	  else if ( tagb > TAG_Empty )
-	    tagb = TAG_Special;
-	  setsign(dest, signb);  /* signb may differ from the sign of b. */
-	  FPU_settagi(deststnr, tagb);
-	  return tagb;
-	}
-    }
-  else if (tagb == TAG_Zero)
-    {
-      reg_copy(a, dest);
-      if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
-	{
-	  /* A pseudoDenormal */
-	  addexponent(dest, 1);
-	  taga = TAG_Valid;
-	}
-      else if ( taga > TAG_Empty )
-	taga = TAG_Special;
-      setsign(dest, signa);  /* signa may differ from the sign of a. */
-      FPU_settagi(deststnr, taga);
-      return taga;
-    }
-  else if (taga == TW_Infinity)
-    {
-      if ( (tagb != TW_Infinity) || (signa == signb) )
-	{
-	  FPU_copy_to_regi(a, TAG_Special, deststnr);
-	  setsign(dest, signa);  /* signa may differ from the sign of a. */
-	  return taga;
-	}
-      /* Infinity-Infinity is undefined. */
-      return arith_invalid(deststnr);
-    }
-  else if (tagb == TW_Infinity)
-    {
-      FPU_copy_to_regi(b, TAG_Special, deststnr);
-      setsign(dest, signb);  /* signb may differ from the sign of b. */
-      return tagb;
-    }
-
 #ifdef PARANOID
-  EXCEPTION(EX_INTERNAL|0x101);
+	EXCEPTION(EX_INTERNAL | 0x101);
 #endif
 
-  return FPU_Exception;
+	return FPU_Exception;
 }
-
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index f37c5b5a35ad..ecce55fc2e2e 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -20,362 +20,331 @@
 #include "control_w.h"
 #include "status_w.h"
 
-
 static int compare(FPU_REG const *b, int tagb)
 {
-  int diff, exp0, expb;
-  u_char	  	st0_tag;
-  FPU_REG  	*st0_ptr;
-  FPU_REG	x, y;
-  u_char		st0_sign, signb = getsign(b);
+	int diff, exp0, expb;
+	u_char st0_tag;
+	FPU_REG *st0_ptr;
+	FPU_REG x, y;
+	u_char st0_sign, signb = getsign(b);
 
-  st0_ptr = &st(0);
-  st0_tag = FPU_gettag0();
-  st0_sign = getsign(st0_ptr);
+	st0_ptr = &st(0);
+	st0_tag = FPU_gettag0();
+	st0_sign = getsign(st0_ptr);
 
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
+	if (st0_tag == TAG_Special)
+		st0_tag = FPU_Special(st0_ptr);
 
-  if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
-       || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
-    {
-      if ( st0_tag == TAG_Zero )
-	{
-	  if ( tagb == TAG_Zero ) return COMP_A_eq_B;
-	  if ( tagb == TAG_Valid )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
-	  if ( tagb == TW_Denormal )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-	    | COMP_Denormal;
-	}
-      else if ( tagb == TAG_Zero )
-	{
-	  if ( st0_tag == TAG_Valid )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	  if ( st0_tag == TW_Denormal )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	    | COMP_Denormal;
+	if (((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
+	    || ((tagb != TAG_Valid) && (tagb != TW_Denormal))) {
+		if (st0_tag == TAG_Zero) {
+			if (tagb == TAG_Zero)
+				return COMP_A_eq_B;
+			if (tagb == TAG_Valid)
+				return ((signb ==
+					 SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+			if (tagb == TW_Denormal)
+				return ((signb ==
+					 SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+				    | COMP_Denormal;
+		} else if (tagb == TAG_Zero) {
+			if (st0_tag == TAG_Valid)
+				return ((st0_sign ==
+					 SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+			if (st0_tag == TW_Denormal)
+				return ((st0_sign ==
+					 SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+				    | COMP_Denormal;
+		}
+
+		if (st0_tag == TW_Infinity) {
+			if ((tagb == TAG_Valid) || (tagb == TAG_Zero))
+				return ((st0_sign ==
+					 SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+			else if (tagb == TW_Denormal)
+				return ((st0_sign ==
+					 SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+				    | COMP_Denormal;
+			else if (tagb == TW_Infinity) {
+				/* The 80486 book says that infinities can be equal! */
+				return (st0_sign == signb) ? COMP_A_eq_B :
+				    ((st0_sign ==
+				      SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+			}
+			/* Fall through to the NaN code */
+		} else if (tagb == TW_Infinity) {
+			if ((st0_tag == TAG_Valid) || (st0_tag == TAG_Zero))
+				return ((signb ==
+					 SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+			if (st0_tag == TW_Denormal)
+				return ((signb ==
+					 SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+				    | COMP_Denormal;
+			/* Fall through to the NaN code */
+		}
+
+		/* The only possibility now should be that one of the arguments
+		   is a NaN */
+		if ((st0_tag == TW_NaN) || (tagb == TW_NaN)) {
+			int signalling = 0, unsupported = 0;
+			if (st0_tag == TW_NaN) {
+				signalling =
+				    (st0_ptr->sigh & 0xc0000000) == 0x80000000;
+				unsupported = !((exponent(st0_ptr) == EXP_OVER)
+						&& (st0_ptr->
+						    sigh & 0x80000000));
+			}
+			if (tagb == TW_NaN) {
+				signalling |=
+				    (b->sigh & 0xc0000000) == 0x80000000;
+				unsupported |= !((exponent(b) == EXP_OVER)
+						 && (b->sigh & 0x80000000));
+			}
+			if (signalling || unsupported)
+				return COMP_No_Comp | COMP_SNaN | COMP_NaN;
+			else
+				/* Neither is a signaling NaN */
+				return COMP_No_Comp | COMP_NaN;
+		}
+
+		EXCEPTION(EX_Invalid);
 	}
 
-      if ( st0_tag == TW_Infinity )
-	{
-	  if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	  else if ( tagb == TW_Denormal )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	      | COMP_Denormal;
-	  else if ( tagb == TW_Infinity )
-	    {
-	      /* The 80486 book says that infinities can be equal! */
-	      return (st0_sign == signb) ? COMP_A_eq_B :
-		((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	    }
-	  /* Fall through to the NaN code */
-	}
-      else if ( tagb == TW_Infinity )
-	{
-	  if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
-	  if ( st0_tag == TW_Denormal )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-		| COMP_Denormal;
-	  /* Fall through to the NaN code */
+	if (st0_sign != signb) {
+		return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+		    | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+		       COMP_Denormal : 0);
 	}
 
-      /* The only possibility now should be that one of the arguments
-	 is a NaN */
-      if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
-	{
-	  int signalling = 0, unsupported = 0;
-	  if ( st0_tag == TW_NaN )
-	    {
-	      signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
-	      unsupported = !((exponent(st0_ptr) == EXP_OVER)
-			      && (st0_ptr->sigh & 0x80000000));
-	    }
-	  if ( tagb == TW_NaN )
-	    {
-	      signalling |= (b->sigh & 0xc0000000) == 0x80000000;
-	      unsupported |= !((exponent(b) == EXP_OVER)
-			       && (b->sigh & 0x80000000));
-	    }
-	  if ( signalling || unsupported )
-	    return COMP_No_Comp | COMP_SNaN | COMP_NaN;
-	  else
-	    /* Neither is a signaling NaN */
-	    return COMP_No_Comp | COMP_NaN;
+	if ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) {
+		FPU_to_exp16(st0_ptr, &x);
+		FPU_to_exp16(b, &y);
+		st0_ptr = &x;
+		b = &y;
+		exp0 = exponent16(st0_ptr);
+		expb = exponent16(b);
+	} else {
+		exp0 = exponent(st0_ptr);
+		expb = exponent(b);
 	}
-      
-      EXCEPTION(EX_Invalid);
-    }
-  
-  if (st0_sign != signb)
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
-
-  if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
-    {
-      FPU_to_exp16(st0_ptr, &x);
-      FPU_to_exp16(b, &y);
-      st0_ptr = &x;
-      b = &y;
-      exp0 = exponent16(st0_ptr);
-      expb = exponent16(b);
-    }
-  else
-    {
-      exp0 = exponent(st0_ptr);
-      expb = exponent(b);
-    }
 
 #ifdef PARANOID
-  if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
-  if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+	if (!(st0_ptr->sigh & 0x80000000))
+		EXCEPTION(EX_Invalid);
+	if (!(b->sigh & 0x80000000))
+		EXCEPTION(EX_Invalid);
 #endif /* PARANOID */
 
-  diff = exp0 - expb;
-  if ( diff == 0 )
-    {
-      diff = st0_ptr->sigh - b->sigh;  /* Works only if ms bits are
-					      identical */
-      if ( diff == 0 )
-	{
-	diff = st0_ptr->sigl > b->sigl;
-	if ( diff == 0 )
-	  diff = -(st0_ptr->sigl < b->sigl);
+	diff = exp0 - expb;
+	if (diff == 0) {
+		diff = st0_ptr->sigh - b->sigh;	/* Works only if ms bits are
+						   identical */
+		if (diff == 0) {
+			diff = st0_ptr->sigl > b->sigl;
+			if (diff == 0)
+				diff = -(st0_ptr->sigl < b->sigl);
+		}
 	}
-    }
 
-  if ( diff > 0 )
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
-  if ( diff < 0 )
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
+	if (diff > 0) {
+		return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+		    | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+		       COMP_Denormal : 0);
+	}
+	if (diff < 0) {
+		return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+		    | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+		       COMP_Denormal : 0);
+	}
 
-  return COMP_A_eq_B
-    | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	COMP_Denormal : 0);
+	return COMP_A_eq_B
+	    | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+	       COMP_Denormal : 0);
 
 }
 
-
 /* This function requires that st(0) is not empty */
 int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 {
-  int f = 0, c;
+	int f = 0, c;
 
-  c = compare(loaded_data, loaded_tag);
+	c = compare(loaded_data, loaded_tag);
 
-  if (c & COMP_NaN)
-    {
-      EXCEPTION(EX_Invalid);
-      f = SW_C3 | SW_C2 | SW_C0;
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
+	if (c & COMP_NaN) {
+		EXCEPTION(EX_Invalid);
+		f = SW_C3 | SW_C2 | SW_C0;
+	} else
+		switch (c & 7) {
+		case COMP_A_lt_B:
+			f = SW_C0;
+			break;
+		case COMP_A_eq_B:
+			f = SW_C3;
+			break;
+		case COMP_A_gt_B:
+			f = 0;
+			break;
+		case COMP_No_Comp:
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
 #ifdef PARANOID
-      default:
-	EXCEPTION(EX_INTERNAL|0x121);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
+		default:
+			EXCEPTION(EX_INTERNAL | 0x121);
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
 #endif /* PARANOID */
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
+		}
+	setcc(f);
+	if (c & COMP_Denormal) {
+		return denormal_operand() < 0;
+	}
+	return 0;
 }
 
-
 static int compare_st_st(int nr)
 {
-  int f = 0, c;
-  FPU_REG *st_ptr;
+	int f = 0, c;
+	FPU_REG *st_ptr;
 
-  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      /* Stack fault */
-      EXCEPTION(EX_StackUnder);
-      return !(control_word & CW_Invalid);
-    }
+	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+		setcc(SW_C3 | SW_C2 | SW_C0);
+		/* Stack fault */
+		EXCEPTION(EX_StackUnder);
+		return !(control_word & CW_Invalid);
+	}
 
-  st_ptr = &st(nr);
-  c = compare(st_ptr, FPU_gettagi(nr));
-  if (c & COMP_NaN)
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      EXCEPTION(EX_Invalid);
-      return !(control_word & CW_Invalid);
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
+	st_ptr = &st(nr);
+	c = compare(st_ptr, FPU_gettagi(nr));
+	if (c & COMP_NaN) {
+		setcc(SW_C3 | SW_C2 | SW_C0);
+		EXCEPTION(EX_Invalid);
+		return !(control_word & CW_Invalid);
+	} else
+		switch (c & 7) {
+		case COMP_A_lt_B:
+			f = SW_C0;
+			break;
+		case COMP_A_eq_B:
+			f = SW_C3;
+			break;
+		case COMP_A_gt_B:
+			f = 0;
+			break;
+		case COMP_No_Comp:
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
 #ifdef PARANOID
-      default:
-	EXCEPTION(EX_INTERNAL|0x122);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
+		default:
+			EXCEPTION(EX_INTERNAL | 0x122);
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
 #endif /* PARANOID */
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
+		}
+	setcc(f);
+	if (c & COMP_Denormal) {
+		return denormal_operand() < 0;
+	}
+	return 0;
 }
 
-
 static int compare_u_st_st(int nr)
 {
-  int f = 0, c;
-  FPU_REG *st_ptr;
+	int f = 0, c;
+	FPU_REG *st_ptr;
 
-  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      /* Stack fault */
-      EXCEPTION(EX_StackUnder);
-      return !(control_word & CW_Invalid);
-    }
-
-  st_ptr = &st(nr);
-  c = compare(st_ptr, FPU_gettagi(nr));
-  if (c & COMP_NaN)
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      if (c & COMP_SNaN)       /* This is the only difference between
-				  un-ordered and ordinary comparisons */
-	{
-	  EXCEPTION(EX_Invalid);
-	  return !(control_word & CW_Invalid);
+	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+		setcc(SW_C3 | SW_C2 | SW_C0);
+		/* Stack fault */
+		EXCEPTION(EX_StackUnder);
+		return !(control_word & CW_Invalid);
 	}
-      return 0;
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
+
+	st_ptr = &st(nr);
+	c = compare(st_ptr, FPU_gettagi(nr));
+	if (c & COMP_NaN) {
+		setcc(SW_C3 | SW_C2 | SW_C0);
+		if (c & COMP_SNaN) {	/* This is the only difference between
+					   un-ordered and ordinary comparisons */
+			EXCEPTION(EX_Invalid);
+			return !(control_word & CW_Invalid);
+		}
+		return 0;
+	} else
+		switch (c & 7) {
+		case COMP_A_lt_B:
+			f = SW_C0;
+			break;
+		case COMP_A_eq_B:
+			f = SW_C3;
+			break;
+		case COMP_A_gt_B:
+			f = 0;
+			break;
+		case COMP_No_Comp:
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
 #ifdef PARANOID
-      default:
-	EXCEPTION(EX_INTERNAL|0x123);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#endif /* PARANOID */ 
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
+		default:
+			EXCEPTION(EX_INTERNAL | 0x123);
+			f = SW_C3 | SW_C2 | SW_C0;
+			break;
+#endif /* PARANOID */
+		}
+	setcc(f);
+	if (c & COMP_Denormal) {
+		return denormal_operand() < 0;
+	}
+	return 0;
 }
 
 /*---------------------------------------------------------------------------*/
 
 void fcom_st(void)
 {
-  /* fcom st(i) */
-  compare_st_st(FPU_rm);
+	/* fcom st(i) */
+	compare_st_st(FPU_rm);
 }
 
-
 void fcompst(void)
 {
-  /* fcomp st(i) */
-  if ( !compare_st_st(FPU_rm) )
-    FPU_pop();
+	/* fcomp st(i) */
+	if (!compare_st_st(FPU_rm))
+		FPU_pop();
 }
 
-
 void fcompp(void)
 {
-  /* fcompp */
-  if (FPU_rm != 1)
-    {
-      FPU_illegal();
-      return;
-    }
-  if ( !compare_st_st(1) )
-      poppop();
+	/* fcompp */
+	if (FPU_rm != 1) {
+		FPU_illegal();
+		return;
+	}
+	if (!compare_st_st(1))
+		poppop();
 }
 
-
 void fucom_(void)
 {
-  /* fucom st(i) */
-  compare_u_st_st(FPU_rm);
+	/* fucom st(i) */
+	compare_u_st_st(FPU_rm);
 
 }
 
-
 void fucomp(void)
 {
-  /* fucomp st(i) */
-  if ( !compare_u_st_st(FPU_rm) )
-    FPU_pop();
+	/* fucomp st(i) */
+	if (!compare_u_st_st(FPU_rm))
+		FPU_pop();
 }
 
-
 void fucompp(void)
 {
-  /* fucompp */
-  if (FPU_rm == 1)
-    {
-      if ( !compare_u_st_st(1) )
-	poppop();
-    }
-  else
-    FPU_illegal();
+	/* fucompp */
+	if (FPU_rm == 1) {
+		if (!compare_u_st_st(1))
+			poppop();
+	} else
+		FPU_illegal();
 }
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index a85015801969..04869e64b18e 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -16,29 +16,28 @@
 #include "reg_constant.h"
 #include "control_w.h"
 
-
 #define MAKE_REG(s,e,l,h) { l, h, \
                             ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
 
-FPU_REG const CONST_1    = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
+FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
 #if 0
-FPU_REG const CONST_2    = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
+FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
 FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
-#endif  /*  0  */
-static FPU_REG const CONST_L2T  = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
-static FPU_REG const CONST_L2E  = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
-FPU_REG const CONST_PI   = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_PI2  = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_PI4  = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
-static FPU_REG const CONST_LG2  = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
-static FPU_REG const CONST_LN2  = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
+#endif /*  0  */
+static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
+static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
+FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
+static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
+static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
 
 /* Extra bits to take pi/2 to more than 128 bits precision. */
 FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
-					 0xfc8f8cbb, 0xece675d1);
+					0xfc8f8cbb, 0xece675d1);
 
 /* Only the sign (and tag) is used in internal zeroes */
-FPU_REG const CONST_Z    = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
+FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
 
 /* Only the sign and significand (and tag) are used in internal NaNs */
 /* The 80486 never generates one of these 
@@ -48,24 +47,22 @@ FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
 FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
 
 /* Only the sign (and tag) is used in internal infinities */
-FPU_REG const CONST_INF  = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
-
+FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
 
 static void fld_const(FPU_REG const *c, int adj, u_char tag)
 {
-  FPU_REG *st_new_ptr;
+	FPU_REG *st_new_ptr;
 
-  if ( STACK_OVERFLOW )
-    {
-      FPU_stack_overflow();
-      return;
-    }
-  push();
-  reg_copy(c, st_new_ptr);
-  st_new_ptr->sigl += adj;  /* For all our fldxxx constants, we don't need to
-			       borrow or carry. */
-  FPU_settag0(tag);
-  clear_C1();
+	if (STACK_OVERFLOW) {
+		FPU_stack_overflow();
+		return;
+	}
+	push();
+	reg_copy(c, st_new_ptr);
+	st_new_ptr->sigl += adj;	/* For all our fldxxx constants, we don't need to
+					   borrow or carry. */
+	FPU_settag0(tag);
+	clear_C1();
 }
 
 /* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
@@ -75,46 +72,46 @@ static void fld_const(FPU_REG const *c, int adj, u_char tag)
 
 static void fld1(int rc)
 {
-  fld_const(&CONST_1, 0, TAG_Valid);
+	fld_const(&CONST_1, 0, TAG_Valid);
 }
 
 static void fldl2t(int rc)
 {
-  fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
+	fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
 }
 
 static void fldl2e(int rc)
 {
-  fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+	fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
 }
 
 static void fldpi(int rc)
 {
-  fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+	fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
 }
 
 static void fldlg2(int rc)
 {
-  fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+	fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
 }
 
 static void fldln2(int rc)
 {
-  fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+	fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
 }
 
 static void fldz(int rc)
 {
-  fld_const(&CONST_Z, 0, TAG_Zero);
+	fld_const(&CONST_Z, 0, TAG_Zero);
 }
 
-typedef void (*FUNC_RC)(int);
+typedef void (*FUNC_RC) (int);
 
 static FUNC_RC constants_table[] = {
-  fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
+	fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC) FPU_illegal
 };
 
 void fconst(void)
 {
-  (constants_table[FPU_rm])(control_word & CW_RC);
+	(constants_table[FPU_rm]) (control_word & CW_RC);
 }
diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c
index 45a258752703..afd31b31000d 100644
--- a/arch/x86/math-emu/reg_convert.c
+++ b/arch/x86/math-emu/reg_convert.c
@@ -13,41 +13,34 @@
 #include "exception.h"
 #include "fpu_emu.h"
 
-
-int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
+int FPU_to_exp16(FPU_REG const *a, FPU_REG * x)
 {
-  int sign = getsign(a);
+	int sign = getsign(a);
 
-  *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
+	*(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
 
-  /* Set up the exponent as a 16 bit quantity. */
-  setexponent16(x, exponent(a));
+	/* Set up the exponent as a 16 bit quantity. */
+	setexponent16(x, exponent(a));
 
-  if ( exponent16(x) == EXP_UNDER )
-    {
-      /* The number is a de-normal or pseudodenormal. */
-      /* We only deal with the significand and exponent. */
+	if (exponent16(x) == EXP_UNDER) {
+		/* The number is a de-normal or pseudodenormal. */
+		/* We only deal with the significand and exponent. */
 
-      if (x->sigh & 0x80000000)
-	{
-	  /* Is a pseudodenormal. */
-	  /* This is non-80486 behaviour because the number
-	     loses its 'denormal' identity. */
-	  addexponent(x, 1);
+		if (x->sigh & 0x80000000) {
+			/* Is a pseudodenormal. */
+			/* This is non-80486 behaviour because the number
+			   loses its 'denormal' identity. */
+			addexponent(x, 1);
+		} else {
+			/* Is a denormal. */
+			addexponent(x, 1);
+			FPU_normalize_nuo(x);
+		}
 	}
-      else
-	{
-	  /* Is a denormal. */
-	  addexponent(x, 1);
-	  FPU_normalize_nuo(x);
+
+	if (!(x->sigh & 0x80000000)) {
+		EXCEPTION(EX_INTERNAL | 0x180);
 	}
-    }
 
-  if ( !(x->sigh & 0x80000000) )
-    {
-      EXCEPTION(EX_INTERNAL | 0x180);
-    }
-
-  return sign;
+	return sign;
 }
-
diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c
index 5cee7ff920d9..6827012db341 100644
--- a/arch/x86/math-emu/reg_divide.c
+++ b/arch/x86/math-emu/reg_divide.c
@@ -26,182 +26,157 @@
   */
 int FPU_div(int flags, int rm, int control_w)
 {
-  FPU_REG x, y;
-  FPU_REG const *a, *b, *st0_ptr, *st_ptr;
-  FPU_REG *dest;
-  u_char taga, tagb, signa, signb, sign, saved_sign;
-  int tag, deststnr;
+	FPU_REG x, y;
+	FPU_REG const *a, *b, *st0_ptr, *st_ptr;
+	FPU_REG *dest;
+	u_char taga, tagb, signa, signb, sign, saved_sign;
+	int tag, deststnr;
 
-  if ( flags & DEST_RM )
-    deststnr = rm;
-  else
-    deststnr = 0;
+	if (flags & DEST_RM)
+		deststnr = rm;
+	else
+		deststnr = 0;
 
-  if ( flags & REV )
-    {
-      b = &st(0);
-      st0_ptr = b;
-      tagb = FPU_gettag0();
-      if ( flags & LOADED )
-	{
-	  a = (FPU_REG *)rm;
-	  taga = flags & 0x0f;
+	if (flags & REV) {
+		b = &st(0);
+		st0_ptr = b;
+		tagb = FPU_gettag0();
+		if (flags & LOADED) {
+			a = (FPU_REG *) rm;
+			taga = flags & 0x0f;
+		} else {
+			a = &st(rm);
+			st_ptr = a;
+			taga = FPU_gettagi(rm);
+		}
+	} else {
+		a = &st(0);
+		st0_ptr = a;
+		taga = FPU_gettag0();
+		if (flags & LOADED) {
+			b = (FPU_REG *) rm;
+			tagb = flags & 0x0f;
+		} else {
+			b = &st(rm);
+			st_ptr = b;
+			tagb = FPU_gettagi(rm);
+		}
 	}
-      else
-	{
-	  a = &st(rm);
-	  st_ptr = a;
-	  taga = FPU_gettagi(rm);
+
+	signa = getsign(a);
+	signb = getsign(b);
+
+	sign = signa ^ signb;
+
+	dest = &st(deststnr);
+	saved_sign = getsign(dest);
+
+	if (!(taga | tagb)) {
+		/* Both regs Valid, this should be the most common case. */
+		reg_copy(a, &x);
+		reg_copy(b, &y);
+		setpositive(&x);
+		setpositive(&y);
+		tag = FPU_u_div(&x, &y, dest, control_w, sign);
+
+		if (tag < 0)
+			return tag;
+
+		FPU_settagi(deststnr, tag);
+		return tag;
 	}
-    }
-  else
-    {
-      a = &st(0);
-      st0_ptr = a;
-      taga = FPU_gettag0();
-      if ( flags & LOADED )
-	{
-	  b = (FPU_REG *)rm;
-	  tagb = flags & 0x0f;
-	}
-      else
-	{
-	  b = &st(rm);
-	  st_ptr = b;
-	  tagb = FPU_gettagi(rm);
-	}
-    }
 
-  signa = getsign(a);
-  signb = getsign(b);
+	if (taga == TAG_Special)
+		taga = FPU_Special(a);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
 
-  sign = signa ^ signb;
-
-  dest = &st(deststnr);
-  saved_sign = getsign(dest);
-
-  if ( !(taga | tagb) )
-    {
-      /* Both regs Valid, this should be the most common case. */
-      reg_copy(a, &x);
-      reg_copy(b, &y);
-      setpositive(&x);
-      setpositive(&y);
-      tag = FPU_u_div(&x, &y, dest, control_w, sign);
-
-      if ( tag < 0 )
-	return tag;
-
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	if (((taga == TAG_Valid) && (tagb == TW_Denormal))
 	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+		if (denormal_operand() < 0)
+			return FPU_Exception;
 
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      tag = FPU_u_div(&x, &y, dest, control_w, sign);
-      if ( tag < 0 )
-	return tag;
+		FPU_to_exp16(a, &x);
+		FPU_to_exp16(b, &y);
+		tag = FPU_u_div(&x, &y, dest, control_w, sign);
+		if (tag < 0)
+			return tag;
 
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
-    {
-      if ( tagb != TAG_Zero )
-	{
-	  /* Want to find Zero/Valid */
-	  if ( tagb == TW_Denormal )
-	    {
-	      if ( denormal_operand() < 0 )
-		return FPU_Exception;
-	    }
+		FPU_settagi(deststnr, tag);
+		return tag;
+	} else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) {
+		if (tagb != TAG_Zero) {
+			/* Want to find Zero/Valid */
+			if (tagb == TW_Denormal) {
+				if (denormal_operand() < 0)
+					return FPU_Exception;
+			}
 
-	  /* The result is zero. */
-	  FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-	  setsign(dest, sign);
-	  return TAG_Zero;
+			/* The result is zero. */
+			FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+			setsign(dest, sign);
+			return TAG_Zero;
+		}
+		/* We have an exception condition, either 0/0 or Valid/Zero. */
+		if (taga == TAG_Zero) {
+			/* 0/0 */
+			return arith_invalid(deststnr);
+		}
+		/* Valid/Zero */
+		return FPU_divide_by_zero(deststnr, sign);
 	}
-      /* We have an exception condition, either 0/0 or Valid/Zero. */
-      if ( taga == TAG_Zero )
-	{
-	  /* 0/0 */
-	  return arith_invalid(deststnr);
-	}
-      /* Valid/Zero */
-      return FPU_divide_by_zero(deststnr, sign);
-    }
-  /* Must have infinities, NaNs, etc */
-  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      if ( flags & LOADED )
-	return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
+	/* Must have infinities, NaNs, etc */
+	else if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+		if (flags & LOADED)
+			return real_2op_NaN((FPU_REG *) rm, flags & 0x0f, 0,
+					    st0_ptr);
 
-      if ( flags & DEST_RM )
-	{
-	  int tag;
-	  tag = FPU_gettag0();
-	  if ( tag == TAG_Special )
-	    tag = FPU_Special(st0_ptr);
-	  return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
-	}
-      else
-	{
-	  int tag;
-	  tag = FPU_gettagi(rm);
-	  if ( tag == TAG_Special )
-	    tag = FPU_Special(&st(rm));
-	  return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
-	}
-    }
-  else if (taga == TW_Infinity)
-    {
-      if (tagb == TW_Infinity)
-	{
-	  /* infinity/infinity */
-	  return arith_invalid(deststnr);
-	}
-      else
-	{
-	  /* tagb must be Valid or Zero */
-	  if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
-	    return FPU_Exception;
-	  
-	  /* Infinity divided by Zero or Valid does
-	     not raise and exception, but returns Infinity */
-	  FPU_copy_to_regi(a, TAG_Special, deststnr);
-	  setsign(dest, sign);
-	  return taga;
-	}
-    }
-  else if (tagb == TW_Infinity)
-    {
-      if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
-	return FPU_Exception;
+		if (flags & DEST_RM) {
+			int tag;
+			tag = FPU_gettag0();
+			if (tag == TAG_Special)
+				tag = FPU_Special(st0_ptr);
+			return real_2op_NaN(st0_ptr, tag, rm,
+					    (flags & REV) ? st0_ptr : &st(rm));
+		} else {
+			int tag;
+			tag = FPU_gettagi(rm);
+			if (tag == TAG_Special)
+				tag = FPU_Special(&st(rm));
+			return real_2op_NaN(&st(rm), tag, 0,
+					    (flags & REV) ? st0_ptr : &st(rm));
+		}
+	} else if (taga == TW_Infinity) {
+		if (tagb == TW_Infinity) {
+			/* infinity/infinity */
+			return arith_invalid(deststnr);
+		} else {
+			/* tagb must be Valid or Zero */
+			if ((tagb == TW_Denormal) && (denormal_operand() < 0))
+				return FPU_Exception;
 
-      /* The result is zero. */
-      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-      setsign(dest, sign);
-      return TAG_Zero;
-    }
+			/* Infinity divided by Zero or Valid does
+			   not raise and exception, but returns Infinity */
+			FPU_copy_to_regi(a, TAG_Special, deststnr);
+			setsign(dest, sign);
+			return taga;
+		}
+	} else if (tagb == TW_Infinity) {
+		if ((taga == TW_Denormal) && (denormal_operand() < 0))
+			return FPU_Exception;
+
+		/* The result is zero. */
+		FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+		setsign(dest, sign);
+		return TAG_Zero;
+	}
 #ifdef PARANOID
-  else
-    {
-      EXCEPTION(EX_INTERNAL|0x102);
-      return FPU_Exception;
-    }
-#endif /* PARANOID */ 
+	else {
+		EXCEPTION(EX_INTERNAL | 0x102);
+		return FPU_Exception;
+	}
+#endif /* PARANOID */
 
 	return 0;
 }
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index e976caef6498..0b2ca8dc2988 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -27,1084 +27,938 @@
 #include "control_w.h"
 #include "status_w.h"
 
-
-#define DOUBLE_Emax 1023         /* largest valid exponent */
+#define DOUBLE_Emax 1023	/* largest valid exponent */
 #define DOUBLE_Ebias 1023
-#define DOUBLE_Emin (-1022)      /* smallest valid exponent */
+#define DOUBLE_Emin (-1022)	/* smallest valid exponent */
 
-#define SINGLE_Emax 127          /* largest valid exponent */
+#define SINGLE_Emax 127		/* largest valid exponent */
 #define SINGLE_Ebias 127
-#define SINGLE_Emin (-126)       /* smallest valid exponent */
+#define SINGLE_Emin (-126)	/* smallest valid exponent */
 
-
-static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
+static u_char normalize_no_excep(FPU_REG * r, int exp, int sign)
 {
-  u_char tag;
+	u_char tag;
 
-  setexponent16(r, exp);
+	setexponent16(r, exp);
 
-  tag = FPU_normalize_nuo(r);
-  stdexp(r);
-  if ( sign )
-    setnegative(r);
+	tag = FPU_normalize_nuo(r);
+	stdexp(r);
+	if (sign)
+		setnegative(r);
 
-  return tag;
+	return tag;
 }
 
-
-int FPU_tagof(FPU_REG *ptr)
+int FPU_tagof(FPU_REG * ptr)
 {
-  int exp;
+	int exp;
 
-  exp = exponent16(ptr) & 0x7fff;
-  if ( exp == 0 )
-    {
-      if ( !(ptr->sigh | ptr->sigl) )
-	{
-	  return TAG_Zero;
+	exp = exponent16(ptr) & 0x7fff;
+	if (exp == 0) {
+		if (!(ptr->sigh | ptr->sigl)) {
+			return TAG_Zero;
+		}
+		/* The number is a de-normal or pseudodenormal. */
+		return TAG_Special;
 	}
-      /* The number is a de-normal or pseudodenormal. */
-      return TAG_Special;
-    }
 
-  if ( exp == 0x7fff )
-    {
-      /* Is an Infinity, a NaN, or an unsupported data type. */
-      return TAG_Special;
-    }
+	if (exp == 0x7fff) {
+		/* Is an Infinity, a NaN, or an unsupported data type. */
+		return TAG_Special;
+	}
 
-  if ( !(ptr->sigh & 0x80000000) )
-    {
-      /* Unsupported data type. */
-      /* Valid numbers have the ms bit set to 1. */
-      /* Unnormal. */
-      return TAG_Special;
-    }
+	if (!(ptr->sigh & 0x80000000)) {
+		/* Unsupported data type. */
+		/* Valid numbers have the ms bit set to 1. */
+		/* Unnormal. */
+		return TAG_Special;
+	}
 
-  return TAG_Valid;
+	return TAG_Valid;
 }
 
-
 /* Get a long double from user memory */
-int FPU_load_extended(long double __user *s, int stnr)
+int FPU_load_extended(long double __user * s, int stnr)
 {
-  FPU_REG *sti_ptr = &st(stnr);
+	FPU_REG *sti_ptr = &st(stnr);
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, s, 10);
-  __copy_from_user(sti_ptr, s, 10);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, s, 10);
+	__copy_from_user(sti_ptr, s, 10);
+	RE_ENTRANT_CHECK_ON;
 
-  return FPU_tagof(sti_ptr);
+	return FPU_tagof(sti_ptr);
 }
 
-
 /* Get a double from user memory */
-int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
+int FPU_load_double(double __user * dfloat, FPU_REG * loaded_data)
 {
-  int exp, tag, negative;
-  unsigned m64, l64;
+	int exp, tag, negative;
+	unsigned m64, l64;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, dfloat, 8);
-  FPU_get_user(m64, 1 + (unsigned long __user *) dfloat);
-  FPU_get_user(l64, (unsigned long __user *) dfloat);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, dfloat, 8);
+	FPU_get_user(m64, 1 + (unsigned long __user *)dfloat);
+	FPU_get_user(l64, (unsigned long __user *)dfloat);
+	RE_ENTRANT_CHECK_ON;
 
-  negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
-  exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
-  m64 &= 0xfffff;
-  if ( exp > DOUBLE_Emax + EXTENDED_Ebias )
-    {
-      /* Infinity or NaN */
-      if ((m64 == 0) && (l64 == 0))
-	{
-	  /* +- infinity */
-	  loaded_data->sigh = 0x80000000;
-	  loaded_data->sigl = 0x00000000;
-	  exp = EXP_Infinity + EXTENDED_Ebias;
-	  tag = TAG_Special;
+	negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+	exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
+	m64 &= 0xfffff;
+	if (exp > DOUBLE_Emax + EXTENDED_Ebias) {
+		/* Infinity or NaN */
+		if ((m64 == 0) && (l64 == 0)) {
+			/* +- infinity */
+			loaded_data->sigh = 0x80000000;
+			loaded_data->sigl = 0x00000000;
+			exp = EXP_Infinity + EXTENDED_Ebias;
+			tag = TAG_Special;
+		} else {
+			/* Must be a signaling or quiet NaN */
+			exp = EXP_NaN + EXTENDED_Ebias;
+			loaded_data->sigh = (m64 << 11) | 0x80000000;
+			loaded_data->sigh |= l64 >> 21;
+			loaded_data->sigl = l64 << 11;
+			tag = TAG_Special;	/* The calling function must look for NaNs */
+		}
+	} else if (exp < DOUBLE_Emin + EXTENDED_Ebias) {
+		/* Zero or de-normal */
+		if ((m64 == 0) && (l64 == 0)) {
+			/* Zero */
+			reg_copy(&CONST_Z, loaded_data);
+			exp = 0;
+			tag = TAG_Zero;
+		} else {
+			/* De-normal */
+			loaded_data->sigh = m64 << 11;
+			loaded_data->sigh |= l64 >> 21;
+			loaded_data->sigl = l64 << 11;
+
+			return normalize_no_excep(loaded_data, DOUBLE_Emin,
+						  negative)
+			    | (denormal_operand() < 0 ? FPU_Exception : 0);
+		}
+	} else {
+		loaded_data->sigh = (m64 << 11) | 0x80000000;
+		loaded_data->sigh |= l64 >> 21;
+		loaded_data->sigl = l64 << 11;
+
+		tag = TAG_Valid;
 	}
-      else
-	{
-	  /* Must be a signaling or quiet NaN */
-	  exp = EXP_NaN + EXTENDED_Ebias;
-	  loaded_data->sigh = (m64 << 11) | 0x80000000;
-	  loaded_data->sigh |= l64 >> 21;
-	  loaded_data->sigl = l64 << 11;
-	  tag = TAG_Special;    /* The calling function must look for NaNs */
-	}
-    }
-  else if ( exp < DOUBLE_Emin + EXTENDED_Ebias )
-    {
-      /* Zero or de-normal */
-      if ((m64 == 0) && (l64 == 0))
-	{
-	  /* Zero */
-	  reg_copy(&CONST_Z, loaded_data);
-	  exp = 0;
-	  tag = TAG_Zero;
-	}
-      else
-	{
-	  /* De-normal */
-	  loaded_data->sigh = m64 << 11;
-	  loaded_data->sigh |= l64 >> 21;
-	  loaded_data->sigl = l64 << 11;
 
-	  return normalize_no_excep(loaded_data, DOUBLE_Emin, negative)
-	    | (denormal_operand() < 0 ? FPU_Exception : 0);
-	}
-    }
-  else
-    {
-      loaded_data->sigh = (m64 << 11) | 0x80000000;
-      loaded_data->sigh |= l64 >> 21;
-      loaded_data->sigl = l64 << 11;
+	setexponent16(loaded_data, exp | negative);
 
-      tag = TAG_Valid;
-    }
-
-  setexponent16(loaded_data, exp | negative);
-
-  return tag;
+	return tag;
 }
 
-
 /* Get a float from user memory */
-int FPU_load_single(float __user *single, FPU_REG *loaded_data)
+int FPU_load_single(float __user * single, FPU_REG * loaded_data)
 {
-  unsigned m32;
-  int exp, tag, negative;
+	unsigned m32;
+	int exp, tag, negative;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, single, 4);
-  FPU_get_user(m32, (unsigned long __user *) single);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, single, 4);
+	FPU_get_user(m32, (unsigned long __user *)single);
+	RE_ENTRANT_CHECK_ON;
 
-  negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+	negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
 
-  if (!(m32 & 0x7fffffff))
-    {
-      /* Zero */
-      reg_copy(&CONST_Z, loaded_data);
-      addexponent(loaded_data, negative);
-      return TAG_Zero;
-    }
-  exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
-  m32 = (m32 & 0x7fffff) << 8;
-  if ( exp < SINGLE_Emin + EXTENDED_Ebias )
-    {
-      /* De-normals */
-      loaded_data->sigh = m32;
-      loaded_data->sigl = 0;
-
-      return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
-	| (denormal_operand() < 0 ? FPU_Exception : 0);
-    }
-  else if ( exp > SINGLE_Emax + EXTENDED_Ebias )
-    {
-    /* Infinity or NaN */
-      if ( m32 == 0 )
-	{
-	  /* +- infinity */
-	  loaded_data->sigh = 0x80000000;
-	  loaded_data->sigl = 0x00000000;
-	  exp = EXP_Infinity + EXTENDED_Ebias;
-	  tag = TAG_Special;
+	if (!(m32 & 0x7fffffff)) {
+		/* Zero */
+		reg_copy(&CONST_Z, loaded_data);
+		addexponent(loaded_data, negative);
+		return TAG_Zero;
 	}
-      else
-	{
-	  /* Must be a signaling or quiet NaN */
-	  exp = EXP_NaN + EXTENDED_Ebias;
-	  loaded_data->sigh = m32 | 0x80000000;
-	  loaded_data->sigl = 0;
-	  tag = TAG_Special;  /* The calling function must look for NaNs */
+	exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
+	m32 = (m32 & 0x7fffff) << 8;
+	if (exp < SINGLE_Emin + EXTENDED_Ebias) {
+		/* De-normals */
+		loaded_data->sigh = m32;
+		loaded_data->sigl = 0;
+
+		return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
+		    | (denormal_operand() < 0 ? FPU_Exception : 0);
+	} else if (exp > SINGLE_Emax + EXTENDED_Ebias) {
+		/* Infinity or NaN */
+		if (m32 == 0) {
+			/* +- infinity */
+			loaded_data->sigh = 0x80000000;
+			loaded_data->sigl = 0x00000000;
+			exp = EXP_Infinity + EXTENDED_Ebias;
+			tag = TAG_Special;
+		} else {
+			/* Must be a signaling or quiet NaN */
+			exp = EXP_NaN + EXTENDED_Ebias;
+			loaded_data->sigh = m32 | 0x80000000;
+			loaded_data->sigl = 0;
+			tag = TAG_Special;	/* The calling function must look for NaNs */
+		}
+	} else {
+		loaded_data->sigh = m32 | 0x80000000;
+		loaded_data->sigl = 0;
+		tag = TAG_Valid;
 	}
-    }
-  else
-    {
-      loaded_data->sigh = m32 | 0x80000000;
-      loaded_data->sigl = 0;
-      tag = TAG_Valid;
-    }
 
-  setexponent16(loaded_data, exp | negative);  /* Set the sign. */
+	setexponent16(loaded_data, exp | negative);	/* Set the sign. */
 
-  return tag;
+	return tag;
 }
 
-
 /* Get a long long from user memory */
-int FPU_load_int64(long long __user *_s)
+int FPU_load_int64(long long __user * _s)
 {
-  long long s;
-  int sign;
-  FPU_REG *st0_ptr = &st(0);
+	long long s;
+	int sign;
+	FPU_REG *st0_ptr = &st(0);
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, _s, 8);
-  if (copy_from_user(&s,_s,8))
-    FPU_abort;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, _s, 8);
+	if (copy_from_user(&s, _s, 8))
+		FPU_abort;
+	RE_ENTRANT_CHECK_ON;
 
-  if (s == 0)
-    {
-      reg_copy(&CONST_Z, st0_ptr);
-      return TAG_Zero;
-    }
+	if (s == 0) {
+		reg_copy(&CONST_Z, st0_ptr);
+		return TAG_Zero;
+	}
 
-  if (s > 0)
-    sign = SIGN_Positive;
-  else
-  {
-    s = -s;
-    sign = SIGN_Negative;
-  }
+	if (s > 0)
+		sign = SIGN_Positive;
+	else {
+		s = -s;
+		sign = SIGN_Negative;
+	}
 
-  significand(st0_ptr) = s;
+	significand(st0_ptr) = s;
 
-  return normalize_no_excep(st0_ptr, 63, sign);
+	return normalize_no_excep(st0_ptr, 63, sign);
 }
 
-
 /* Get a long from user memory */
-int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
+int FPU_load_int32(long __user * _s, FPU_REG * loaded_data)
 {
-  long s;
-  int negative;
+	long s;
+	int negative;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, _s, 4);
-  FPU_get_user(s, _s);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, _s, 4);
+	FPU_get_user(s, _s);
+	RE_ENTRANT_CHECK_ON;
 
-  if (s == 0)
-    { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+	if (s == 0) {
+		reg_copy(&CONST_Z, loaded_data);
+		return TAG_Zero;
+	}
 
-  if (s > 0)
-    negative = SIGN_Positive;
-  else
-    {
-      s = -s;
-      negative = SIGN_Negative;
-    }
+	if (s > 0)
+		negative = SIGN_Positive;
+	else {
+		s = -s;
+		negative = SIGN_Negative;
+	}
 
-  loaded_data->sigh = s;
-  loaded_data->sigl = 0;
+	loaded_data->sigh = s;
+	loaded_data->sigl = 0;
 
-  return normalize_no_excep(loaded_data, 31, negative);
+	return normalize_no_excep(loaded_data, 31, negative);
 }
 
-
 /* Get a short from user memory */
-int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
+int FPU_load_int16(short __user * _s, FPU_REG * loaded_data)
 {
-  int s, negative;
+	int s, negative;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, _s, 2);
-  /* Cast as short to get the sign extended. */
-  FPU_get_user(s, _s);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, _s, 2);
+	/* Cast as short to get the sign extended. */
+	FPU_get_user(s, _s);
+	RE_ENTRANT_CHECK_ON;
 
-  if (s == 0)
-    { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+	if (s == 0) {
+		reg_copy(&CONST_Z, loaded_data);
+		return TAG_Zero;
+	}
 
-  if (s > 0)
-    negative = SIGN_Positive;
-  else
-    {
-      s = -s;
-      negative = SIGN_Negative;
-    }
+	if (s > 0)
+		negative = SIGN_Positive;
+	else {
+		s = -s;
+		negative = SIGN_Negative;
+	}
 
-  loaded_data->sigh = s << 16;
-  loaded_data->sigl = 0;
+	loaded_data->sigh = s << 16;
+	loaded_data->sigl = 0;
 
-  return normalize_no_excep(loaded_data, 15, negative);
+	return normalize_no_excep(loaded_data, 15, negative);
 }
 
-
 /* Get a packed bcd array from user memory */
-int FPU_load_bcd(u_char __user *s)
+int FPU_load_bcd(u_char __user * s)
 {
-  FPU_REG *st0_ptr = &st(0);
-  int pos;
-  u_char bcd;
-  long long l=0;
-  int sign;
+	FPU_REG *st0_ptr = &st(0);
+	int pos;
+	u_char bcd;
+	long long l = 0;
+	int sign;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ, s, 10);
-  RE_ENTRANT_CHECK_ON;
-  for ( pos = 8; pos >= 0; pos--)
-    {
-      l *= 10;
-      RE_ENTRANT_CHECK_OFF;
-      FPU_get_user(bcd, s+pos);
-      RE_ENTRANT_CHECK_ON;
-      l += bcd >> 4;
-      l *= 10;
-      l += bcd & 0x0f;
-    }
- 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_get_user(sign, s+9);
-  sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, s, 10);
+	RE_ENTRANT_CHECK_ON;
+	for (pos = 8; pos >= 0; pos--) {
+		l *= 10;
+		RE_ENTRANT_CHECK_OFF;
+		FPU_get_user(bcd, s + pos);
+		RE_ENTRANT_CHECK_ON;
+		l += bcd >> 4;
+		l *= 10;
+		l += bcd & 0x0f;
+	}
 
-  if ( l == 0 )
-    {
-      reg_copy(&CONST_Z, st0_ptr);
-      addexponent(st0_ptr, sign);   /* Set the sign. */
-      return TAG_Zero;
-    }
-  else
-    {
-      significand(st0_ptr) = l;
-      return normalize_no_excep(st0_ptr, 63, sign);
-    }
+	RE_ENTRANT_CHECK_OFF;
+	FPU_get_user(sign, s + 9);
+	sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
+	RE_ENTRANT_CHECK_ON;
+
+	if (l == 0) {
+		reg_copy(&CONST_Z, st0_ptr);
+		addexponent(st0_ptr, sign);	/* Set the sign. */
+		return TAG_Zero;
+	} else {
+		significand(st0_ptr) = l;
+		return normalize_no_excep(st0_ptr, 63, sign);
+	}
 }
 
 /*===========================================================================*/
 
 /* Put a long double into user memory */
-int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d)
+int FPU_store_extended(FPU_REG * st0_ptr, u_char st0_tag,
+		       long double __user * d)
 {
-  /*
-    The only exception raised by an attempt to store to an
-    extended format is the Invalid Stack exception, i.e.
-    attempting to store from an empty register.
-   */
+	/*
+	   The only exception raised by an attempt to store to an
+	   extended format is the Invalid Stack exception, i.e.
+	   attempting to store from an empty register.
+	 */
 
-  if ( st0_tag != TAG_Empty )
-    {
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE, d, 10);
+	if (st0_tag != TAG_Empty) {
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, d, 10);
 
-      FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d);
-      FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4));
-      FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8));
-      RE_ENTRANT_CHECK_ON;
+		FPU_put_user(st0_ptr->sigl, (unsigned long __user *)d);
+		FPU_put_user(st0_ptr->sigh,
+			     (unsigned long __user *)((u_char __user *) d + 4));
+		FPU_put_user(exponent16(st0_ptr),
+			     (unsigned short __user *)((u_char __user *) d +
+						       8));
+		RE_ENTRANT_CHECK_ON;
 
-      return 1;
-    }
+		return 1;
+	}
 
-  /* Empty register (stack underflow) */
-  EXCEPTION(EX_StackUnder);
-  if ( control_word & CW_Invalid )
-    {
-      /* The masked response */
-      /* Put out the QNaN indefinite */
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE,d,10);
-      FPU_put_user(0, (unsigned long __user *) d);
-      FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d);
-      FPU_put_user(0xffff, 4 + (short __user *) d);
-      RE_ENTRANT_CHECK_ON;
-      return 1;
-    }
-  else
-    return 0;
+	/* Empty register (stack underflow) */
+	EXCEPTION(EX_StackUnder);
+	if (control_word & CW_Invalid) {
+		/* The masked response */
+		/* Put out the QNaN indefinite */
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, d, 10);
+		FPU_put_user(0, (unsigned long __user *)d);
+		FPU_put_user(0xc0000000, 1 + (unsigned long __user *)d);
+		FPU_put_user(0xffff, 4 + (short __user *)d);
+		RE_ENTRANT_CHECK_ON;
+		return 1;
+	} else
+		return 0;
 
 }
 
-
 /* Put a double into user memory */
-int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
+int FPU_store_double(FPU_REG * st0_ptr, u_char st0_tag, double __user * dfloat)
 {
-  unsigned long l[2];
-  unsigned long increment = 0;	/* avoid gcc warnings */
-  int precision_loss;
-  int exp;
-  FPU_REG tmp;
+	unsigned long l[2];
+	unsigned long increment = 0;	/* avoid gcc warnings */
+	int precision_loss;
+	int exp;
+	FPU_REG tmp;
 
-  if ( st0_tag == TAG_Valid )
-    {
-      reg_copy(st0_ptr, &tmp);
-      exp = exponent(&tmp);
+	if (st0_tag == TAG_Valid) {
+		reg_copy(st0_ptr, &tmp);
+		exp = exponent(&tmp);
 
-      if ( exp < DOUBLE_Emin )     /* It may be a denormal */
-	{
-	  addexponent(&tmp, -DOUBLE_Emin + 52);  /* largest exp to be 51 */
+		if (exp < DOUBLE_Emin) {	/* It may be a denormal */
+			addexponent(&tmp, -DOUBLE_Emin + 52);	/* largest exp to be 51 */
 
-	denormal_arg:
+		      denormal_arg:
 
-	  if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
-	    {
+			if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) {
 #ifdef PECULIAR_486
-	      /* Did it round to a non-denormal ? */
-	      /* This behaviour might be regarded as peculiar, it appears
-		 that the 80486 rounds to the dest precision, then
-		 converts to decide underflow. */
-	      if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
-		  (st0_ptr->sigl & 0x000007ff)) )
+				/* Did it round to a non-denormal ? */
+				/* This behaviour might be regarded as peculiar, it appears
+				   that the 80486 rounds to the dest precision, then
+				   converts to decide underflow. */
+				if (!
+				    ((tmp.sigh == 0x00100000) && (tmp.sigl == 0)
+				     && (st0_ptr->sigl & 0x000007ff)))
 #endif /* PECULIAR_486 */
-		{
-		  EXCEPTION(EX_Underflow);
-		  /* This is a special case: see sec 16.2.5.1 of
-		     the 80486 book */
-		  if ( !(control_word & CW_Underflow) )
-		    return 0;
-		}
-	      EXCEPTION(precision_loss);
-	      if ( !(control_word & CW_Precision) )
-		return 0;
-	    }
-	  l[0] = tmp.sigl;
-	  l[1] = tmp.sigh;
-	}
-      else
-	{
-	  if ( tmp.sigl & 0x000007ff )
-	    {
-	      precision_loss = 1;
-	      switch (control_word & CW_RC)
-		{
-		case RC_RND:
-		  /* Rounding can get a little messy.. */
-		  increment = ((tmp.sigl & 0x7ff) > 0x400) |  /* nearest */
-		    ((tmp.sigl & 0xc00) == 0xc00);            /* odd -> even */
-		  break;
-		case RC_DOWN:   /* towards -infinity */
-		  increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff;
-		  break;
-		case RC_UP:     /* towards +infinity */
-		  increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0;
-		  break;
-		case RC_CHOP:
-		  increment = 0;
-		  break;
-		}
-	  
-	      /* Truncate the mantissa */
-	      tmp.sigl &= 0xfffff800;
-	  
-	      if ( increment )
-		{
-		  if ( tmp.sigl >= 0xfffff800 )
-		    {
-		      /* the sigl part overflows */
-		      if ( tmp.sigh == 0xffffffff )
-			{
-			  /* The sigh part overflows */
-			  tmp.sigh = 0x80000000;
-			  exp++;
-			  if (exp >= EXP_OVER)
-			    goto overflow;
+				{
+					EXCEPTION(EX_Underflow);
+					/* This is a special case: see sec 16.2.5.1 of
+					   the 80486 book */
+					if (!(control_word & CW_Underflow))
+						return 0;
+				}
+				EXCEPTION(precision_loss);
+				if (!(control_word & CW_Precision))
+					return 0;
 			}
-		      else
-			{
-			  tmp.sigh ++;
+			l[0] = tmp.sigl;
+			l[1] = tmp.sigh;
+		} else {
+			if (tmp.sigl & 0x000007ff) {
+				precision_loss = 1;
+				switch (control_word & CW_RC) {
+				case RC_RND:
+					/* Rounding can get a little messy.. */
+					increment = ((tmp.sigl & 0x7ff) > 0x400) |	/* nearest */
+					    ((tmp.sigl & 0xc00) == 0xc00);	/* odd -> even */
+					break;
+				case RC_DOWN:	/* towards -infinity */
+					increment =
+					    signpositive(&tmp) ? 0 : tmp.
+					    sigl & 0x7ff;
+					break;
+				case RC_UP:	/* towards +infinity */
+					increment =
+					    signpositive(&tmp) ? tmp.
+					    sigl & 0x7ff : 0;
+					break;
+				case RC_CHOP:
+					increment = 0;
+					break;
+				}
+
+				/* Truncate the mantissa */
+				tmp.sigl &= 0xfffff800;
+
+				if (increment) {
+					if (tmp.sigl >= 0xfffff800) {
+						/* the sigl part overflows */
+						if (tmp.sigh == 0xffffffff) {
+							/* The sigh part overflows */
+							tmp.sigh = 0x80000000;
+							exp++;
+							if (exp >= EXP_OVER)
+								goto overflow;
+						} else {
+							tmp.sigh++;
+						}
+						tmp.sigl = 0x00000000;
+					} else {
+						/* We only need to increment sigl */
+						tmp.sigl += 0x00000800;
+					}
+				}
+			} else
+				precision_loss = 0;
+
+			l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
+			l[1] = ((tmp.sigh >> 11) & 0xfffff);
+
+			if (exp > DOUBLE_Emax) {
+			      overflow:
+				EXCEPTION(EX_Overflow);
+				if (!(control_word & CW_Overflow))
+					return 0;
+				set_precision_flag_up();
+				if (!(control_word & CW_Precision))
+					return 0;
+
+				/* This is a special case: see sec 16.2.5.1 of the 80486 book */
+				/* Overflow to infinity */
+				l[0] = 0x00000000;	/* Set to */
+				l[1] = 0x7ff00000;	/* + INF */
+			} else {
+				if (precision_loss) {
+					if (increment)
+						set_precision_flag_up();
+					else
+						set_precision_flag_down();
+				}
+				/* Add the exponent */
+				l[1] |= (((exp + DOUBLE_Ebias) & 0x7ff) << 20);
 			}
-		      tmp.sigl = 0x00000000;
-		    }
-		  else
-		    {
-		      /* We only need to increment sigl */
-		      tmp.sigl += 0x00000800;
-		    }
 		}
-	    }
-	  else
-	    precision_loss = 0;
-	  
-	  l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
-	  l[1] = ((tmp.sigh >> 11) & 0xfffff);
-
-	  if ( exp > DOUBLE_Emax )
-	    {
-	    overflow:
-	      EXCEPTION(EX_Overflow);
-	      if ( !(control_word & CW_Overflow) )
-		return 0;
-	      set_precision_flag_up();
-	      if ( !(control_word & CW_Precision) )
-		return 0;
-
-	      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
-	      /* Overflow to infinity */
-	      l[0] = 0x00000000;	/* Set to */
-	      l[1] = 0x7ff00000;	/* + INF */
-	    }
-	  else
-	    {
-	      if ( precision_loss )
-		{
-		  if ( increment )
-		    set_precision_flag_up();
-		  else
-		    set_precision_flag_down();
-		}
-	      /* Add the exponent */
-	      l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
-	    }
-	}
-    }
-  else if (st0_tag == TAG_Zero)
-    {
-      /* Number is zero */
-      l[0] = 0;
-      l[1] = 0;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if ( st0_tag == TW_Denormal )
-	{
-	  /* A denormal will always underflow. */
+	} else if (st0_tag == TAG_Zero) {
+		/* Number is zero */
+		l[0] = 0;
+		l[1] = 0;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if (st0_tag == TW_Denormal) {
+			/* A denormal will always underflow. */
 #ifndef PECULIAR_486
-	  /* An 80486 is supposed to be able to generate
-	     a denormal exception here, but... */
-	  /* Underflow has priority. */
-	  if ( control_word & CW_Underflow )
-	    denormal_operand();
+			/* An 80486 is supposed to be able to generate
+			   a denormal exception here, but... */
+			/* Underflow has priority. */
+			if (control_word & CW_Underflow)
+				denormal_operand();
 #endif /* PECULIAR_486 */
-	  reg_copy(st0_ptr, &tmp);
-	  goto denormal_arg;
-	}
-      else if (st0_tag == TW_Infinity)
-	{
-	  l[0] = 0;
-	  l[1] = 0x7ff00000;
-	}
-      else if (st0_tag == TW_NaN)
-	{
-	  /* Is it really a NaN ? */
-	  if ( (exponent(st0_ptr) == EXP_OVER)
-	       && (st0_ptr->sigh & 0x80000000) )
-	    {
-	      /* See if we can get a valid NaN from the FPU_REG */
-	      l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
-	      l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
-	      if ( !(st0_ptr->sigh & 0x40000000) )
-		{
-		  /* It is a signalling NaN */
-		  EXCEPTION(EX_Invalid);
-		  if ( !(control_word & CW_Invalid) )
-		    return 0;
-		  l[1] |= (0x40000000 >> 11);
+			reg_copy(st0_ptr, &tmp);
+			goto denormal_arg;
+		} else if (st0_tag == TW_Infinity) {
+			l[0] = 0;
+			l[1] = 0x7ff00000;
+		} else if (st0_tag == TW_NaN) {
+			/* Is it really a NaN ? */
+			if ((exponent(st0_ptr) == EXP_OVER)
+			    && (st0_ptr->sigh & 0x80000000)) {
+				/* See if we can get a valid NaN from the FPU_REG */
+				l[0] =
+				    (st0_ptr->sigl >> 11) | (st0_ptr->
+							     sigh << 21);
+				l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
+				if (!(st0_ptr->sigh & 0x40000000)) {
+					/* It is a signalling NaN */
+					EXCEPTION(EX_Invalid);
+					if (!(control_word & CW_Invalid))
+						return 0;
+					l[1] |= (0x40000000 >> 11);
+				}
+				l[1] |= 0x7ff00000;
+			} else {
+				/* It is an unsupported data type */
+				EXCEPTION(EX_Invalid);
+				if (!(control_word & CW_Invalid))
+					return 0;
+				l[0] = 0;
+				l[1] = 0xfff80000;
+			}
 		}
-	      l[1] |= 0x7ff00000;
-	    }
-	  else
-	    {
-	      /* It is an unsupported data type */
-	      EXCEPTION(EX_Invalid);
-	      if ( !(control_word & CW_Invalid) )
-		return 0;
-	      l[0] = 0;
-	      l[1] = 0xfff80000;
-	    }
+	} else if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		if (control_word & CW_Invalid) {
+			/* The masked response */
+			/* Put out the QNaN indefinite */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_access_ok(VERIFY_WRITE, dfloat, 8);
+			FPU_put_user(0, (unsigned long __user *)dfloat);
+			FPU_put_user(0xfff80000,
+				     1 + (unsigned long __user *)dfloat);
+			RE_ENTRANT_CHECK_ON;
+			return 1;
+		} else
+			return 0;
 	}
-    }
-  else if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      if ( control_word & CW_Invalid )
-	{
-	  /* The masked response */
-	  /* Put out the QNaN indefinite */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_access_ok(VERIFY_WRITE,dfloat,8);
-	  FPU_put_user(0, (unsigned long __user *) dfloat);
-	  FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat);
-	  RE_ENTRANT_CHECK_ON;
-	  return 1;
-	}
-      else
-	return 0;
-    }
-  if ( getsign(st0_ptr) )
-    l[1] |= 0x80000000;
+	if (getsign(st0_ptr))
+		l[1] |= 0x80000000;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,dfloat,8);
-  FPU_put_user(l[0], (unsigned long __user *)dfloat);
-  FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, dfloat, 8);
+	FPU_put_user(l[0], (unsigned long __user *)dfloat);
+	FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
-
 /* Put a float into user memory */
-int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
+int FPU_store_single(FPU_REG * st0_ptr, u_char st0_tag, float __user * single)
 {
-  long templ = 0;
-  unsigned long increment = 0;     	/* avoid gcc warnings */
-  int precision_loss;
-  int exp;
-  FPU_REG tmp;
+	long templ = 0;
+	unsigned long increment = 0;	/* avoid gcc warnings */
+	int precision_loss;
+	int exp;
+	FPU_REG tmp;
 
-  if ( st0_tag == TAG_Valid )
-    {
+	if (st0_tag == TAG_Valid) {
 
-      reg_copy(st0_ptr, &tmp);
-      exp = exponent(&tmp);
+		reg_copy(st0_ptr, &tmp);
+		exp = exponent(&tmp);
 
-      if ( exp < SINGLE_Emin )
-	{
-	  addexponent(&tmp, -SINGLE_Emin + 23);  /* largest exp to be 22 */
+		if (exp < SINGLE_Emin) {
+			addexponent(&tmp, -SINGLE_Emin + 23);	/* largest exp to be 22 */
 
-	denormal_arg:
+		      denormal_arg:
 
-	  if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
-	    {
+			if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) {
 #ifdef PECULIAR_486
-	      /* Did it round to a non-denormal ? */
-	      /* This behaviour might be regarded as peculiar, it appears
-		 that the 80486 rounds to the dest precision, then
-		 converts to decide underflow. */
-	      if ( !((tmp.sigl == 0x00800000) &&
-		  ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
+				/* Did it round to a non-denormal ? */
+				/* This behaviour might be regarded as peculiar, it appears
+				   that the 80486 rounds to the dest precision, then
+				   converts to decide underflow. */
+				if (!((tmp.sigl == 0x00800000) &&
+				      ((st0_ptr->sigh & 0x000000ff)
+				       || st0_ptr->sigl)))
 #endif /* PECULIAR_486 */
-		{
-		  EXCEPTION(EX_Underflow);
-		  /* This is a special case: see sec 16.2.5.1 of
-		     the 80486 book */
-		  if ( !(control_word & CW_Underflow) )
-		    return 0;
-		}
-	      EXCEPTION(precision_loss);
-	      if ( !(control_word & CW_Precision) )
-		return 0;
-	    }
-	  templ = tmp.sigl;
-      }
-      else
-	{
-	  if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
-	    {
-	      unsigned long sigh = tmp.sigh;
-	      unsigned long sigl = tmp.sigl;
-	      
-	      precision_loss = 1;
-	      switch (control_word & CW_RC)
-		{
-		case RC_RND:
-		  increment = ((sigh & 0xff) > 0x80)       /* more than half */
-		    || (((sigh & 0xff) == 0x80) && sigl)   /* more than half */
-		    || ((sigh & 0x180) == 0x180);        /* round to even */
-		  break;
-		case RC_DOWN:   /* towards -infinity */
-		  increment = signpositive(&tmp)
-		    ? 0 : (sigl | (sigh & 0xff));
-		  break;
-		case RC_UP:     /* towards +infinity */
-		  increment = signpositive(&tmp)
-		    ? (sigl | (sigh & 0xff)) : 0;
-		  break;
-		case RC_CHOP:
-		  increment = 0;
-		  break;
-		}
-	  
-	      /* Truncate part of the mantissa */
-	      tmp.sigl = 0;
-	  
-	      if (increment)
-		{
-		  if ( sigh >= 0xffffff00 )
-		    {
-		      /* The sigh part overflows */
-		      tmp.sigh = 0x80000000;
-		      exp++;
-		      if ( exp >= EXP_OVER )
-			goto overflow;
-		    }
-		  else
-		    {
-		      tmp.sigh &= 0xffffff00;
-		      tmp.sigh += 0x100;
-		    }
-		}
-	      else
-		{
-		  tmp.sigh &= 0xffffff00;  /* Finish the truncation */
-		}
-	    }
-	  else
-	    precision_loss = 0;
-      
-	  templ = (tmp.sigh >> 8) & 0x007fffff;
+				{
+					EXCEPTION(EX_Underflow);
+					/* This is a special case: see sec 16.2.5.1 of
+					   the 80486 book */
+					if (!(control_word & CW_Underflow))
+						return 0;
+				}
+				EXCEPTION(precision_loss);
+				if (!(control_word & CW_Precision))
+					return 0;
+			}
+			templ = tmp.sigl;
+		} else {
+			if (tmp.sigl | (tmp.sigh & 0x000000ff)) {
+				unsigned long sigh = tmp.sigh;
+				unsigned long sigl = tmp.sigl;
 
-	  if ( exp > SINGLE_Emax )
-	    {
-	    overflow:
-	      EXCEPTION(EX_Overflow);
-	      if ( !(control_word & CW_Overflow) )
-		return 0;
-	      set_precision_flag_up();
-	      if ( !(control_word & CW_Precision) )
-		return 0;
+				precision_loss = 1;
+				switch (control_word & CW_RC) {
+				case RC_RND:
+					increment = ((sigh & 0xff) > 0x80)	/* more than half */
+					    ||(((sigh & 0xff) == 0x80) && sigl)	/* more than half */
+					    ||((sigh & 0x180) == 0x180);	/* round to even */
+					break;
+				case RC_DOWN:	/* towards -infinity */
+					increment = signpositive(&tmp)
+					    ? 0 : (sigl | (sigh & 0xff));
+					break;
+				case RC_UP:	/* towards +infinity */
+					increment = signpositive(&tmp)
+					    ? (sigl | (sigh & 0xff)) : 0;
+					break;
+				case RC_CHOP:
+					increment = 0;
+					break;
+				}
 
-	      /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
-	      /* Masked response is overflow to infinity. */
-	      templ = 0x7f800000;
-	    }
-	  else
-	    {
-	      if ( precision_loss )
-		{
-		  if ( increment )
-		    set_precision_flag_up();
-		  else
-		    set_precision_flag_down();
+				/* Truncate part of the mantissa */
+				tmp.sigl = 0;
+
+				if (increment) {
+					if (sigh >= 0xffffff00) {
+						/* The sigh part overflows */
+						tmp.sigh = 0x80000000;
+						exp++;
+						if (exp >= EXP_OVER)
+							goto overflow;
+					} else {
+						tmp.sigh &= 0xffffff00;
+						tmp.sigh += 0x100;
+					}
+				} else {
+					tmp.sigh &= 0xffffff00;	/* Finish the truncation */
+				}
+			} else
+				precision_loss = 0;
+
+			templ = (tmp.sigh >> 8) & 0x007fffff;
+
+			if (exp > SINGLE_Emax) {
+			      overflow:
+				EXCEPTION(EX_Overflow);
+				if (!(control_word & CW_Overflow))
+					return 0;
+				set_precision_flag_up();
+				if (!(control_word & CW_Precision))
+					return 0;
+
+				/* This is a special case: see sec 16.2.5.1 of the 80486 book. */
+				/* Masked response is overflow to infinity. */
+				templ = 0x7f800000;
+			} else {
+				if (precision_loss) {
+					if (increment)
+						set_precision_flag_up();
+					else
+						set_precision_flag_down();
+				}
+				/* Add the exponent */
+				templ |= ((exp + SINGLE_Ebias) & 0xff) << 23;
+			}
 		}
-	      /* Add the exponent */
-	      templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
-	    }
-	}
-    }
-  else if (st0_tag == TAG_Zero)
-    {
-      templ = 0;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if (st0_tag == TW_Denormal)
-	{
-	  reg_copy(st0_ptr, &tmp);
+	} else if (st0_tag == TAG_Zero) {
+		templ = 0;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if (st0_tag == TW_Denormal) {
+			reg_copy(st0_ptr, &tmp);
 
-	  /* A denormal will always underflow. */
+			/* A denormal will always underflow. */
 #ifndef PECULIAR_486
-	  /* An 80486 is supposed to be able to generate
-	     a denormal exception here, but... */
-	  /* Underflow has priority. */
-	  if ( control_word & CW_Underflow )
-	    denormal_operand();
-#endif /* PECULIAR_486 */ 
-	  goto denormal_arg;
-	}
-      else if (st0_tag == TW_Infinity)
-	{
-	  templ = 0x7f800000;
-	}
-      else if (st0_tag == TW_NaN)
-	{
-	  /* Is it really a NaN ? */
-	  if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) )
-	    {
-	      /* See if we can get a valid NaN from the FPU_REG */
-	      templ = st0_ptr->sigh >> 8;
-	      if ( !(st0_ptr->sigh & 0x40000000) )
-		{
-		  /* It is a signalling NaN */
-		  EXCEPTION(EX_Invalid);
-		  if ( !(control_word & CW_Invalid) )
-		    return 0;
-		  templ |= (0x40000000 >> 8);
+			/* An 80486 is supposed to be able to generate
+			   a denormal exception here, but... */
+			/* Underflow has priority. */
+			if (control_word & CW_Underflow)
+				denormal_operand();
+#endif /* PECULIAR_486 */
+			goto denormal_arg;
+		} else if (st0_tag == TW_Infinity) {
+			templ = 0x7f800000;
+		} else if (st0_tag == TW_NaN) {
+			/* Is it really a NaN ? */
+			if ((exponent(st0_ptr) == EXP_OVER)
+			    && (st0_ptr->sigh & 0x80000000)) {
+				/* See if we can get a valid NaN from the FPU_REG */
+				templ = st0_ptr->sigh >> 8;
+				if (!(st0_ptr->sigh & 0x40000000)) {
+					/* It is a signalling NaN */
+					EXCEPTION(EX_Invalid);
+					if (!(control_word & CW_Invalid))
+						return 0;
+					templ |= (0x40000000 >> 8);
+				}
+				templ |= 0x7f800000;
+			} else {
+				/* It is an unsupported data type */
+				EXCEPTION(EX_Invalid);
+				if (!(control_word & CW_Invalid))
+					return 0;
+				templ = 0xffc00000;
+			}
 		}
-	      templ |= 0x7f800000;
-	    }
-	  else
-	    {
-	      /* It is an unsupported data type */
-	      EXCEPTION(EX_Invalid);
-	      if ( !(control_word & CW_Invalid) )
+#ifdef PARANOID
+		else {
+			EXCEPTION(EX_INTERNAL | 0x164);
+			return 0;
+		}
+#endif
+	} else if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		if (control_word & EX_Invalid) {
+			/* The masked response */
+			/* Put out the QNaN indefinite */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_access_ok(VERIFY_WRITE, single, 4);
+			FPU_put_user(0xffc00000,
+				     (unsigned long __user *)single);
+			RE_ENTRANT_CHECK_ON;
+			return 1;
+		} else
+			return 0;
+	}
+#ifdef PARANOID
+	else {
+		EXCEPTION(EX_INTERNAL | 0x163);
 		return 0;
-	      templ = 0xffc00000;
-	    }
-	}
-#ifdef PARANOID
-      else
-	{
-	  EXCEPTION(EX_INTERNAL|0x164);
-	  return 0;
 	}
 #endif
-    }
-  else if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      if ( control_word & EX_Invalid )
-	{
-	  /* The masked response */
-	  /* Put out the QNaN indefinite */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_access_ok(VERIFY_WRITE,single,4);
-	  FPU_put_user(0xffc00000, (unsigned long __user *) single);
-	  RE_ENTRANT_CHECK_ON;
-	  return 1;
-	}
-      else
-	return 0;
-    }
-#ifdef PARANOID
-  else
-    {
-      EXCEPTION(EX_INTERNAL|0x163);
-      return 0;
-    }
-#endif
-  if ( getsign(st0_ptr) )
-    templ |= 0x80000000;
+	if (getsign(st0_ptr))
+		templ |= 0x80000000;
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,single,4);
-  FPU_put_user(templ,(unsigned long __user *) single);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, single, 4);
+	FPU_put_user(templ, (unsigned long __user *)single);
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
-
 /* Put a long long into user memory */
-int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
+int FPU_store_int64(FPU_REG * st0_ptr, u_char st0_tag, long long __user * d)
 {
-  FPU_REG t;
-  long long tll;
-  int precision_loss;
+	FPU_REG t;
+	long long tll;
+	int precision_loss;
 
-  if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      goto invalid_operand;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if ( (st0_tag == TW_Infinity) ||
-	   (st0_tag == TW_NaN) )
-	{
-	  EXCEPTION(EX_Invalid);
-	  goto invalid_operand;
+	if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		goto invalid_operand;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+			EXCEPTION(EX_Invalid);
+			goto invalid_operand;
+		}
 	}
-    }
 
-  reg_copy(st0_ptr, &t);
-  precision_loss = FPU_round_to_int(&t, st0_tag);
-  ((long *)&tll)[0] = t.sigl;
-  ((long *)&tll)[1] = t.sigh;
-  if ( (precision_loss == 1) ||
-      ((t.sigh & 0x80000000) &&
-       !((t.sigh == 0x80000000) && (t.sigl == 0) &&
-	 signnegative(&t))) )
-    {
-      EXCEPTION(EX_Invalid);
-      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
-    invalid_operand:
-      if ( control_word & EX_Invalid )
-	{
-	  /* Produce something like QNaN "indefinite" */
-	  tll = 0x8000000000000000LL;
+	reg_copy(st0_ptr, &t);
+	precision_loss = FPU_round_to_int(&t, st0_tag);
+	((long *)&tll)[0] = t.sigl;
+	((long *)&tll)[1] = t.sigh;
+	if ((precision_loss == 1) ||
+	    ((t.sigh & 0x80000000) &&
+	     !((t.sigh == 0x80000000) && (t.sigl == 0) && signnegative(&t)))) {
+		EXCEPTION(EX_Invalid);
+		/* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      invalid_operand:
+		if (control_word & EX_Invalid) {
+			/* Produce something like QNaN "indefinite" */
+			tll = 0x8000000000000000LL;
+		} else
+			return 0;
+	} else {
+		if (precision_loss)
+			set_precision_flag(precision_loss);
+		if (signnegative(&t))
+			tll = -tll;
 	}
-      else
-	return 0;
-    }
-  else
-    {
-      if ( precision_loss )
-	set_precision_flag(precision_loss);
-      if ( signnegative(&t) )
-	tll = - tll;
-    }
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,d,8);
-  if (copy_to_user(d, &tll, 8))
-    FPU_abort;
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 8);
+	if (copy_to_user(d, &tll, 8))
+		FPU_abort;
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
-
 /* Put a long into user memory */
-int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
+int FPU_store_int32(FPU_REG * st0_ptr, u_char st0_tag, long __user * d)
 {
-  FPU_REG t;
-  int precision_loss;
+	FPU_REG t;
+	int precision_loss;
 
-  if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      goto invalid_operand;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if ( (st0_tag == TW_Infinity) ||
-	   (st0_tag == TW_NaN) )
-	{
-	  EXCEPTION(EX_Invalid);
-	  goto invalid_operand;
+	if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		goto invalid_operand;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+			EXCEPTION(EX_Invalid);
+			goto invalid_operand;
+		}
 	}
-    }
 
-  reg_copy(st0_ptr, &t);
-  precision_loss = FPU_round_to_int(&t, st0_tag);
-  if (t.sigh ||
-      ((t.sigl & 0x80000000) &&
-       !((t.sigl == 0x80000000) && signnegative(&t))) )
-    {
-      EXCEPTION(EX_Invalid);
-      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
-    invalid_operand:
-      if ( control_word & EX_Invalid )
-	{
-	  /* Produce something like QNaN "indefinite" */
-	  t.sigl = 0x80000000;
+	reg_copy(st0_ptr, &t);
+	precision_loss = FPU_round_to_int(&t, st0_tag);
+	if (t.sigh ||
+	    ((t.sigl & 0x80000000) &&
+	     !((t.sigl == 0x80000000) && signnegative(&t)))) {
+		EXCEPTION(EX_Invalid);
+		/* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      invalid_operand:
+		if (control_word & EX_Invalid) {
+			/* Produce something like QNaN "indefinite" */
+			t.sigl = 0x80000000;
+		} else
+			return 0;
+	} else {
+		if (precision_loss)
+			set_precision_flag(precision_loss);
+		if (signnegative(&t))
+			t.sigl = -(long)t.sigl;
 	}
-      else
-	return 0;
-    }
-  else
-    {
-      if ( precision_loss )
-	set_precision_flag(precision_loss);
-      if ( signnegative(&t) )
-	t.sigl = -(long)t.sigl;
-    }
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,d,4);
-  FPU_put_user(t.sigl, (unsigned long __user *) d);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 4);
+	FPU_put_user(t.sigl, (unsigned long __user *)d);
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
-
 /* Put a short into user memory */
-int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
+int FPU_store_int16(FPU_REG * st0_ptr, u_char st0_tag, short __user * d)
 {
-  FPU_REG t;
-  int precision_loss;
+	FPU_REG t;
+	int precision_loss;
 
-  if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      goto invalid_operand;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if ( (st0_tag == TW_Infinity) ||
-	   (st0_tag == TW_NaN) )
-	{
-	  EXCEPTION(EX_Invalid);
-	  goto invalid_operand;
+	if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		goto invalid_operand;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+			EXCEPTION(EX_Invalid);
+			goto invalid_operand;
+		}
 	}
-    }
 
-  reg_copy(st0_ptr, &t);
-  precision_loss = FPU_round_to_int(&t, st0_tag);
-  if (t.sigh ||
-      ((t.sigl & 0xffff8000) &&
-       !((t.sigl == 0x8000) && signnegative(&t))) )
-    {
-      EXCEPTION(EX_Invalid);
-      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
-    invalid_operand:
-      if ( control_word & EX_Invalid )
-	{
-	  /* Produce something like QNaN "indefinite" */
-	  t.sigl = 0x8000;
+	reg_copy(st0_ptr, &t);
+	precision_loss = FPU_round_to_int(&t, st0_tag);
+	if (t.sigh ||
+	    ((t.sigl & 0xffff8000) &&
+	     !((t.sigl == 0x8000) && signnegative(&t)))) {
+		EXCEPTION(EX_Invalid);
+		/* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      invalid_operand:
+		if (control_word & EX_Invalid) {
+			/* Produce something like QNaN "indefinite" */
+			t.sigl = 0x8000;
+		} else
+			return 0;
+	} else {
+		if (precision_loss)
+			set_precision_flag(precision_loss);
+		if (signnegative(&t))
+			t.sigl = -t.sigl;
 	}
-      else
-	return 0;
-    }
-  else
-    {
-      if ( precision_loss )
-	set_precision_flag(precision_loss);
-      if ( signnegative(&t) )
-	t.sigl = -t.sigl;
-    }
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,d,2);
-  FPU_put_user((short)t.sigl, d);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 2);
+	FPU_put_user((short)t.sigl, d);
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
-
 /* Put a packed bcd array into user memory */
-int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
+int FPU_store_bcd(FPU_REG * st0_ptr, u_char st0_tag, u_char __user * d)
 {
-  FPU_REG t;
-  unsigned long long ll;
-  u_char b;
-  int i, precision_loss;
-  u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
+	FPU_REG t;
+	unsigned long long ll;
+	u_char b;
+	int i, precision_loss;
+	u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
 
-  if ( st0_tag == TAG_Empty )
-    {
-      /* Empty register (stack underflow) */
-      EXCEPTION(EX_StackUnder);
-      goto invalid_operand;
-    }
-  else if ( st0_tag == TAG_Special )
-    {
-      st0_tag = FPU_Special(st0_ptr);
-      if ( (st0_tag == TW_Infinity) ||
-	   (st0_tag == TW_NaN) )
-	{
-	  EXCEPTION(EX_Invalid);
-	  goto invalid_operand;
+	if (st0_tag == TAG_Empty) {
+		/* Empty register (stack underflow) */
+		EXCEPTION(EX_StackUnder);
+		goto invalid_operand;
+	} else if (st0_tag == TAG_Special) {
+		st0_tag = FPU_Special(st0_ptr);
+		if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+			EXCEPTION(EX_Invalid);
+			goto invalid_operand;
+		}
 	}
-    }
 
-  reg_copy(st0_ptr, &t);
-  precision_loss = FPU_round_to_int(&t, st0_tag);
-  ll = significand(&t);
+	reg_copy(st0_ptr, &t);
+	precision_loss = FPU_round_to_int(&t, st0_tag);
+	ll = significand(&t);
 
-  /* Check for overflow, by comparing with 999999999999999999 decimal. */
-  if ( (t.sigh > 0x0de0b6b3) ||
-      ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
-    {
-      EXCEPTION(EX_Invalid);
-      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
-    invalid_operand:
-      if ( control_word & CW_Invalid )
-	{
-	  /* Produce the QNaN "indefinite" */
-	  RE_ENTRANT_CHECK_OFF;
-	  FPU_access_ok(VERIFY_WRITE,d,10);
-	  for ( i = 0; i < 7; i++)
-	    FPU_put_user(0, d+i); /* These bytes "undefined" */
-	  FPU_put_user(0xc0, d+7); /* This byte "undefined" */
-	  FPU_put_user(0xff, d+8);
-	  FPU_put_user(0xff, d+9);
-	  RE_ENTRANT_CHECK_ON;
-	  return 1;
+	/* Check for overflow, by comparing with 999999999999999999 decimal. */
+	if ((t.sigh > 0x0de0b6b3) ||
+	    ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff))) {
+		EXCEPTION(EX_Invalid);
+		/* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      invalid_operand:
+		if (control_word & CW_Invalid) {
+			/* Produce the QNaN "indefinite" */
+			RE_ENTRANT_CHECK_OFF;
+			FPU_access_ok(VERIFY_WRITE, d, 10);
+			for (i = 0; i < 7; i++)
+				FPU_put_user(0, d + i);	/* These bytes "undefined" */
+			FPU_put_user(0xc0, d + 7);	/* This byte "undefined" */
+			FPU_put_user(0xff, d + 8);
+			FPU_put_user(0xff, d + 9);
+			RE_ENTRANT_CHECK_ON;
+			return 1;
+		} else
+			return 0;
+	} else if (precision_loss) {
+		/* Precision loss doesn't stop the data transfer */
+		set_precision_flag(precision_loss);
 	}
-      else
-	return 0;
-    }
-  else if ( precision_loss )
-    {
-      /* Precision loss doesn't stop the data transfer */
-      set_precision_flag(precision_loss);
-    }
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,d,10);
-  RE_ENTRANT_CHECK_ON;
-  for ( i = 0; i < 9; i++)
-    {
-      b = FPU_div_small(&ll, 10);
-      b |= (FPU_div_small(&ll, 10)) << 4;
-      RE_ENTRANT_CHECK_OFF;
-      FPU_put_user(b, d+i);
-      RE_ENTRANT_CHECK_ON;
-    }
-  RE_ENTRANT_CHECK_OFF;
-  FPU_put_user(sign, d+9);
-  RE_ENTRANT_CHECK_ON;
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 10);
+	RE_ENTRANT_CHECK_ON;
+	for (i = 0; i < 9; i++) {
+		b = FPU_div_small(&ll, 10);
+		b |= (FPU_div_small(&ll, 10)) << 4;
+		RE_ENTRANT_CHECK_OFF;
+		FPU_put_user(b, d + i);
+		RE_ENTRANT_CHECK_ON;
+	}
+	RE_ENTRANT_CHECK_OFF;
+	FPU_put_user(sign, d + 9);
+	RE_ENTRANT_CHECK_ON;
 
-  return 1;
+	return 1;
 }
 
 /*===========================================================================*/
@@ -1117,259 +971,254 @@ int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
 /* Overflow is signalled by a non-zero return value (in eax).
    In the case of overflow, the returned significand always has the
    largest possible value */
-int FPU_round_to_int(FPU_REG *r, u_char tag)
+int FPU_round_to_int(FPU_REG * r, u_char tag)
 {
-  u_char     very_big;
-  unsigned eax;
+	u_char very_big;
+	unsigned eax;
 
-  if (tag == TAG_Zero)
-    {
-      /* Make sure that zero is returned */
-      significand(r) = 0;
-      return 0;        /* o.k. */
-    }
+	if (tag == TAG_Zero) {
+		/* Make sure that zero is returned */
+		significand(r) = 0;
+		return 0;	/* o.k. */
+	}
 
-  if (exponent(r) > 63)
-    {
-      r->sigl = r->sigh = ~0;      /* The largest representable number */
-      return 1;        /* overflow */
-    }
+	if (exponent(r) > 63) {
+		r->sigl = r->sigh = ~0;	/* The largest representable number */
+		return 1;	/* overflow */
+	}
 
-  eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
-  very_big = !(~(r->sigh) | ~(r->sigl));  /* test for 0xfff...fff */
+	eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
+	very_big = !(~(r->sigh) | ~(r->sigl));	/* test for 0xfff...fff */
 #define	half_or_more	(eax & 0x80000000)
 #define	frac_part	(eax)
 #define more_than_half  ((eax & 0x80000001) == 0x80000001)
-  switch (control_word & CW_RC)
-    {
-    case RC_RND:
-      if ( more_than_half               	/* nearest */
-	  || (half_or_more && (r->sigl & 1)) )	/* odd -> even */
-	{
-	  if ( very_big ) return 1;        /* overflow */
-	  significand(r) ++;
-	  return PRECISION_LOST_UP;
+	switch (control_word & CW_RC) {
+	case RC_RND:
+		if (more_than_half	/* nearest */
+		    || (half_or_more && (r->sigl & 1))) {	/* odd -> even */
+			if (very_big)
+				return 1;	/* overflow */
+			significand(r)++;
+			return PRECISION_LOST_UP;
+		}
+		break;
+	case RC_DOWN:
+		if (frac_part && getsign(r)) {
+			if (very_big)
+				return 1;	/* overflow */
+			significand(r)++;
+			return PRECISION_LOST_UP;
+		}
+		break;
+	case RC_UP:
+		if (frac_part && !getsign(r)) {
+			if (very_big)
+				return 1;	/* overflow */
+			significand(r)++;
+			return PRECISION_LOST_UP;
+		}
+		break;
+	case RC_CHOP:
+		break;
 	}
-      break;
-    case RC_DOWN:
-      if (frac_part && getsign(r))
-	{
-	  if ( very_big ) return 1;        /* overflow */
-	  significand(r) ++;
-	  return PRECISION_LOST_UP;
-	}
-      break;
-    case RC_UP:
-      if (frac_part && !getsign(r))
-	{
-	  if ( very_big ) return 1;        /* overflow */
-	  significand(r) ++;
-	  return PRECISION_LOST_UP;
-	}
-      break;
-    case RC_CHOP:
-      break;
-    }
 
-  return eax ? PRECISION_LOST_DOWN : 0;
+	return eax ? PRECISION_LOST_DOWN : 0;
 
 }
 
 /*===========================================================================*/
 
-u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
+u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user * s)
 {
-  unsigned short tag_word = 0;
-  u_char tag;
-  int i;
+	unsigned short tag_word = 0;
+	u_char tag;
+	int i;
 
-  if ( (addr_modes.default_mode == VM86) ||
-      ((addr_modes.default_mode == PM16)
-      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
-    {
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_READ, s, 0x0e);
-      FPU_get_user(control_word, (unsigned short __user *) s);
-      FPU_get_user(partial_status, (unsigned short __user *) (s+2));
-      FPU_get_user(tag_word, (unsigned short __user *) (s+4));
-      FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6));
-      FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8));
-      FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a));
-      FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c));
-      RE_ENTRANT_CHECK_ON;
-      s += 0x0e;
-      if ( addr_modes.default_mode == VM86 )
-	{
-	  instruction_address.offset
-	    += (instruction_address.selector & 0xf000) << 4;
-	  operand_address.offset += (operand_address.selector & 0xf000) << 4;
+	if ((addr_modes.default_mode == VM86) ||
+	    ((addr_modes.default_mode == PM16)
+	     ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) {
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_READ, s, 0x0e);
+		FPU_get_user(control_word, (unsigned short __user *)s);
+		FPU_get_user(partial_status, (unsigned short __user *)(s + 2));
+		FPU_get_user(tag_word, (unsigned short __user *)(s + 4));
+		FPU_get_user(instruction_address.offset,
+			     (unsigned short __user *)(s + 6));
+		FPU_get_user(instruction_address.selector,
+			     (unsigned short __user *)(s + 8));
+		FPU_get_user(operand_address.offset,
+			     (unsigned short __user *)(s + 0x0a));
+		FPU_get_user(operand_address.selector,
+			     (unsigned short __user *)(s + 0x0c));
+		RE_ENTRANT_CHECK_ON;
+		s += 0x0e;
+		if (addr_modes.default_mode == VM86) {
+			instruction_address.offset
+			    += (instruction_address.selector & 0xf000) << 4;
+			operand_address.offset +=
+			    (operand_address.selector & 0xf000) << 4;
+		}
+	} else {
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_READ, s, 0x1c);
+		FPU_get_user(control_word, (unsigned short __user *)s);
+		FPU_get_user(partial_status, (unsigned short __user *)(s + 4));
+		FPU_get_user(tag_word, (unsigned short __user *)(s + 8));
+		FPU_get_user(instruction_address.offset,
+			     (unsigned long __user *)(s + 0x0c));
+		FPU_get_user(instruction_address.selector,
+			     (unsigned short __user *)(s + 0x10));
+		FPU_get_user(instruction_address.opcode,
+			     (unsigned short __user *)(s + 0x12));
+		FPU_get_user(operand_address.offset,
+			     (unsigned long __user *)(s + 0x14));
+		FPU_get_user(operand_address.selector,
+			     (unsigned long __user *)(s + 0x18));
+		RE_ENTRANT_CHECK_ON;
+		s += 0x1c;
 	}
-    }
-  else
-    {
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_READ, s, 0x1c);
-      FPU_get_user(control_word, (unsigned short __user *) s);
-      FPU_get_user(partial_status, (unsigned short __user *) (s+4));
-      FPU_get_user(tag_word, (unsigned short __user *) (s+8));
-      FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c));
-      FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10));
-      FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12));
-      FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14));
-      FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18));
-      RE_ENTRANT_CHECK_ON;
-      s += 0x1c;
-    }
 
 #ifdef PECULIAR_486
-  control_word &= ~0xe080;
-#endif /* PECULIAR_486 */ 
+	control_word &= ~0xe080;
+#endif /* PECULIAR_486 */
 
-  top = (partial_status >> SW_Top_Shift) & 7;
+	top = (partial_status >> SW_Top_Shift) & 7;
 
-  if ( partial_status & ~control_word & CW_Exceptions )
-    partial_status |= (SW_Summary | SW_Backward);
-  else
-    partial_status &= ~(SW_Summary | SW_Backward);
+	if (partial_status & ~control_word & CW_Exceptions)
+		partial_status |= (SW_Summary | SW_Backward);
+	else
+		partial_status &= ~(SW_Summary | SW_Backward);
 
-  for ( i = 0; i < 8; i++ )
-    {
-      tag = tag_word & 3;
-      tag_word >>= 2;
+	for (i = 0; i < 8; i++) {
+		tag = tag_word & 3;
+		tag_word >>= 2;
 
-      if ( tag == TAG_Empty )
-	/* New tag is empty.  Accept it */
-	FPU_settag(i, TAG_Empty);
-      else if ( FPU_gettag(i) == TAG_Empty )
-	{
-	  /* Old tag is empty and new tag is not empty.  New tag is determined
-	     by old reg contents */
-	  if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias )
-	    {
-	      if ( !(fpu_register(i).sigl | fpu_register(i).sigh) )
-		FPU_settag(i, TAG_Zero);
-	      else
-		FPU_settag(i, TAG_Special);
-	    }
-	  else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias )
-	    {
-	      FPU_settag(i, TAG_Special);
-	    }
-	  else if ( fpu_register(i).sigh & 0x80000000 )
-	    FPU_settag(i, TAG_Valid);
-	  else
-	    FPU_settag(i, TAG_Special);   /* An Un-normal */
-  	}
-      /* Else old tag is not empty and new tag is not empty.  Old tag
-	 remains correct */
-    }
+		if (tag == TAG_Empty)
+			/* New tag is empty.  Accept it */
+			FPU_settag(i, TAG_Empty);
+		else if (FPU_gettag(i) == TAG_Empty) {
+			/* Old tag is empty and new tag is not empty.  New tag is determined
+			   by old reg contents */
+			if (exponent(&fpu_register(i)) == -EXTENDED_Ebias) {
+				if (!
+				    (fpu_register(i).sigl | fpu_register(i).
+				     sigh))
+					FPU_settag(i, TAG_Zero);
+				else
+					FPU_settag(i, TAG_Special);
+			} else if (exponent(&fpu_register(i)) ==
+				   0x7fff - EXTENDED_Ebias) {
+				FPU_settag(i, TAG_Special);
+			} else if (fpu_register(i).sigh & 0x80000000)
+				FPU_settag(i, TAG_Valid);
+			else
+				FPU_settag(i, TAG_Special);	/* An Un-normal */
+		}
+		/* Else old tag is not empty and new tag is not empty.  Old tag
+		   remains correct */
+	}
 
-  return s;
+	return s;
 }
 
-
-void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
+void frstor(fpu_addr_modes addr_modes, u_char __user * data_address)
 {
-  int i, regnr;
-  u_char __user *s = fldenv(addr_modes, data_address);
-  int offset = (top & 7) * 10, other = 80 - offset;
+	int i, regnr;
+	u_char __user *s = fldenv(addr_modes, data_address);
+	int offset = (top & 7) * 10, other = 80 - offset;
 
-  /* Copy all registers in stack order. */
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_READ,s,80);
-  __copy_from_user(register_base+offset, s, other);
-  if ( offset )
-    __copy_from_user(register_base, s+other, offset);
-  RE_ENTRANT_CHECK_ON;
+	/* Copy all registers in stack order. */
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_READ, s, 80);
+	__copy_from_user(register_base + offset, s, other);
+	if (offset)
+		__copy_from_user(register_base, s + other, offset);
+	RE_ENTRANT_CHECK_ON;
 
-  for ( i = 0; i < 8; i++ )
-    {
-      regnr = (i+top) & 7;
-      if ( FPU_gettag(regnr) != TAG_Empty )
-	/* The loaded data over-rides all other cases. */
-	FPU_settag(regnr, FPU_tagof(&st(i)));
-    }
+	for (i = 0; i < 8; i++) {
+		regnr = (i + top) & 7;
+		if (FPU_gettag(regnr) != TAG_Empty)
+			/* The loaded data over-rides all other cases. */
+			FPU_settag(regnr, FPU_tagof(&st(i)));
+	}
 
 }
 
-
-u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
+u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user * d)
 {
-  if ( (addr_modes.default_mode == VM86) ||
-      ((addr_modes.default_mode == PM16)
-      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
-    {
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE,d,14);
+	if ((addr_modes.default_mode == VM86) ||
+	    ((addr_modes.default_mode == PM16)
+	     ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) {
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, d, 14);
 #ifdef PECULIAR_486
-      FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d);
+		FPU_put_user(control_word & ~0xe080, (unsigned long __user *)d);
 #else
-      FPU_put_user(control_word, (unsigned short __user *) d);
+		FPU_put_user(control_word, (unsigned short __user *)d);
 #endif /* PECULIAR_486 */
-      FPU_put_user(status_word(), (unsigned short __user *) (d+2));
-      FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4));
-      FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6));
-      FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a));
-      if ( addr_modes.default_mode == VM86 )
-	{
-	  FPU_put_user((instruction_address.offset & 0xf0000) >> 4,
-		      (unsigned short __user *) (d+8));
-	  FPU_put_user((operand_address.offset & 0xf0000) >> 4,
-		      (unsigned short __user *) (d+0x0c));
-	}
-      else
-	{
-	  FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8));
-	  FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c));
-	}
-      RE_ENTRANT_CHECK_ON;
-      d += 0x0e;
-    }
-  else
-    {
-      RE_ENTRANT_CHECK_OFF;
-      FPU_access_ok(VERIFY_WRITE, d, 7*4);
+		FPU_put_user(status_word(), (unsigned short __user *)(d + 2));
+		FPU_put_user(fpu_tag_word, (unsigned short __user *)(d + 4));
+		FPU_put_user(instruction_address.offset,
+			     (unsigned short __user *)(d + 6));
+		FPU_put_user(operand_address.offset,
+			     (unsigned short __user *)(d + 0x0a));
+		if (addr_modes.default_mode == VM86) {
+			FPU_put_user((instruction_address.
+				      offset & 0xf0000) >> 4,
+				     (unsigned short __user *)(d + 8));
+			FPU_put_user((operand_address.offset & 0xf0000) >> 4,
+				     (unsigned short __user *)(d + 0x0c));
+		} else {
+			FPU_put_user(instruction_address.selector,
+				     (unsigned short __user *)(d + 8));
+			FPU_put_user(operand_address.selector,
+				     (unsigned short __user *)(d + 0x0c));
+		}
+		RE_ENTRANT_CHECK_ON;
+		d += 0x0e;
+	} else {
+		RE_ENTRANT_CHECK_OFF;
+		FPU_access_ok(VERIFY_WRITE, d, 7 * 4);
 #ifdef PECULIAR_486
-      control_word &= ~0xe080;
-      /* An 80486 sets nearly all of the reserved bits to 1. */
-      control_word |= 0xffff0040;
-      partial_status = status_word() | 0xffff0000;
-      fpu_tag_word |= 0xffff0000;
-      I387.soft.fcs &= ~0xf8000000;
-      I387.soft.fos |= 0xffff0000;
+		control_word &= ~0xe080;
+		/* An 80486 sets nearly all of the reserved bits to 1. */
+		control_word |= 0xffff0040;
+		partial_status = status_word() | 0xffff0000;
+		fpu_tag_word |= 0xffff0000;
+		I387.soft.fcs &= ~0xf8000000;
+		I387.soft.fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
-      if (__copy_to_user(d, &control_word, 7*4))
-	FPU_abort;
-      RE_ENTRANT_CHECK_ON;
-      d += 0x1c;
-    }
-  
-  control_word |= CW_Exceptions;
-  partial_status &= ~(SW_Summary | SW_Backward);
+		if (__copy_to_user(d, &control_word, 7 * 4))
+			FPU_abort;
+		RE_ENTRANT_CHECK_ON;
+		d += 0x1c;
+	}
 
-  return d;
+	control_word |= CW_Exceptions;
+	partial_status &= ~(SW_Summary | SW_Backward);
+
+	return d;
 }
 
-
-void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
+void fsave(fpu_addr_modes addr_modes, u_char __user * data_address)
 {
-  u_char __user *d;
-  int offset = (top & 7) * 10, other = 80 - offset;
+	u_char __user *d;
+	int offset = (top & 7) * 10, other = 80 - offset;
 
-  d = fstenv(addr_modes, data_address);
+	d = fstenv(addr_modes, data_address);
 
-  RE_ENTRANT_CHECK_OFF;
-  FPU_access_ok(VERIFY_WRITE,d,80);
+	RE_ENTRANT_CHECK_OFF;
+	FPU_access_ok(VERIFY_WRITE, d, 80);
 
-  /* Copy all registers in stack order. */
-  if (__copy_to_user(d, register_base+offset, other))
-    FPU_abort;
-  if ( offset )
-    if (__copy_to_user(d+other, register_base, offset))
-      FPU_abort;
-  RE_ENTRANT_CHECK_ON;
+	/* Copy all registers in stack order. */
+	if (__copy_to_user(d, register_base + offset, other))
+		FPU_abort;
+	if (offset)
+		if (__copy_to_user(d + other, register_base, offset))
+			FPU_abort;
+	RE_ENTRANT_CHECK_ON;
 
-  finit();
+	finit();
 }
 
 /*===========================================================================*/
diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c
index 40f50b61bc67..36c37f71f713 100644
--- a/arch/x86/math-emu/reg_mul.c
+++ b/arch/x86/math-emu/reg_mul.c
@@ -20,7 +20,6 @@
 #include "reg_constant.h"
 #include "fpu_system.h"
 
-
 /*
   Multiply two registers to give a register result.
   The sources are st(deststnr) and (b,tagb,signb).
@@ -29,104 +28,88 @@
 /* This routine must be called with non-empty source registers */
 int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
 {
-  FPU_REG *a = &st(deststnr);
-  FPU_REG *dest = a;
-  u_char taga = FPU_gettagi(deststnr);
-  u_char saved_sign = getsign(dest);
-  u_char sign = (getsign(a) ^ getsign(b));
-  int tag;
+	FPU_REG *a = &st(deststnr);
+	FPU_REG *dest = a;
+	u_char taga = FPU_gettagi(deststnr);
+	u_char saved_sign = getsign(dest);
+	u_char sign = (getsign(a) ^ getsign(b));
+	int tag;
 
+	if (!(taga | tagb)) {
+		/* Both regs Valid, this should be the most common case. */
 
-  if ( !(taga | tagb) )
-    {
-      /* Both regs Valid, this should be the most common case. */
-
-      tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
+		tag =
+		    FPU_u_mul(a, b, dest, control_w, sign,
+			      exponent(a) + exponent(b));
+		if (tag < 0) {
+			setsign(dest, saved_sign);
+			return tag;
+		}
+		FPU_settagi(deststnr, tag);
+		return tag;
 	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
 
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
+	if (taga == TAG_Special)
+		taga = FPU_Special(a);
+	if (tagb == TAG_Special)
+		tagb = FPU_Special(b);
 
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+	if (((taga == TAG_Valid) && (tagb == TW_Denormal))
 	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      FPU_REG x, y;
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
+	    || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+		FPU_REG x, y;
+		if (denormal_operand() < 0)
+			return FPU_Exception;
 
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      tag = FPU_u_mul(&x, &y, dest, control_w, sign,
-		      exponent16(&x) + exponent16(&y));
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
+		FPU_to_exp16(a, &x);
+		FPU_to_exp16(b, &y);
+		tag = FPU_u_mul(&x, &y, dest, control_w, sign,
+				exponent16(&x) + exponent16(&y));
+		if (tag < 0) {
+			setsign(dest, saved_sign);
+			return tag;
+		}
+		FPU_settagi(deststnr, tag);
+		return tag;
+	} else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) {
+		if (((tagb == TW_Denormal) || (taga == TW_Denormal))
+		    && (denormal_operand() < 0))
+			return FPU_Exception;
+
+		/* Must have either both arguments == zero, or
+		   one valid and the other zero.
+		   The result is therefore zero. */
+		FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+		/* The 80486 book says that the answer is +0, but a real
+		   80486 behaves this way.
+		   IEEE-754 apparently says it should be this way. */
+		setsign(dest, sign);
+		return TAG_Zero;
+	}
+	/* Must have infinities, NaNs, etc */
+	else if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+		return real_2op_NaN(b, tagb, deststnr, &st(0));
+	} else if (((taga == TW_Infinity) && (tagb == TAG_Zero))
+		   || ((tagb == TW_Infinity) && (taga == TAG_Zero))) {
+		return arith_invalid(deststnr);	/* Zero*Infinity is invalid */
+	} else if (((taga == TW_Denormal) || (tagb == TW_Denormal))
+		   && (denormal_operand() < 0)) {
+		return FPU_Exception;
+	} else if (taga == TW_Infinity) {
+		FPU_copy_to_regi(a, TAG_Special, deststnr);
+		setsign(dest, sign);
+		return TAG_Special;
+	} else if (tagb == TW_Infinity) {
+		FPU_copy_to_regi(b, TAG_Special, deststnr);
+		setsign(dest, sign);
+		return TAG_Special;
 	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
-    {
-      if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
-	   && (denormal_operand() < 0) )
-	return FPU_Exception;
-
-      /* Must have either both arguments == zero, or
-	 one valid and the other zero.
-	 The result is therefore zero. */
-      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-      /* The 80486 book says that the answer is +0, but a real
-	 80486 behaves this way.
-	 IEEE-754 apparently says it should be this way. */
-      setsign(dest, sign);
-      return TAG_Zero;
-    }
-      /* Must have infinities, NaNs, etc */
-  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      return real_2op_NaN(b, tagb, deststnr, &st(0));
-    }
-  else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
-	    || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
-    {
-      return arith_invalid(deststnr);  /* Zero*Infinity is invalid */
-    }
-  else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
-	    && (denormal_operand() < 0) )
-    {
-      return FPU_Exception;
-    }
-  else if (taga == TW_Infinity)
-    {
-      FPU_copy_to_regi(a, TAG_Special, deststnr);
-      setsign(dest, sign);
-      return TAG_Special;
-    }
-  else if (tagb == TW_Infinity)
-    {
-      FPU_copy_to_regi(b, TAG_Special, deststnr);
-      setsign(dest, sign);
-      return TAG_Special;
-    }
-
 #ifdef PARANOID
-  else
-    {
-      EXCEPTION(EX_INTERNAL|0x102);
-      return FPU_Exception;
-    }
-#endif /* PARANOID */ 
+	else {
+		EXCEPTION(EX_INTERNAL | 0x102);
+		return FPU_Exception;
+	}
+#endif /* PARANOID */
 
 	return 0;
 }
diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h
index 59e73302aa60..54a3f226982d 100644
--- a/arch/x86/math-emu/status_w.h
+++ b/arch/x86/math-emu/status_w.h
@@ -10,7 +10,7 @@
 #ifndef _STATUS_H_
 #define _STATUS_H_
 
-#include "fpu_emu.h"    /* for definition of PECULIAR_486 */
+#include "fpu_emu.h"		/* for definition of PECULIAR_486 */
 
 #ifdef __ASSEMBLY__
 #define	Const__(x)	$##x
@@ -34,7 +34,7 @@
 #define SW_Denorm_Op   	Const__(0x0002)	/* denormalized operand */
 #define SW_Invalid     	Const__(0x0001)	/* invalid operation */
 
-#define SW_Exc_Mask     Const__(0x27f)  /* Status word exception bit mask */
+#define SW_Exc_Mask     Const__(0x27f)	/* Status word exception bit mask */
 
 #ifndef __ASSEMBLY__
 
@@ -50,8 +50,8 @@
   ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
 static inline void setcc(int cc)
 {
-	partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3);
-	partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3);
+	partial_status &= ~(SW_C0 | SW_C1 | SW_C2 | SW_C3);
+	partial_status |= (cc) & (SW_C0 | SW_C1 | SW_C2 | SW_C3);
 }
 
 #ifdef PECULIAR_486

From e8d591dc710158bae6b53c8b7a0172351025c6e2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:12 +0100
Subject: [PATCH 044/890] x86: lindent arch/i386/math-emu, cleanup

manually clean up some of the damage that lindent caused.
(this is a separate commit so that in the unlikely case of
a typo we can bisect it down to the manual edits.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/math-emu/errors.c      |  6 ++--
 arch/x86/math-emu/fpu_emu.h     | 12 +++----
 arch/x86/math-emu/fpu_entry.c   |  6 ++--
 arch/x86/math-emu/fpu_etc.c     |  8 ++---
 arch/x86/math-emu/fpu_proto.h   | 62 ++++++++++++++++-----------------
 arch/x86/math-emu/fpu_trig.c    | 38 ++++++++++----------
 arch/x86/math-emu/poly.h        | 10 +++---
 arch/x86/math-emu/poly_2xm1.c   |  2 +-
 arch/x86/math-emu/poly_atan.c   |  4 +--
 arch/x86/math-emu/poly_l2.c     |  4 +--
 arch/x86/math-emu/poly_sin.c    |  4 +--
 arch/x86/math-emu/poly_tan.c    |  2 +-
 arch/x86/math-emu/reg_convert.c |  2 +-
 arch/x86/math-emu/reg_ld_str.c  | 42 +++++++++++-----------
 14 files changed, 101 insertions(+), 101 deletions(-)

diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 7cb5bf3495b2..145b68a99516 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -374,7 +374,7 @@ asmlinkage void FPU_exception(int n)
 
 /* Real operation attempted on a NaN. */
 /* Returns < 0 if the exception is unmasked */
-int real_1op_NaN(FPU_REG * a)
+int real_1op_NaN(FPU_REG *a)
 {
 	int signalling, isNaN;
 
@@ -573,7 +573,7 @@ asmlinkage int denormal_operand(void)
 	}
 }
 
-asmlinkage int arith_overflow(FPU_REG * dest)
+asmlinkage int arith_overflow(FPU_REG *dest)
 {
 	int tag = TAG_Valid;
 
@@ -601,7 +601,7 @@ asmlinkage int arith_overflow(FPU_REG * dest)
 
 }
 
-asmlinkage int arith_underflow(FPU_REG * dest)
+asmlinkage int arith_underflow(FPU_REG *dest)
 {
 	int tag = TAG_Valid;
 
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index 656dd4c04b1b..4dae511c85ad 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -122,7 +122,7 @@ struct fpu__reg {
 
 typedef void (*FUNC) (void);
 typedef struct fpu__reg FPU_REG;
-typedef void (*FUNC_ST0) (FPU_REG * st0_ptr, u_char st0_tag);
+typedef void (*FUNC_ST0) (FPU_REG *st0_ptr, u_char st0_tag);
 typedef struct {
 	u_char address_size, operand_size, segment;
 } overrides;
@@ -166,7 +166,7 @@ extern u_char const data_sizes_16[32];
 #define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
 #define signnegative(a) (signbyte(a) & 0x80)
 
-static inline void reg_copy(FPU_REG const *x, FPU_REG * y)
+static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
 {
 	*(short *)&(y->exp) = *(const short *)&(x->exp);
 	*(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
@@ -187,8 +187,8 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG * y)
 /*----- Prototypes for functions written in assembler -----*/
 /* extern void reg_move(FPU_REG *a, FPU_REG *b); */
 
-asmlinkage int FPU_normalize(FPU_REG * x);
-asmlinkage int FPU_normalize_nuo(FPU_REG * x);
+asmlinkage int FPU_normalize(FPU_REG *x);
+asmlinkage int FPU_normalize_nuo(FPU_REG *x);
 asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
 			 FPU_REG * answ, unsigned int control_w, u_char sign,
 			 int expa, int expb);
@@ -200,12 +200,12 @@ asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
 asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
 			 FPU_REG * answ, unsigned int control_w, u_char sign,
 			 int expa, int expb);
-asmlinkage int wm_sqrt(FPU_REG * n, int dummy1, int dummy2,
+asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
 		       unsigned int control_w, u_char sign);
 asmlinkage unsigned FPU_shrx(void *l, unsigned x);
 asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
 asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
-asmlinkage int FPU_round(FPU_REG * arg, unsigned int extent, int dummy,
+asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
 			 unsigned int control_w, u_char sign);
 
 #ifndef MAKING_PROTO
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index cbb8717f09fd..377c60dfa2f0 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -126,7 +126,7 @@ static u_char const type_table[64] = {
 u_char emulating = 0;
 #endif /* RE_ENTRANT_CHECKING */
 
-static int valid_prefix(u_char * Byte, u_char __user ** fpu_eip,
+static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip,
 			overrides * override);
 
 asmlinkage void math_emulate(long arg)
@@ -580,7 +580,7 @@ asmlinkage void math_emulate(long arg)
    all prefix bytes, further changes are needed in the emulator code
    which accesses user address space. Access to separate segments is
    important for msdos emulation. */
-static int valid_prefix(u_char * Byte, u_char __user ** fpu_eip,
+static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
 			overrides * override)
 {
 	u_char byte;
@@ -673,7 +673,7 @@ void math_abort(struct info *info, unsigned int signal)
 #define sstatus_word() \
   ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
 
-int restore_i387_soft(void *s387, struct _fpstate __user * buf)
+int restore_i387_soft(void *s387, struct _fpstate __user *buf)
 {
 	u_char __user *d = (u_char __user *) buf;
 	int offset, other, i, tags, regnr, tag, newtop;
diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c
index e73631e0cde9..233e5af566f5 100644
--- a/arch/x86/math-emu/fpu_etc.c
+++ b/arch/x86/math-emu/fpu_etc.c
@@ -16,7 +16,7 @@
 #include "status_w.h"
 #include "reg_constant.h"
 
-static void fchs(FPU_REG * st0_ptr, u_char st0tag)
+static void fchs(FPU_REG *st0_ptr, u_char st0tag)
 {
 	if (st0tag ^ TAG_Empty) {
 		signbyte(st0_ptr) ^= SIGN_NEG;
@@ -25,7 +25,7 @@ static void fchs(FPU_REG * st0_ptr, u_char st0tag)
 		FPU_stack_underflow();
 }
 
-static void fabs(FPU_REG * st0_ptr, u_char st0tag)
+static void fabs(FPU_REG *st0_ptr, u_char st0tag)
 {
 	if (st0tag ^ TAG_Empty) {
 		setpositive(st0_ptr);
@@ -34,7 +34,7 @@ static void fabs(FPU_REG * st0_ptr, u_char st0tag)
 		FPU_stack_underflow();
 }
 
-static void ftst_(FPU_REG * st0_ptr, u_char st0tag)
+static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
 {
 	switch (st0tag) {
 	case TAG_Zero:
@@ -85,7 +85,7 @@ static void ftst_(FPU_REG * st0_ptr, u_char st0tag)
 	}
 }
 
-static void fxam(FPU_REG * st0_ptr, u_char st0tag)
+static void fxam(FPU_REG *st0_ptr, u_char st0tag)
 {
 	int c = 0;
 	switch (st0tag) {
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h
index 0f6384102afd..aa49b6a0d850 100644
--- a/arch/x86/math-emu/fpu_proto.h
+++ b/arch/x86/math-emu/fpu_proto.h
@@ -5,7 +5,7 @@
 extern void FPU_illegal(void);
 extern void FPU_printall(void);
 asmlinkage void FPU_exception(int n);
-extern int real_1op_NaN(FPU_REG * a);
+extern int real_1op_NaN(FPU_REG *a);
 extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
 			FPU_REG const *defaultNaN);
 asmlinkage int arith_invalid(int deststnr);
@@ -14,8 +14,8 @@ extern int set_precision_flag(int flags);
 asmlinkage void set_precision_flag_up(void);
 asmlinkage void set_precision_flag_down(void);
 asmlinkage int denormal_operand(void);
-asmlinkage int arith_overflow(FPU_REG * dest);
-asmlinkage int arith_underflow(FPU_REG * dest);
+asmlinkage int arith_overflow(FPU_REG *dest);
+asmlinkage int arith_underflow(FPU_REG *dest);
 extern void FPU_stack_overflow(void);
 extern void FPU_stack_underflow(void);
 extern void FPU_stack_underflow_i(int i);
@@ -84,19 +84,19 @@ extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
 extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
 			  void __user * data_address);
 /* poly_2xm1.c */
-extern int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG * result);
+extern int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG *result);
 /* poly_atan.c */
-extern void poly_atan(FPU_REG * st0_ptr, u_char st0_tag, FPU_REG * st1_ptr,
+extern void poly_atan(FPU_REG * st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
 		      u_char st1_tag);
 /* poly_l2.c */
-extern void poly_l2(FPU_REG * st0_ptr, FPU_REG * st1_ptr, u_char st1_sign);
-extern int poly_l2p1(u_char s0, u_char s1, FPU_REG * r0, FPU_REG * r1,
+extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
+extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
 		     FPU_REG * d);
 /* poly_sin.c */
-extern void poly_sine(FPU_REG * st0_ptr);
-extern void poly_cos(FPU_REG * st0_ptr);
+extern void poly_sine(FPU_REG *st0_ptr);
+extern void poly_cos(FPU_REG *st0_ptr);
 /* poly_tan.c */
-extern void poly_tan(FPU_REG * st0_ptr);
+extern void poly_tan(FPU_REG *st0_ptr);
 /* reg_add_sub.c */
 extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
 extern int FPU_sub(int flags, int rm, int control_w);
@@ -111,34 +111,34 @@ extern void fucompp(void);
 /* reg_constant.c */
 extern void fconst(void);
 /* reg_ld_str.c */
-extern int FPU_load_extended(long double __user * s, int stnr);
-extern int FPU_load_double(double __user * dfloat, FPU_REG * loaded_data);
-extern int FPU_load_single(float __user * single, FPU_REG * loaded_data);
-extern int FPU_load_int64(long long __user * _s);
-extern int FPU_load_int32(long __user * _s, FPU_REG * loaded_data);
-extern int FPU_load_int16(short __user * _s, FPU_REG * loaded_data);
-extern int FPU_load_bcd(u_char __user * s);
-extern int FPU_store_extended(FPU_REG * st0_ptr, u_char st0_tag,
+extern int FPU_load_extended(long double __user *s, int stnr);
+extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
+extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
+extern int FPU_load_int64(long long __user *_s);
+extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
+extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
+extern int FPU_load_bcd(u_char __user *s);
+extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
 			      long double __user * d);
-extern int FPU_store_double(FPU_REG * st0_ptr, u_char st0_tag,
+extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag,
 			    double __user * dfloat);
-extern int FPU_store_single(FPU_REG * st0_ptr, u_char st0_tag,
+extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag,
 			    float __user * single);
-extern int FPU_store_int64(FPU_REG * st0_ptr, u_char st0_tag,
+extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag,
 			   long long __user * d);
-extern int FPU_store_int32(FPU_REG * st0_ptr, u_char st0_tag, long __user * d);
-extern int FPU_store_int16(FPU_REG * st0_ptr, u_char st0_tag, short __user * d);
-extern int FPU_store_bcd(FPU_REG * st0_ptr, u_char st0_tag, u_char __user * d);
-extern int FPU_round_to_int(FPU_REG * r, u_char tag);
-extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user * s);
-extern void frstor(fpu_addr_modes addr_modes, u_char __user * data_address);
-extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user * d);
-extern void fsave(fpu_addr_modes addr_modes, u_char __user * data_address);
-extern int FPU_tagof(FPU_REG * ptr);
+extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
+extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
+extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
+extern int FPU_round_to_int(FPU_REG *r, u_char tag);
+extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
+extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
+extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
+extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
+extern int FPU_tagof(FPU_REG *ptr);
 /* reg_mul.c */
 extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
 
 extern int FPU_div(int flags, int regrm, int control_w);
 /* reg_convert.c */
-extern int FPU_to_exp16(FPU_REG const *a, FPU_REG * x);
+extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
 #endif /* _FPU_PROTO_H */
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index e5316a288a6e..ecd06680581c 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -30,7 +30,7 @@ static void rem_kernel(unsigned long long st0, unsigned long long *y,
 /* Limited measurements show no results worse than 64 bit precision
    except for the results for arguments close to 2^63, where the
    precision of the result sometimes degrades to about 63.9 bits */
-static int trig_arg(FPU_REG * st0_ptr, int even)
+static int trig_arg(FPU_REG *st0_ptr, int even)
 {
 	FPU_REG tmp;
 	u_char tmptag;
@@ -176,7 +176,7 @@ static void convert_l2reg(long const *arg, int deststnr)
 	return;
 }
 
-static void single_arg_error(FPU_REG * st0_ptr, u_char st0_tag)
+static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	if (st0_tag == TAG_Empty)
 		FPU_stack_underflow();	/* Puts a QNaN in st(0) */
@@ -188,7 +188,7 @@ static void single_arg_error(FPU_REG * st0_ptr, u_char st0_tag)
 #endif /* PARANOID */
 }
 
-static void single_arg_2_error(FPU_REG * st0_ptr, u_char st0_tag)
+static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	int isNaN;
 
@@ -229,7 +229,7 @@ static void single_arg_2_error(FPU_REG * st0_ptr, u_char st0_tag)
 
 /*---------------------------------------------------------------------------*/
 
-static void f2xm1(FPU_REG * st0_ptr, u_char tag)
+static void f2xm1(FPU_REG *st0_ptr, u_char tag)
 {
 	FPU_REG a;
 
@@ -272,7 +272,7 @@ static void f2xm1(FPU_REG * st0_ptr, u_char tag)
 	}
 }
 
-static void fptan(FPU_REG * st0_ptr, u_char st0_tag)
+static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st_new_ptr;
 	int q;
@@ -351,7 +351,7 @@ static void fptan(FPU_REG * st0_ptr, u_char st0_tag)
 	single_arg_2_error(st0_ptr, st0_tag);
 }
 
-static void fxtract(FPU_REG * st0_ptr, u_char st0_tag)
+static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st_new_ptr;
 	u_char sign;
@@ -444,7 +444,7 @@ static void fincstp(void)
 	top++;
 }
 
-static void fsqrt_(FPU_REG * st0_ptr, u_char st0_tag)
+static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	int expon;
 
@@ -502,7 +502,7 @@ static void fsqrt_(FPU_REG * st0_ptr, u_char st0_tag)
 
 }
 
-static void frndint_(FPU_REG * st0_ptr, u_char st0_tag)
+static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	int flags, tag;
 
@@ -546,7 +546,7 @@ static void frndint_(FPU_REG * st0_ptr, u_char st0_tag)
 		single_arg_error(st0_ptr, st0_tag);
 }
 
-static int fsin(FPU_REG * st0_ptr, u_char tag)
+static int fsin(FPU_REG *st0_ptr, u_char tag)
 {
 	u_char arg_sign = getsign(st0_ptr);
 
@@ -607,7 +607,7 @@ static int fsin(FPU_REG * st0_ptr, u_char tag)
 	}
 }
 
-static int f_cos(FPU_REG * st0_ptr, u_char tag)
+static int f_cos(FPU_REG *st0_ptr, u_char tag)
 {
 	u_char st0_sign;
 
@@ -677,12 +677,12 @@ static int f_cos(FPU_REG * st0_ptr, u_char tag)
 	}
 }
 
-static void fcos(FPU_REG * st0_ptr, u_char st0_tag)
+static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	f_cos(st0_ptr, st0_tag);
 }
 
-static void fsincos(FPU_REG * st0_ptr, u_char st0_tag)
+static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st_new_ptr;
 	FPU_REG arg;
@@ -775,7 +775,7 @@ static void rem_kernel(unsigned long long st0, unsigned long long *y,
 /* Remainder of st(0) / st(1) */
 /* This routine produces exact results, i.e. there is never any
    rounding or truncation, etc of the result. */
-static void do_fprem(FPU_REG * st0_ptr, u_char st0_tag, int round)
+static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
 {
 	FPU_REG *st1_ptr = &st(1);
 	u_char st1_tag = FPU_gettagi(1);
@@ -1017,7 +1017,7 @@ static void do_fprem(FPU_REG * st0_ptr, u_char st0_tag, int round)
 }
 
 /* ST(1) <- ST(1) * log ST;  pop ST */
-static void fyl2x(FPU_REG * st0_ptr, u_char st0_tag)
+static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st1_ptr = &st(1), exponent;
 	u_char st1_tag = FPU_gettagi(1);
@@ -1188,7 +1188,7 @@ static void fyl2x(FPU_REG * st0_ptr, u_char st0_tag)
 	FPU_pop();
 }
 
-static void fpatan(FPU_REG * st0_ptr, u_char st0_tag)
+static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st1_ptr = &st(1);
 	u_char st1_tag = FPU_gettagi(1);
@@ -1298,17 +1298,17 @@ static void fpatan(FPU_REG * st0_ptr, u_char st0_tag)
 	set_precision_flag_up();	/* We do not really know if up or down */
 }
 
-static void fprem(FPU_REG * st0_ptr, u_char st0_tag)
+static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	do_fprem(st0_ptr, st0_tag, RC_CHOP);
 }
 
-static void fprem1(FPU_REG * st0_ptr, u_char st0_tag)
+static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	do_fprem(st0_ptr, st0_tag, RC_RND);
 }
 
-static void fyl2xp1(FPU_REG * st0_ptr, u_char st0_tag)
+static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	u_char sign, sign1;
 	FPU_REG *st1_ptr = &st(1), a, b;
@@ -1477,7 +1477,7 @@ static void fyl2xp1(FPU_REG * st0_ptr, u_char st0_tag)
 
 }
 
-static void fscale(FPU_REG * st0_ptr, u_char st0_tag)
+static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
 {
 	FPU_REG *st1_ptr = &st(1);
 	u_char st1_tag = FPU_gettagi(1);
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h
index f317de7d8864..168eb44c93c8 100644
--- a/arch/x86/math-emu/poly.h
+++ b/arch/x86/math-emu/poly.h
@@ -33,12 +33,12 @@ asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
 
 asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
 asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
-asmlinkage void mul_Xsig_Xsig(Xsig * dest, const Xsig * mult);
+asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
 
 asmlinkage void shr_Xsig(Xsig *, const int n);
 asmlinkage int round_Xsig(Xsig *);
 asmlinkage int norm_Xsig(Xsig *);
-asmlinkage void div_Xsig(Xsig * x1, const Xsig * x2, const Xsig * dest);
+asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
 
 /* Macro to extract the most significant 32 bits from a long long */
 #define LL_MSW(x)     (((unsigned long *)&x)[1])
@@ -70,7 +70,7 @@ static inline unsigned long mul_32_32(const unsigned long arg1,
 }
 
 /* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
-static inline void add_Xsig_Xsig(Xsig * dest, const Xsig * x2)
+static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
 {
 	asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
 		      "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
@@ -84,7 +84,7 @@ static inline void add_Xsig_Xsig(Xsig * dest, const Xsig * x2)
 /* Note: the constraints in the asm statement didn't always work properly
    with gcc 2.5.8.  Changing from using edi to using ecx got around the
    problem, but keep fingers crossed! */
-static inline void add_two_Xsig(Xsig * dest, const Xsig * x2, long int *exp)
+static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
 {
 	asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
 		      "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
@@ -101,7 +101,7 @@ static inline void add_two_Xsig(Xsig * dest, const Xsig * x2, long int *exp)
 
 /* Negate (subtract from 1.0) the 12 byte Xsig */
 /* This is faster in a loop on my 386 than using the "neg" instruction. */
-static inline void negate_Xsig(Xsig * x)
+static inline void negate_Xsig(Xsig *x)
 {
 	asm volatile ("movl %1,%%esi;\n"
 		      "xorl %%ecx,%%ecx;\n"
diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c
index d8f2be3c8383..b00e9e10cdce 100644
--- a/arch/x86/math-emu/poly_2xm1.c
+++ b/arch/x86/math-emu/poly_2xm1.c
@@ -49,7 +49,7 @@ static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
 /*--- poly_2xm1() -----------------------------------------------------------+
  | Requires st(0) which is TAG_Valid and < 1.                                |
  +---------------------------------------------------------------------------*/
-int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG * result)
+int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
 {
 	long int exponent, shift;
 	unsigned long long Xll;
diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c
index 2f4ac8143fc3..20c28e58e2d4 100644
--- a/arch/x86/math-emu/poly_atan.c
+++ b/arch/x86/math-emu/poly_atan.c
@@ -48,8 +48,8 @@ static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
 /*--- poly_atan() -----------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void poly_atan(FPU_REG * st0_ptr, u_char st0_tag,
-	       FPU_REG * st1_ptr, u_char st1_tag)
+void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
+	       FPU_REG *st1_ptr, u_char st1_tag)
 {
 	u_char transformed, inverted, sign1, sign2;
 	int exponent;
diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c
index c0102ae87511..8e2ff4b28a0a 100644
--- a/arch/x86/math-emu/poly_l2.c
+++ b/arch/x86/math-emu/poly_l2.c
@@ -23,7 +23,7 @@ static void log2_kernel(FPU_REG const *arg, u_char argsign,
 /*--- poly_l2() -------------------------------------------------------------+
  |   Base 2 logarithm by a polynomial approximation.                         |
  +---------------------------------------------------------------------------*/
-void poly_l2(FPU_REG * st0_ptr, FPU_REG * st1_ptr, u_char st1_sign)
+void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
 {
 	long int exponent, expon, expon_expon;
 	Xsig accumulator, expon_accum, yaccum;
@@ -178,7 +178,7 @@ static const unsigned long leadterm = 0xb8000000;
  |   Base 2 logarithm by a polynomial approximation.                         |
  |   log2(x+1)                                                               |
  +---------------------------------------------------------------------------*/
-static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig * accum_result,
+static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
 			long int *expon)
 {
 	long int exponent, adj;
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c
index 7273ae0c7692..b862039c728e 100644
--- a/arch/x86/math-emu/poly_sin.c
+++ b/arch/x86/math-emu/poly_sin.c
@@ -54,7 +54,7 @@ static const unsigned long long neg_terms_h[N_COEFF_NH] = {
 /*--- poly_sine() -----------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void poly_sine(FPU_REG * st0_ptr)
+void poly_sine(FPU_REG *st0_ptr)
 {
 	int exponent, echange;
 	Xsig accumulator, argSqrd, argTo4;
@@ -197,7 +197,7 @@ void poly_sine(FPU_REG * st0_ptr)
 /*--- poly_cos() ------------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void poly_cos(FPU_REG * st0_ptr)
+void poly_cos(FPU_REG *st0_ptr)
 {
 	FPU_REG result;
 	long int exponent, exp2, echange;
diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c
index c0d181e39229..1875763e0c02 100644
--- a/arch/x86/math-emu/poly_tan.c
+++ b/arch/x86/math-emu/poly_tan.c
@@ -47,7 +47,7 @@ static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
 /*--- poly_tan() ------------------------------------------------------------+
  |                                                                           |
  +---------------------------------------------------------------------------*/
-void poly_tan(FPU_REG * st0_ptr)
+void poly_tan(FPU_REG *st0_ptr)
 {
 	long int exponent;
 	int invert;
diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c
index afd31b31000d..108060779977 100644
--- a/arch/x86/math-emu/reg_convert.c
+++ b/arch/x86/math-emu/reg_convert.c
@@ -13,7 +13,7 @@
 #include "exception.h"
 #include "fpu_emu.h"
 
-int FPU_to_exp16(FPU_REG const *a, FPU_REG * x)
+int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
 {
 	int sign = getsign(a);
 
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index 0b2ca8dc2988..799d4af5be66 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -35,7 +35,7 @@
 #define SINGLE_Ebias 127
 #define SINGLE_Emin (-126)	/* smallest valid exponent */
 
-static u_char normalize_no_excep(FPU_REG * r, int exp, int sign)
+static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
 {
 	u_char tag;
 
@@ -49,7 +49,7 @@ static u_char normalize_no_excep(FPU_REG * r, int exp, int sign)
 	return tag;
 }
 
-int FPU_tagof(FPU_REG * ptr)
+int FPU_tagof(FPU_REG *ptr)
 {
 	int exp;
 
@@ -78,7 +78,7 @@ int FPU_tagof(FPU_REG * ptr)
 }
 
 /* Get a long double from user memory */
-int FPU_load_extended(long double __user * s, int stnr)
+int FPU_load_extended(long double __user *s, int stnr)
 {
 	FPU_REG *sti_ptr = &st(stnr);
 
@@ -91,7 +91,7 @@ int FPU_load_extended(long double __user * s, int stnr)
 }
 
 /* Get a double from user memory */
-int FPU_load_double(double __user * dfloat, FPU_REG * loaded_data)
+int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
 {
 	int exp, tag, negative;
 	unsigned m64, l64;
@@ -152,7 +152,7 @@ int FPU_load_double(double __user * dfloat, FPU_REG * loaded_data)
 }
 
 /* Get a float from user memory */
-int FPU_load_single(float __user * single, FPU_REG * loaded_data)
+int FPU_load_single(float __user *single, FPU_REG *loaded_data)
 {
 	unsigned m32;
 	int exp, tag, negative;
@@ -206,7 +206,7 @@ int FPU_load_single(float __user * single, FPU_REG * loaded_data)
 }
 
 /* Get a long long from user memory */
-int FPU_load_int64(long long __user * _s)
+int FPU_load_int64(long long __user *_s)
 {
 	long long s;
 	int sign;
@@ -236,7 +236,7 @@ int FPU_load_int64(long long __user * _s)
 }
 
 /* Get a long from user memory */
-int FPU_load_int32(long __user * _s, FPU_REG * loaded_data)
+int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
 {
 	long s;
 	int negative;
@@ -265,7 +265,7 @@ int FPU_load_int32(long __user * _s, FPU_REG * loaded_data)
 }
 
 /* Get a short from user memory */
-int FPU_load_int16(short __user * _s, FPU_REG * loaded_data)
+int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
 {
 	int s, negative;
 
@@ -294,7 +294,7 @@ int FPU_load_int16(short __user * _s, FPU_REG * loaded_data)
 }
 
 /* Get a packed bcd array from user memory */
-int FPU_load_bcd(u_char __user * s)
+int FPU_load_bcd(u_char __user *s)
 {
 	FPU_REG *st0_ptr = &st(0);
 	int pos;
@@ -333,7 +333,7 @@ int FPU_load_bcd(u_char __user * s)
 /*===========================================================================*/
 
 /* Put a long double into user memory */
-int FPU_store_extended(FPU_REG * st0_ptr, u_char st0_tag,
+int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
 		       long double __user * d)
 {
 	/*
@@ -375,7 +375,7 @@ int FPU_store_extended(FPU_REG * st0_ptr, u_char st0_tag,
 }
 
 /* Put a double into user memory */
-int FPU_store_double(FPU_REG * st0_ptr, u_char st0_tag, double __user * dfloat)
+int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
 {
 	unsigned long l[2];
 	unsigned long increment = 0;	/* avoid gcc warnings */
@@ -565,7 +565,7 @@ int FPU_store_double(FPU_REG * st0_ptr, u_char st0_tag, double __user * dfloat)
 }
 
 /* Put a float into user memory */
-int FPU_store_single(FPU_REG * st0_ptr, u_char st0_tag, float __user * single)
+int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
 {
 	long templ = 0;
 	unsigned long increment = 0;	/* avoid gcc warnings */
@@ -754,7 +754,7 @@ int FPU_store_single(FPU_REG * st0_ptr, u_char st0_tag, float __user * single)
 }
 
 /* Put a long long into user memory */
-int FPU_store_int64(FPU_REG * st0_ptr, u_char st0_tag, long long __user * d)
+int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
 {
 	FPU_REG t;
 	long long tll;
@@ -804,7 +804,7 @@ int FPU_store_int64(FPU_REG * st0_ptr, u_char st0_tag, long long __user * d)
 }
 
 /* Put a long into user memory */
-int FPU_store_int32(FPU_REG * st0_ptr, u_char st0_tag, long __user * d)
+int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
 {
 	FPU_REG t;
 	int precision_loss;
@@ -850,7 +850,7 @@ int FPU_store_int32(FPU_REG * st0_ptr, u_char st0_tag, long __user * d)
 }
 
 /* Put a short into user memory */
-int FPU_store_int16(FPU_REG * st0_ptr, u_char st0_tag, short __user * d)
+int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
 {
 	FPU_REG t;
 	int precision_loss;
@@ -896,7 +896,7 @@ int FPU_store_int16(FPU_REG * st0_ptr, u_char st0_tag, short __user * d)
 }
 
 /* Put a packed bcd array into user memory */
-int FPU_store_bcd(FPU_REG * st0_ptr, u_char st0_tag, u_char __user * d)
+int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
 {
 	FPU_REG t;
 	unsigned long long ll;
@@ -971,7 +971,7 @@ int FPU_store_bcd(FPU_REG * st0_ptr, u_char st0_tag, u_char __user * d)
 /* Overflow is signalled by a non-zero return value (in eax).
    In the case of overflow, the returned significand always has the
    largest possible value */
-int FPU_round_to_int(FPU_REG * r, u_char tag)
+int FPU_round_to_int(FPU_REG *r, u_char tag)
 {
 	u_char very_big;
 	unsigned eax;
@@ -1028,7 +1028,7 @@ int FPU_round_to_int(FPU_REG * r, u_char tag)
 
 /*===========================================================================*/
 
-u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user * s)
+u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
 {
 	unsigned short tag_word = 0;
 	u_char tag;
@@ -1121,7 +1121,7 @@ u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user * s)
 	return s;
 }
 
-void frstor(fpu_addr_modes addr_modes, u_char __user * data_address)
+void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
 {
 	int i, regnr;
 	u_char __user *s = fldenv(addr_modes, data_address);
@@ -1144,7 +1144,7 @@ void frstor(fpu_addr_modes addr_modes, u_char __user * data_address)
 
 }
 
-u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user * d)
+u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
 {
 	if ((addr_modes.default_mode == VM86) ||
 	    ((addr_modes.default_mode == PM16)
@@ -1200,7 +1200,7 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user * d)
 	return d;
 }
 
-void fsave(fpu_addr_modes addr_modes, u_char __user * data_address)
+void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
 {
 	u_char __user *d;
 	int offset = (top & 7) * 10, other = 80 - offset;

From 05fccb0e3840248324a96b320562210410be73dc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:12 +0100
Subject: [PATCH 045/890] x86: code cleanups in arch/x86/kernel/pci-gart_64.c

code cleanups:

                                       errors   lines of code   errors/KLOC
 arch/x86/kernel/pci-gart_64.c            183             748         244.6
 arch/x86/kernel/pci-gart_64.c              0             790             0

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-gart_64.c | 514 ++++++++++++++++++----------------
 1 file changed, 279 insertions(+), 235 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 06bcba536045..d2b46b489412 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -1,12 +1,12 @@
 /*
  * Dynamic DMA mapping support for AMD Hammer.
- * 
+ *
  * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
  * This allows to use PCI devices that only support 32bit addresses on systems
- * with more than 4GB. 
+ * with more than 4GB.
  *
  * See Documentation/DMA-mapping.txt for the interface specification.
- * 
+ *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Subject to the GNU General Public License v2 only.
  */
@@ -37,23 +37,26 @@
 #include <asm/k8.h>
 
 static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
-static unsigned long iommu_size; 	/* size of remapping area bytes */
+static unsigned long iommu_size;	/* size of remapping area bytes */
 static unsigned long iommu_pages;	/* .. and in pages */
 
-static u32 *iommu_gatt_base; 		/* Remapping table */
+static u32 *iommu_gatt_base;		/* Remapping table */
 
-/* If this is disabled the IOMMU will use an optimized flushing strategy
-   of only flushing when an mapping is reused. With it true the GART is flushed 
-   for every mapping. Problem is that doing the lazy flush seems to trigger
-   bugs with some popular PCI cards, in particular 3ware (but has been also
-   also seen with Qlogic at least). */
+/*
+ * If this is disabled the IOMMU will use an optimized flushing strategy
+ * of only flushing when an mapping is reused. With it true the GART is
+ * flushed for every mapping. Problem is that doing the lazy flush seems
+ * to trigger bugs with some popular PCI cards, in particular 3ware (but
+ * has been also also seen with Qlogic at least).
+ */
 int iommu_fullflush = 1;
 
-/* Allocation bitmap for the remapping area */ 
+/* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
-static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
+/* Guarded by iommu_bitmap_lock: */
+static unsigned long *iommu_gart_bitmap;
 
-static u32 gart_unmapped_entry; 
+static u32 gart_unmapped_entry;
 
 #define GPTE_VALID    1
 #define GPTE_COHERENT 2
@@ -61,10 +64,10 @@ static u32 gart_unmapped_entry;
 	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
 #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
 
-#define to_pages(addr,size) \
+#define to_pages(addr, size) \
 	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
-#define EMERGENCY_PAGES 32 /* = 128KB */ 
+#define EMERGENCY_PAGES 32 /* = 128KB */
 
 #ifdef CONFIG_AGP
 #define AGPEXTERN extern
@@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
 
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
-static int need_flush; 		/* global flush state. set for each gart wrap */
+static int need_flush;		/* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(int size) 
-{ 	
+static unsigned long alloc_iommu(int size)
+{
 	unsigned long offset, flags;
 
-	spin_lock_irqsave(&iommu_bitmap_lock, flags);	
-	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	offset = find_next_zero_string(iommu_gart_bitmap, next_bit,
+					iommu_pages, size);
 	if (offset == -1) {
 		need_flush = 1;
-		offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
+		offset = find_next_zero_string(iommu_gart_bitmap, 0,
+						iommu_pages, size);
 	}
-	if (offset != -1) { 
-		set_bit_string(iommu_gart_bitmap, offset, size); 
-		next_bit = offset+size; 
-		if (next_bit >= iommu_pages) { 
+	if (offset != -1) {
+		set_bit_string(iommu_gart_bitmap, offset, size);
+		next_bit = offset+size;
+		if (next_bit >= iommu_pages) {
 			next_bit = 0;
 			need_flush = 1;
-		} 
-	} 
+		}
+	}
 	if (iommu_fullflush)
 		need_flush = 1;
-	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);      
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+
 	return offset;
-} 
+}
 
 static void free_iommu(unsigned long offset, int size)
-{ 
+{
 	unsigned long flags;
+
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	__clear_bit_string(iommu_gart_bitmap, offset, size);
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-} 
+}
 
-/* 
+/*
  * Use global flush state to avoid races with multiple flushers.
  */
 static void flush_gart(void)
-{ 
+{
 	unsigned long flags;
+
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	if (need_flush) {
 		k8_flush_garts();
 		need_flush = 0;
-	} 
+	}
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-} 
+}
 
 #ifdef CONFIG_IOMMU_LEAK
 
-#define SET_LEAK(x) if (iommu_leak_tab) \
-			iommu_leak_tab[x] = __builtin_return_address(0);
-#define CLEAR_LEAK(x) if (iommu_leak_tab) \
-			iommu_leak_tab[x] = NULL;
+#define SET_LEAK(x)							\
+	do {								\
+		if (iommu_leak_tab)					\
+			iommu_leak_tab[x] = __builtin_return_address(0);\
+	} while (0)
+
+#define CLEAR_LEAK(x)							\
+	do {								\
+		if (iommu_leak_tab)					\
+			iommu_leak_tab[x] = NULL;			\
+	} while (0)
 
 /* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab; 
+static void **iommu_leak_tab;
 static int leak_trace;
 static int iommu_leak_pages = 20;
+
 static void dump_leak(void)
 {
 	int i;
-	static int dump; 
-	if (dump || !iommu_leak_tab) return;
+	static int dump;
+
+	if (dump || !iommu_leak_tab)
+		return;
 	dump = 1;
-	show_stack(NULL,NULL);
-	/* Very crude. dump some from the end of the table too */ 
-	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); 
-	for (i = 0; i < iommu_leak_pages; i+=2) {
-		printk("%lu: ", iommu_pages-i);
+	show_stack(NULL, NULL);
+
+	/* Very crude. dump some from the end of the table too */
+	printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
+	       iommu_leak_pages);
+	for (i = 0; i < iommu_leak_pages; i += 2) {
+		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
 		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
-		printk("%c", (i+1)%2 == 0 ? '\n' : ' '); 
-	} 
-	printk("\n");
+		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
+	}
+	printk(KERN_DEBUG "\n");
 }
 #else
-#define SET_LEAK(x)
-#define CLEAR_LEAK(x)
+# define SET_LEAK(x)
+# define CLEAR_LEAK(x)
 #endif
 
 static void iommu_full(struct device *dev, size_t size, int dir)
 {
-	/* 
+	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
 	 * Unfortunately the drivers cannot handle this operation properly.
-	 * Return some non mapped prereserved space in the aperture and 
+	 * Return some non mapped prereserved space in the aperture and
 	 * let the Northbridge deal with it. This will result in garbage
 	 * in the IO operation. When the size exceeds the prereserved space
-	 * memory corruption will occur or random memory will be DMAed 
+	 * memory corruption will occur or random memory will be DMAed
 	 * out. Hopefully no network devices use single mappings that big.
-	 */ 
-	
-	printk(KERN_ERR 
-  "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
-	       size, dev->bus_id);
+	 */
+
+	printk(KERN_ERR
+		"PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
+		size, dev->bus_id);
 
 	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
 		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
 			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) 
-			panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
-	} 
-
+		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+			panic(KERN_ERR
+				"PCI-DMA: Random memory would be DMAed\n");
+	}
 #ifdef CONFIG_IOMMU_LEAK
-	dump_leak(); 
+	dump_leak();
 #endif
-} 
-
-static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
-{ 
-	u64 mask = *dev->dma_mask;
-	int high = addr + size > mask;
-	int mmu = high;
-	if (force_iommu) 
-		mmu = 1; 
-	return mmu; 
 }
 
-static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
-{ 
+static inline int
+need_iommu(struct device *dev, unsigned long addr, size_t size)
+{
 	u64 mask = *dev->dma_mask;
 	int high = addr + size > mask;
 	int mmu = high;
-	return mmu; 
+
+	if (force_iommu)
+		mmu = 1;
+
+	return mmu;
+}
+
+static inline int
+nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
+{
+	u64 mask = *dev->dma_mask;
+	int high = addr + size > mask;
+	int mmu = high;
+
+	return mmu;
 }
 
 /* Map a single continuous physical area into the IOMMU.
@@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 				size_t size, int dir)
-{ 
+{
 	unsigned long npages = to_pages(phys_mem, size);
 	unsigned long iommu_page = alloc_iommu(npages);
 	int i;
+
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
-			return phys_mem; 
+			return phys_mem;
 		if (panic_on_overflow)
 			panic("dma_map_area overflow %lu bytes\n", size);
 		iommu_full(dev, size, dir);
@@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
 }
 
-static dma_addr_t gart_map_simple(struct device *dev, char *buf,
-				 size_t size, int dir)
+static dma_addr_t
+gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
 {
 	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+
 	flush_gart();
+
 	return map;
 }
 
 /* Map a single area into the IOMMU */
-static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
+static dma_addr_t
+gart_map_single(struct device *dev, void *addr, size_t size, int dir)
 {
 	unsigned long phys_mem, bus;
 
 	if (!dev)
 		dev = &fallback_dev;
 
-	phys_mem = virt_to_phys(addr); 
+	phys_mem = virt_to_phys(addr);
 	if (!need_iommu(dev, phys_mem, size))
-		return phys_mem; 
+		return phys_mem;
 
 	bus = gart_map_simple(dev, addr, size, dir);
-	return bus; 
+
+	return bus;
 }
 
 /*
  * Free a DMA mapping.
  */
 static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
-		      size_t size, int direction)
+			      size_t size, int direction)
 {
 	unsigned long iommu_page;
 	int npages;
@@ -266,6 +296,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
 	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
 	    dma_addr >= iommu_bus_base + iommu_size)
 		return;
+
 	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
 	npages = to_pages(dma_addr, size);
 	for (i = 0; i < npages; i++) {
@@ -278,7 +309,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
 /*
  * Wrapper for pci_unmap_single working with scatterlists.
  */
-static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+static void
+gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 {
 	struct scatterlist *s;
 	int i;
@@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 
 	for_each_sg(sg, s, nents, i) {
 		unsigned long addr = sg_phys(s);
-		if (nonforced_iommu(dev, addr, s->length)) { 
+
+		if (nonforced_iommu(dev, addr, s->length)) {
 			addr = dma_map_area(dev, addr, s->length, dir);
-			if (addr == bad_dma_address) { 
-				if (i > 0) 
+			if (addr == bad_dma_address) {
+				if (i > 0)
 					gart_unmap_sg(dev, sg, i, dir);
-				nents = 0; 
+				nents = 0;
 				sg[0].dma_length = 0;
 				break;
 			}
@@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 		s->dma_length = s->length;
 	}
 	flush_gart();
+
 	return nents;
 }
 
 /* Map multiple scatterlist entries continuous into the first. */
 static int __dma_map_cont(struct scatterlist *start, int nelems,
-		      struct scatterlist *sout, unsigned long pages)
+			  struct scatterlist *sout, unsigned long pages)
 {
 	unsigned long iommu_start = alloc_iommu(pages);
-	unsigned long iommu_page = iommu_start; 
+	unsigned long iommu_page = iommu_start;
 	struct scatterlist *s;
 	int i;
 
@@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatterlist *start, int nelems,
 	for_each_sg(start, s, nelems, i) {
 		unsigned long pages, addr;
 		unsigned long phys_addr = s->dma_address;
-		
+
 		BUG_ON(s != start && s->offset);
 		if (s == start) {
 			sout->dma_address = iommu_bus_base;
 			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
 			sout->dma_length = s->length;
-		} else { 
-			sout->dma_length += s->length; 
+		} else {
+			sout->dma_length += s->length;
 		}
 
 		addr = phys_addr;
-		pages = to_pages(s->offset, s->length); 
-		while (pages--) { 
-			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 
+		pages = to_pages(s->offset, s->length);
+		while (pages--) {
+			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
 			SET_LEAK(iommu_page);
 			addr += PAGE_SIZE;
 			iommu_page++;
 		}
-	} 
-	BUG_ON(iommu_page - iommu_start != pages);	
+	}
+	BUG_ON(iommu_page - iommu_start != pages);
+
 	return 0;
 }
 
-static inline int dma_map_cont(struct scatterlist *start, int nelems,
-		      struct scatterlist *sout,
-		      unsigned long pages, int need)
+static inline int
+dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
+	     unsigned long pages, int need)
 {
 	if (!need) {
 		BUG_ON(nelems != 1);
@@ -370,22 +405,19 @@ static inline int dma_map_cont(struct scatterlist *start, int nelems,
 	}
 	return __dma_map_cont(start, nelems, sout, pages);
 }
-		
+
 /*
  * DMA map all entries in a scatterlist.
- * Merge chunks that have page aligned sizes into a continuous mapping. 
+ * Merge chunks that have page aligned sizes into a continuous mapping.
  */
-static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-			int dir)
+static int
+gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 {
-	int i;
-	int out;
-	int start;
-	unsigned long pages = 0;
-	int need = 0, nextneed;
 	struct scatterlist *s, *ps, *start_sg, *sgmap;
+	int need = 0, nextneed, i, out, start;
+	unsigned long pages = 0;
 
-	if (nents == 0) 
+	if (nents == 0)
 		return 0;
 
 	if (!dev)
@@ -397,15 +429,19 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	ps = NULL; /* shut up gcc */
 	for_each_sg(sg, s, nents, i) {
 		dma_addr_t addr = sg_phys(s);
-		s->dma_address = addr;
-		BUG_ON(s->length == 0); 
 
-		nextneed = need_iommu(dev, addr, s->length); 
+		s->dma_address = addr;
+		BUG_ON(s->length == 0);
+
+		nextneed = need_iommu(dev, addr, s->length);
 
 		/* Handle the previous not yet processed entries */
 		if (i > start) {
-			/* Can only merge when the last chunk ends on a page 
-			   boundary and the new one doesn't have an offset. */
+			/*
+			 * Can only merge when the last chunk ends on a
+			 * page boundary and the new one doesn't have an
+			 * offset.
+			 */
 			if (!iommu_merge || !nextneed || !need || s->offset ||
 			    (ps->offset + ps->length) % PAGE_SIZE) {
 				if (dma_map_cont(start_sg, i - start, sgmap,
@@ -436,6 +472,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 error:
 	flush_gart();
 	gart_unmap_sg(dev, sg, out, dir);
+
 	/* When it was forced or merged try again in a dumb way */
 	if (force_iommu || iommu_merge) {
 		out = dma_map_sg_nonforce(dev, sg, nents, dir);
@@ -444,64 +481,68 @@ error:
 	}
 	if (panic_on_overflow)
 		panic("dma_map_sg: overflow on %lu pages\n", pages);
+
 	iommu_full(dev, pages << PAGE_SHIFT, dir);
 	for_each_sg(sg, s, nents, i)
 		s->dma_address = bad_dma_address;
 	return 0;
-} 
+}
 
 static int no_agp;
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
-{ 
-	unsigned long a; 
-	if (!iommu_size) { 
-		iommu_size = aper_size; 
-		if (!no_agp) 
-			iommu_size /= 2; 
-	} 
+{
+	unsigned long a;
 
-	a = aper + iommu_size; 
+	if (!iommu_size) {
+		iommu_size = aper_size;
+		if (!no_agp)
+			iommu_size /= 2;
+	}
+
+	a = aper + iommu_size;
 	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
 
-	if (iommu_size < 64*1024*1024) 
+	if (iommu_size < 64*1024*1024) {
 		printk(KERN_WARNING
-  "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); 
-	
+			"PCI-DMA: Warning: Small IOMMU %luMB."
+			" Consider increasing the AGP aperture in BIOS\n",
+				iommu_size >> 20);
+	}
+
 	return iommu_size;
-} 
+}
 
-static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) 
-{ 
-	unsigned aper_size = 0, aper_base_32;
+static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
+{
+	unsigned aper_size = 0, aper_base_32, aper_order;
 	u64 aper_base;
-	unsigned aper_order;
 
-	pci_read_config_dword(dev, 0x94, &aper_base_32); 
+	pci_read_config_dword(dev, 0x94, &aper_base_32);
 	pci_read_config_dword(dev, 0x90, &aper_order);
-	aper_order = (aper_order >> 1) & 7;	
+	aper_order = (aper_order >> 1) & 7;
 
-	aper_base = aper_base_32 & 0x7fff; 
+	aper_base = aper_base_32 & 0x7fff;
 	aper_base <<= 25;
 
-	aper_size = (32 * 1024 * 1024) << aper_order; 
-       if (aper_base + aper_size > 0x100000000UL || !aper_size)
+	aper_size = (32 * 1024 * 1024) << aper_order;
+	if (aper_base + aper_size > 0x100000000UL || !aper_size)
 		aper_base = 0;
 
 	*size = aper_size;
 	return aper_base;
-} 
+}
 
-/* 
+/*
  * Private Northbridge GATT initialization in case we cannot use the
- * AGP driver for some reason.  
+ * AGP driver for some reason.
  */
 static __init int init_k8_gatt(struct agp_kern_info *info)
-{ 
+{
+	unsigned aper_size, gatt_size, new_aper_size;
+	unsigned aper_base, new_aper_base;
 	struct pci_dev *dev;
 	void *gatt;
-	unsigned aper_base, new_aper_base;
-	unsigned aper_size, gatt_size, new_aper_size;
 	int i;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
@@ -509,75 +550,77 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	dev = NULL;
 	for (i = 0; i < num_k8_northbridges; i++) {
 		dev = k8_northbridges[i];
-		new_aper_base = read_aperture(dev, &new_aper_size); 
-		if (!new_aper_base) 
-			goto nommu; 
-		
-		if (!aper_base) { 
+		new_aper_base = read_aperture(dev, &new_aper_size);
+		if (!new_aper_base)
+			goto nommu;
+
+		if (!aper_base) {
 			aper_size = new_aper_size;
 			aper_base = new_aper_base;
-		} 
-		if (aper_size != new_aper_size || aper_base != new_aper_base) 
+		}
+		if (aper_size != new_aper_size || aper_base != new_aper_base)
 			goto nommu;
 	}
 	if (!aper_base)
-		goto nommu; 
+		goto nommu;
 	info->aper_base = aper_base;
-	info->aper_size = aper_size>>20; 
+	info->aper_size = aper_size >> 20;
 
-	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); 
-	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); 
-	if (!gatt) 
+	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
+	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+	if (!gatt)
 		panic("Cannot allocate GATT table");
-	if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE))
+	if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT,
+				  PAGE_KERNEL_NOCACHE))
 		panic("Could not set GART PTEs to uncacheable pages");
 	global_flush_tlb();
 
-	memset(gatt, 0, gatt_size); 
+	memset(gatt, 0, gatt_size);
 	agp_gatt_table = gatt;
 
 	for (i = 0; i < num_k8_northbridges; i++) {
-		u32 ctl; 
-		u32 gatt_reg; 
+		u32 gatt_reg;
+		u32 ctl;
 
 		dev = k8_northbridges[i];
-		gatt_reg = __pa(gatt) >> 12; 
-		gatt_reg <<= 4; 
+		gatt_reg = __pa(gatt) >> 12;
+		gatt_reg <<= 4;
 		pci_write_config_dword(dev, 0x98, gatt_reg);
-		pci_read_config_dword(dev, 0x90, &ctl); 
+		pci_read_config_dword(dev, 0x90, &ctl);
 
 		ctl |= 1;
 		ctl &= ~((1<<4) | (1<<5));
 
-		pci_write_config_dword(dev, 0x90, ctl); 
+		pci_write_config_dword(dev, 0x90, ctl);
 	}
 	flush_gart();
-	
-	printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 
+
+	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
+	       aper_base, aper_size>>10);
 	return 0;
 
  nommu:
- 	/* Should not happen anymore */
+	/* Should not happen anymore */
 	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
 	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
-	return -1; 
-} 
+	return -1;
+}
 
 extern int agp_amd64_init(void);
 
 static const struct dma_mapping_ops gart_dma_ops = {
-	.mapping_error = NULL,
-	.map_single = gart_map_single,
-	.map_simple = gart_map_simple,
-	.unmap_single = gart_unmap_single,
-	.sync_single_for_cpu = NULL,
-	.sync_single_for_device = NULL,
-	.sync_single_range_for_cpu = NULL,
-	.sync_single_range_for_device = NULL,
-	.sync_sg_for_cpu = NULL,
-	.sync_sg_for_device = NULL,
-	.map_sg = gart_map_sg,
-	.unmap_sg = gart_unmap_sg,
+	.mapping_error			= NULL,
+	.map_single			= gart_map_single,
+	.map_simple			= gart_map_simple,
+	.unmap_single			= gart_unmap_single,
+	.sync_single_for_cpu		= NULL,
+	.sync_single_for_device		= NULL,
+	.sync_single_range_for_cpu	= NULL,
+	.sync_single_range_for_device	= NULL,
+	.sync_sg_for_cpu		= NULL,
+	.sync_sg_for_device		= NULL,
+	.map_sg				= gart_map_sg,
+	.unmap_sg			= gart_unmap_sg,
 };
 
 void gart_iommu_shutdown(void)
@@ -588,23 +631,23 @@ void gart_iommu_shutdown(void)
 	if (no_agp && (dma_ops != &gart_dma_ops))
 		return;
 
-        for (i = 0; i < num_k8_northbridges; i++) {
-                u32 ctl;
+	for (i = 0; i < num_k8_northbridges; i++) {
+		u32 ctl;
 
-                dev = k8_northbridges[i];
-                pci_read_config_dword(dev, 0x90, &ctl);
+		dev = k8_northbridges[i];
+		pci_read_config_dword(dev, 0x90, &ctl);
 
-                ctl &= ~1;
+		ctl &= ~1;
 
-                pci_write_config_dword(dev, 0x90, ctl);
-        }
+		pci_write_config_dword(dev, 0x90, ctl);
+	}
 }
 
 void __init gart_iommu_init(void)
-{ 
+{
 	struct agp_kern_info info;
-	unsigned long aper_size;
 	unsigned long iommu_start;
+	unsigned long aper_size;
 	unsigned long scratch;
 	long i;
 
@@ -614,14 +657,14 @@ void __init gart_iommu_init(void)
 	}
 
 #ifndef CONFIG_AGP_AMD64
-	no_agp = 1; 
+	no_agp = 1;
 #else
 	/* Makefile puts PCI initialization via subsys_initcall first. */
 	/* Add other K8 AGP bridge drivers here */
-	no_agp = no_agp || 
-		(agp_amd64_init() < 0) || 
+	no_agp = no_agp ||
+		(agp_amd64_init() < 0) ||
 		(agp_copy_info(agp_bridge, &info) < 0);
-#endif	
+#endif
 
 	if (swiotlb)
 		return;
@@ -643,77 +686,78 @@ void __init gart_iommu_init(void)
 	}
 
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
-	aper_size = info.aper_size * 1024 * 1024;	
-	iommu_size = check_iommu_size(info.aper_base, aper_size); 
-	iommu_pages = iommu_size >> PAGE_SHIFT; 
+	aper_size = info.aper_size * 1024 * 1024;
+	iommu_size = check_iommu_size(info.aper_base, aper_size);
+	iommu_pages = iommu_size >> PAGE_SHIFT;
 
-	iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, 
-						    get_order(iommu_pages/8)); 
-	if (!iommu_gart_bitmap) 
-		panic("Cannot allocate iommu bitmap\n"); 
+	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+						      get_order(iommu_pages/8));
+	if (!iommu_gart_bitmap)
+		panic("Cannot allocate iommu bitmap\n");
 	memset(iommu_gart_bitmap, 0, iommu_pages/8);
 
 #ifdef CONFIG_IOMMU_LEAK
-	if (leak_trace) { 
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, 
+	if (leak_trace) {
+		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
 				  get_order(iommu_pages*sizeof(void *)));
-		if (iommu_leak_tab) 
-			memset(iommu_leak_tab, 0, iommu_pages * 8); 
+		if (iommu_leak_tab)
+			memset(iommu_leak_tab, 0, iommu_pages * 8);
 		else
-			printk("PCI-DMA: Cannot allocate leak trace area\n"); 
-	} 
+			printk(KERN_DEBUG
+			       "PCI-DMA: Cannot allocate leak trace area\n");
+	}
 #endif
 
-	/* 
+	/*
 	 * Out of IOMMU space handling.
-	 * Reserve some invalid pages at the beginning of the GART. 
-	 */ 
-	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); 
+	 * Reserve some invalid pages at the beginning of the GART.
+	 */
+	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
 
-	agp_memory_reserved = iommu_size;	
+	agp_memory_reserved = iommu_size;
 	printk(KERN_INFO
 	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
-	       iommu_size>>20); 
+	       iommu_size >> 20);
 
-	iommu_start = aper_size - iommu_size;	
-	iommu_bus_base = info.aper_base + iommu_start; 
+	iommu_start = aper_size - iommu_size;
+	iommu_bus_base = info.aper_base + iommu_start;
 	bad_dma_address = iommu_bus_base;
 	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
 
-	/* 
+	/*
 	 * Unmap the IOMMU part of the GART. The alias of the page is
 	 * always mapped with cache enabled and there is no full cache
 	 * coherency across the GART remapping. The unmapping avoids
 	 * automatic prefetches from the CPU allocating cache lines in
 	 * there. All CPU accesses are done via the direct mapping to
 	 * the backing memory. The GART address is only used by PCI
-	 * devices. 
+	 * devices.
 	 */
 	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
 
-	/* 
-	 * Try to workaround a bug (thanks to BenH) 
-	 * Set unmapped entries to a scratch page instead of 0. 
+	/*
+	 * Try to workaround a bug (thanks to BenH)
+	 * Set unmapped entries to a scratch page instead of 0.
 	 * Any prefetches that hit unmapped entries won't get an bus abort
 	 * then.
 	 */
-	scratch = get_zeroed_page(GFP_KERNEL); 
-	if (!scratch) 
+	scratch = get_zeroed_page(GFP_KERNEL);
+	if (!scratch)
 		panic("Cannot allocate iommu scratch page");
 	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
-	for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 
+	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
 		iommu_gatt_base[i] = gart_unmapped_entry;
 
 	flush_gart();
 	dma_ops = &gart_dma_ops;
-} 
+}
 
 void __init gart_parse_options(char *p)
 {
 	int arg;
 
 #ifdef CONFIG_IOMMU_LEAK
-	if (!strncmp(p,"leak",4)) {
+	if (!strncmp(p, "leak", 4)) {
 		leak_trace = 1;
 		p += 4;
 		if (*p == '=') ++p;
@@ -723,18 +767,18 @@ void __init gart_parse_options(char *p)
 #endif
 	if (isdigit(*p) && get_option(&p, &arg))
 		iommu_size = arg;
-	if (!strncmp(p, "fullflush",8))
+	if (!strncmp(p, "fullflush", 8))
 		iommu_fullflush = 1;
-	if (!strncmp(p, "nofullflush",11))
+	if (!strncmp(p, "nofullflush", 11))
 		iommu_fullflush = 0;
-	if (!strncmp(p,"noagp",5))
+	if (!strncmp(p, "noagp", 5))
 		no_agp = 1;
-	if (!strncmp(p, "noaperture",10))
+	if (!strncmp(p, "noaperture", 10))
 		fix_aperture = 0;
 	/* duplicated from pci-dma.c */
-	if (!strncmp(p,"force",5))
+	if (!strncmp(p, "force", 5))
 		gart_iommu_aperture_allowed = 1;
-	if (!strncmp(p,"allowed",7))
+	if (!strncmp(p, "allowed", 7))
 		gart_iommu_aperture_allowed = 1;
 	if (!strncmp(p, "memaper", 7)) {
 		fallback_aper_force = 1;

From 2f36fa13ce49ffd000249feaedfcbefbcc83a72f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:12 +0100
Subject: [PATCH 046/890] x86: clean up arch/x86/kernel/e820_64.c

White space and coding style cleanup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820_64.c | 305 ++++++++++++++++++++++----------------
 1 file changed, 177 insertions(+), 128 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 04698e0b056c..d41cd2f01733 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Handle the memory map.
  * The functions here do the job until bootmem takes over.
  *
@@ -29,44 +29,44 @@
 
 struct e820map e820;
 
-/* 
+/*
  * PFN of last memory page.
  */
-unsigned long end_pfn; 
+unsigned long end_pfn;
 EXPORT_SYMBOL(end_pfn);
 
-/* 
+/*
  * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
  * The direct mapping extends to end_pfn_map, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
- */ 
-unsigned long end_pfn_map; 
+ */
+unsigned long end_pfn_map;
 
-/* 
+/*
  * Last pfn which the user wants to use.
  */
 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
 
 extern struct resource code_resource, data_resource, bss_resource;
 
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */ 
+/* Check for some hardcoded bad areas that early boot is not allowed to touch */
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
-{ 
-	unsigned long addr = *addrp, last = addr + size; 
+{
+	unsigned long addr = *addrp, last = addr + size;
 
 	/* various gunk below that needed for SMP startup */
-	if (addr < 0x8000) { 
+	if (addr < 0x8000) {
 		*addrp = PAGE_ALIGN(0x8000);
-		return 1; 
+		return 1;
 	}
 
 	/* direct mapping tables of the kernel */
-	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { 
+	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
 		*addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
 		return 1;
-	} 
+	}
 
-	/* initrd */ 
+	/* initrd */
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
 		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -77,7 +77,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
 			*addrp = PAGE_ALIGN(ramdisk_end);
 			return 1;
 		}
-	} 
+	}
 #endif
 	/* kernel code */
 	if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
@@ -97,9 +97,9 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
 		return 1;
 	}
 #endif
-	/* XXX ramdisk image here? */ 
+	/* XXX ramdisk image here? */
 	return 0;
-} 
+}
 
 /*
  * This function checks if any part of the range <start,end> is mapped
@@ -107,16 +107,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
  */
 int
 e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{ 
+{
 	int i;
-	for (i = 0; i < e820.nr_map; i++) { 
-		struct e820entry *ei = &e820.map[i]; 
-		if (type && ei->type != type) 
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (type && ei->type != type)
 			continue;
 		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue; 
-		return 1; 
-	} 
+			continue;
+		return 1;
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(e820_any_mapped);
@@ -127,11 +129,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped);
  * Note: this function only works correct if the e820 table is sorted and
  * not-overlapping, which is the case
  */
-int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+int __init e820_all_mapped(unsigned long start, unsigned long end,
+			   unsigned type)
 {
 	int i;
+
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
+
 		if (type && ei->type != type)
 			continue;
 		/* is the region (part) in overlap with the current region ?*/
@@ -143,59 +148,66 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type
 		 */
 		if (ei->addr <= start)
 			start = ei->addr + ei->size;
-		/* if start is now at or beyond end, we're done, full coverage */
+		/*
+		 * if start is now at or beyond end, we're done, full
+		 * coverage
+		 */
 		if (start >= end)
-			return 1; /* we're done */
+			return 1;
 	}
 	return 0;
 }
 
-/* 
- * Find a free area in a specific range. 
- */ 
-unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) 
-{ 
-	int i; 
-	for (i = 0; i < e820.nr_map; i++) { 
-		struct e820entry *ei = &e820.map[i]; 
-		unsigned long addr = ei->addr, last; 
-		if (ei->type != E820_RAM) 
-			continue; 
-		if (addr < start) 
+/*
+ * Find a free area in a specific range.
+ */
+unsigned long __init find_e820_area(unsigned long start, unsigned long end,
+				    unsigned size)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long addr = ei->addr, last;
+
+		if (ei->type != E820_RAM)
+			continue;
+		if (addr < start)
 			addr = start;
-		if (addr > ei->addr + ei->size) 
-			continue; 
+		if (addr > ei->addr + ei->size)
+			continue;
 		while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
 			;
 		last = PAGE_ALIGN(addr) + size;
 		if (last > ei->addr + ei->size)
 			continue;
-		if (last > end) 
+		if (last > end)
 			continue;
-		return addr; 
-	} 
-	return -1UL;		
-} 
+		return addr;
+	}
+	return -1UL;
+}
 
 /*
  * Find the highest page frame number we have available
  */
 unsigned long __init e820_end_of_ram(void)
 {
-	unsigned long end_pfn = 0;
+	unsigned long end_pfn;
+
 	end_pfn = find_max_pfn_with_active_regions();
-	
-	if (end_pfn > end_pfn_map) 
+
+	if (end_pfn > end_pfn_map)
 		end_pfn_map = end_pfn;
 	if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
 		end_pfn_map = MAXMEM>>PAGE_SHIFT;
 	if (end_pfn > end_user_pfn)
 		end_pfn = end_user_pfn;
-	if (end_pfn > end_pfn_map) 
-		end_pfn = end_pfn_map; 
+	if (end_pfn > end_pfn_map)
+		end_pfn = end_pfn_map;
 
-	printk("end_pfn_map = %lu\n", end_pfn_map);
-	return end_pfn;	
+	printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
+	return end_pfn;
 }
 
 /*
@@ -219,9 +231,9 @@ void __init e820_reserve_resources(void)
 		request_resource(&iomem_resource, res);
 		if (e820.map[i].type == E820_RAM) {
 			/*
-			 *  We don't know which RAM region contains kernel data,
-			 *  so we try it repeatedly and let the resource manager
-			 *  test it.
+			 * We don't know which RAM region contains kernel data,
+			 * so we try it repeatedly and let the resource manager
+			 * test it.
 			 */
 			request_resource(res, &code_resource);
 			request_resource(res, &data_resource);
@@ -322,9 +334,9 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
 			add_active_range(nid, ei_startpfn, ei_endpfn);
 }
 
-/* 
+/*
  * Add a memory region to the kernel e820 map.
- */ 
+ */
 void __init add_memory_region(unsigned long start, unsigned long size, int type)
 {
 	int x = e820.nr_map;
@@ -349,9 +361,7 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long end_pfn = end >> PAGE_SHIFT;
-	unsigned long ei_startpfn;
-	unsigned long ei_endpfn;
-	unsigned long ram = 0;
+	unsigned long ei_startpfn, ei_endpfn, ram = 0;
 	int i;
 
 	for (i = 0; i < e820.nr_map; i++) {
@@ -369,22 +379,25 @@ void __init e820_print_map(char *who)
 
 	for (i = 0; i < e820.nr_map; i++) {
 		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
-			(unsigned long long) e820.map[i].addr,
-			(unsigned long long) (e820.map[i].addr + e820.map[i].size));
+		       (unsigned long long) e820.map[i].addr,
+		       (unsigned long long)
+		       (e820.map[i].addr + e820.map[i].size));
 		switch (e820.map[i].type) {
-		case E820_RAM:	printk("(usable)\n");
-				break;
+		case E820_RAM:
+			printk(KERN_CONT "(usable)\n");
+			break;
 		case E820_RESERVED:
-				printk("(reserved)\n");
-				break;
+			printk(KERN_CONT "(reserved)\n");
+			break;
 		case E820_ACPI:
-				printk("(ACPI data)\n");
-				break;
+			printk(KERN_CONT "(ACPI data)\n");
+			break;
 		case E820_NVS:
-				printk("(ACPI NVS)\n");
-				break;
-		default:	printk("type %u\n", e820.map[i].type);
-				break;
+			printk(KERN_CONT "(ACPI NVS)\n");
+			break;
+		default:
+			printk(KERN_CONT "type %u\n", e820.map[i].type);
+			break;
 		}
 	}
 }
@@ -392,11 +405,11 @@ void __init e820_print_map(char *who)
 /*
  * Sanitize the BIOS e820 map.
  *
- * Some e820 responses include overlapping entries.  The following 
+ * Some e820 responses include overlapping entries. The following
  * replaces the original e820 map with a new one, removing overlaps.
  *
  */
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
 {
 	struct change_member {
 		struct e820entry *pbios; /* pointer to original bios entry */
@@ -416,7 +429,8 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 	int i;
 
 	/*
-		Visually we're performing the following (1,2,3,4 = memory types)...
+		Visually we're performing the following
+		(1,2,3,4 = memory types)...
 
 		Sample memory map (w/overlaps):
 		   ____22__________________
@@ -458,22 +472,23 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 	old_nr = *pnr_map;
 
 	/* bail out if we find any unreasonable addresses in bios map */
-	for (i=0; i<old_nr; i++)
+	for (i = 0; i < old_nr; i++)
 		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
 			return -1;
 
 	/* create pointers for initial change-point information (for sorting) */
-	for (i=0; i < 2*old_nr; i++)
+	for (i = 0; i < 2 * old_nr; i++)
 		change_point[i] = &change_point_list[i];
 
 	/* record all known change-points (starting and ending addresses),
 	   omitting those that are for empty memory regions */
 	chgidx = 0;
-	for (i=0; i < old_nr; i++)	{
+	for (i = 0; i < old_nr; i++)	{
 		if (biosmap[i].size != 0) {
 			change_point[chgidx]->addr = biosmap[i].addr;
 			change_point[chgidx++]->pbios = &biosmap[i];
-			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+			change_point[chgidx]->addr = biosmap[i].addr +
+				biosmap[i].size;
 			change_point[chgidx++]->pbios = &biosmap[i];
 		}
 	}
@@ -483,75 +498,106 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 	still_changing = 1;
 	while (still_changing)	{
 		still_changing = 0;
-		for (i=1; i < chg_nr; i++)  {
-			/* if <current_addr> > <last_addr>, swap */
-			/* or, if current=<start_addr> & last=<end_addr>, swap */
-			if ((change_point[i]->addr < change_point[i-1]->addr) ||
-				((change_point[i]->addr == change_point[i-1]->addr) &&
-				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
-				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
-			   )
-			{
+		for (i = 1; i < chg_nr; i++)  {
+			unsigned long long curaddr, lastaddr;
+			unsigned long long curpbaddr, lastpbaddr;
+
+			curaddr = change_point[i]->addr;
+			lastaddr = change_point[i - 1]->addr;
+			curpbaddr = change_point[i]->pbios->addr;
+			lastpbaddr = change_point[i - 1]->pbios->addr;
+
+			/*
+			 * swap entries, when:
+			 *
+			 * curaddr > lastaddr or
+			 * curaddr == lastaddr and curaddr == curpbaddr and
+			 * lastaddr != lastpbaddr
+			 */
+			if (curaddr < lastaddr ||
+			    (curaddr == lastaddr && curaddr == curpbaddr &&
+			     lastaddr != lastpbaddr)) {
 				change_tmp = change_point[i];
 				change_point[i] = change_point[i-1];
 				change_point[i-1] = change_tmp;
-				still_changing=1;
+				still_changing = 1;
 			}
 		}
 	}
 
 	/* create a new bios memory map, removing overlaps */
-	overlap_entries=0;	 /* number of entries in the overlap table */
-	new_bios_entry=0;	 /* index for creating new bios map entries */
+	overlap_entries = 0;	 /* number of entries in the overlap table */
+	new_bios_entry = 0;	 /* index for creating new bios map entries */
 	last_type = 0;		 /* start with undefined memory type */
 	last_addr = 0;		 /* start with 0 as last starting address */
+
 	/* loop through change-points, determining affect on the new bios map */
-	for (chgidx=0; chgidx < chg_nr; chgidx++)
-	{
+	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
 		/* keep track of all overlapping bios entries */
-		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
-		{
-			/* add map entry to overlap list (> 1 entry implies an overlap) */
-			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
-		}
-		else
-		{
-			/* remove entry from list (order independent, so swap with last) */
-			for (i=0; i<overlap_entries; i++)
-			{
-				if (overlap_list[i] == change_point[chgidx]->pbios)
-					overlap_list[i] = overlap_list[overlap_entries-1];
+		if (change_point[chgidx]->addr ==
+		    change_point[chgidx]->pbios->addr) {
+			/*
+			 * add map entry to overlap list (> 1 entry
+			 * implies an overlap)
+			 */
+			overlap_list[overlap_entries++] =
+				change_point[chgidx]->pbios;
+		} else {
+			/*
+			 * remove entry from list (order independent,
+			 * so swap with last)
+			 */
+			for (i = 0; i < overlap_entries; i++) {
+				if (overlap_list[i] ==
+				    change_point[chgidx]->pbios)
+					overlap_list[i] =
+						overlap_list[overlap_entries-1];
 			}
 			overlap_entries--;
 		}
-		/* if there are overlapping entries, decide which "type" to use */
-		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+		/*
+		 * if there are overlapping entries, decide which
+		 * "type" to use (larger value takes precedence --
+		 * 1=usable, 2,3,4,4+=unusable)
+		 */
 		current_type = 0;
-		for (i=0; i<overlap_entries; i++)
+		for (i = 0; i < overlap_entries; i++)
 			if (overlap_list[i]->type > current_type)
 				current_type = overlap_list[i]->type;
-		/* continue building up new bios map based on this information */
+		/*
+		 * continue building up new bios map based on this
+		 * information
+		 */
 		if (current_type != last_type)	{
 			if (last_type != 0)	 {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
-				/* move forward only if the new size was non-zero */
+				/*
+				 * move forward only if the new size
+				 * was non-zero
+				 */
 				if (new_bios[new_bios_entry].size != 0)
+					/*
+					 * no more space left for new
+					 * bios entries ?
+					 */
 					if (++new_bios_entry >= E820MAX)
-						break; 	/* no more space left for new bios entries */
+						break;
 			}
 			if (current_type != 0)	{
-				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+				new_bios[new_bios_entry].addr =
+					change_point[chgidx]->addr;
 				new_bios[new_bios_entry].type = current_type;
-				last_addr=change_point[chgidx]->addr;
+				last_addr = change_point[chgidx]->addr;
 			}
 			last_type = current_type;
 		}
 	}
-	new_nr = new_bios_entry;   /* retain count for new bios entries */
+	/* retain count for new bios entries */
+	new_nr = new_bios_entry;
 
 	/* copy new bios mapping into original location */
-	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
 	*pnr_map = new_nr;
 
 	return 0;
@@ -566,7 +612,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
  * will have given us a memory map that we can use to properly
  * set up memory.  If we aren't, we'll fake a memory map.
  */
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 {
 	/* Only one memory region (or negative)? Ignore it */
 	if (nr_map < 2)
@@ -583,7 +629,7 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 			return -1;
 
 		add_memory_region(start, size, type);
-	} while (biosmap++,--nr_map);
+	} while (biosmap++, --nr_map);
 	return 0;
 }
 
@@ -613,9 +659,9 @@ static int __init parse_memopt(char *p)
 	if (!p)
 		return -EINVAL;
 	end_user_pfn = memparse(p, &p);
-	end_user_pfn >>= PAGE_SHIFT;	
+	end_user_pfn >>= PAGE_SHIFT;
 	return 0;
-} 
+}
 early_param("mem", parse_memopt);
 
 static int userdef __initdata;
@@ -627,9 +673,9 @@ static int __init parse_memmap_opt(char *p)
 
 	if (!strcmp(p, "exactmap")) {
 #ifdef CONFIG_CRASH_DUMP
-		/* If we are doing a crash dump, we
-		 * still need to know the real mem
-		 * size before original memory map is
+		/*
+		 * If we are doing a crash dump, we still need to know
+		 * the real mem size before original memory map is
 		 * reset.
 		 */
 		e820_register_active_regions(0, 0, -1UL);
@@ -713,8 +759,10 @@ __init void e820_setup_gap(void)
 
 	if (!found) {
 		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
-		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
-		       KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
+		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
+		       "address range\n"
+		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
+		       "registers may break!\n");
 	}
 
 	/*
@@ -727,8 +775,9 @@ __init void e820_setup_gap(void)
 	/* Fun with two's complement */
 	pci_mem_start = (gapstart + round) & -round;
 
-	printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
-		pci_mem_start, gapstart, gapsize);
+	printk(KERN_INFO
+	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+	       pci_mem_start, gapstart, gapsize);
 }
 
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)

From 78aa1f66f77da078357bd263fcac95fbf6bca15b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:13 +0100
Subject: [PATCH 047/890] x86: clean up arch/x86/kernel/ldt_32/64.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ldt_32.c | 60 ++++++++++++++++++----------------
 arch/x86/kernel/ldt_64.c | 69 +++++++++++++++++++++-------------------
 2 files changed, 69 insertions(+), 60 deletions(-)

diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c
index 9ff90a27c45f..e366c5fd0d19 100644
--- a/arch/x86/kernel/ldt_32.c
+++ b/arch/x86/kernel/ldt_32.c
@@ -17,7 +17,7 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+#ifdef CONFIG_SMP
 static void flush_ldt(void *null)
 {
 	if (current->active_mm)
@@ -34,19 +34,20 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	if (mincount <= pc->size)
 		return 0;
 	oldsize = pc->size;
-	mincount = (mincount+511)&(~511);
-	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+	mincount = (mincount + 511) & (~511);
+	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
+		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
 
 	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
 	oldldt = pc->ldt;
-	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
+	       (mincount - oldsize) * LDT_ENTRY_SIZE);
 	pc->ldt = newldt;
 	wmb();
 	pc->size = mincount;
@@ -55,6 +56,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	if (reload) {
 #ifdef CONFIG_SMP
 		cpumask_t mask;
+
 		preempt_disable();
 		load_LDT(pc);
 		mask = cpumask_of_cpu(smp_processor_id());
@@ -66,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 #endif
 	}
 	if (oldsize) {
-		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(oldldt);
 		else
 			kfree(oldldt);
@@ -77,9 +79,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
 	int err = alloc_ldt(new, old->size, 0);
+
 	if (err < 0)
 		return err;
-	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
 	return 0;
 }
 
@@ -89,7 +92,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct mm_struct * old_mm;
+	struct mm_struct *old_mm;
 	int retval = 0;
 
 	mutex_init(&mm->context.lock);
@@ -111,7 +114,7 @@ void destroy_context(struct mm_struct *mm)
 	if (mm->context.size) {
 		if (mm == current->active_mm)
 			clear_LDT();
-		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
 			kfree(mm->context.ldt);
@@ -119,19 +122,19 @@ void destroy_context(struct mm_struct *mm)
 	}
 }
 
-static int read_ldt(void __user * ptr, unsigned long bytecount)
+static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
 	int err;
 	unsigned long size;
-	struct mm_struct * mm = current->mm;
+	struct mm_struct *mm = current->mm;
 
 	if (!mm->context.size)
 		return 0;
-	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
-		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
+		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
 	mutex_lock(&mm->context.lock);
-	size = mm->context.size*LDT_ENTRY_SIZE;
+	size = mm->context.size * LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
@@ -143,7 +146,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
 		goto error_return;
 	if (size != bytecount) {
 		/* zero-fill the rest */
-		if (clear_user(ptr+size, bytecount-size) != 0) {
+		if (clear_user(ptr + size, bytecount - size) != 0) {
 			err = -EFAULT;
 			goto error_return;
 		}
@@ -153,13 +156,13 @@ error_return:
 	return err;
 }
 
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 {
 	int err;
 	unsigned long size;
 
 	err = 0;
-	size = 5*sizeof(struct desc_struct);
+	size = 5 * sizeof(struct desc_struct);
 	if (size > bytecount)
 		size = bytecount;
 
@@ -170,9 +173,9 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
 	return err;
 }
 
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
-	struct mm_struct * mm = current->mm;
+	struct mm_struct *mm = current->mm;
 	__u32 entry_1, entry_2;
 	int error;
 	struct user_desc ldt_info;
@@ -180,7 +183,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 	error = -EINVAL;
 	if (bytecount != sizeof(ldt_info))
 		goto out;
-	error = -EFAULT; 	
+	error = -EFAULT;
 	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
 		goto out;
 
@@ -196,13 +199,14 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 
 	mutex_lock(&mm->context.lock);
 	if (ldt_info.entry_number >= mm->context.size) {
-		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+		error = alloc_ldt(&current->mm->context,
+				  ldt_info.entry_number + 1, 1);
 		if (error < 0)
 			goto out_unlock;
 	}
 
-   	/* Allow LDTs to be cleared by the user. */
-   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+	/* Allow LDTs to be cleared by the user. */
+	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
@@ -217,7 +221,8 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 
 	/* Install the new entry ...  */
 install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
+	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1,
+			entry_2);
 	error = 0;
 
 out_unlock:
@@ -226,7 +231,8 @@ out:
 	return error;
 }
 
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+asmlinkage int sys_modify_ldt(int func, void __user *ptr,
+			      unsigned long bytecount)
 {
 	int ret = -ENOSYS;
 
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c
index 60e57abb8e90..6c41db367de8 100644
--- a/arch/x86/kernel/ldt_64.c
+++ b/arch/x86/kernel/ldt_64.c
@@ -2,7 +2,7 @@
  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  * Copyright (C) 2002 Andi Kleen
- * 
+ *
  * This handles calls from both 32bit and 64bit mode.
  */
 
@@ -20,11 +20,11 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+#ifdef CONFIG_SMP
 static void flush_ldt(void *null)
 {
 	if (current->active_mm)
-               load_LDT(&current->active_mm->context);
+		load_LDT(&current->active_mm->context);
 }
 #endif
 
@@ -37,19 +37,20 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
 	if (mincount <= (unsigned)pc->size)
 		return 0;
 	oldsize = pc->size;
-	mincount = (mincount+511)&(~511);
-	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+	mincount = (mincount + 511) & (~511);
+	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
+		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
 
 	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
 	oldldt = pc->ldt;
-	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
+	       (mincount - oldsize) * LDT_ENTRY_SIZE);
 	wmb();
 	pc->ldt = newldt;
 	wmb();
@@ -70,7 +71,7 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
 #endif
 	}
 	if (oldsize) {
-		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(oldldt);
 		else
 			kfree(oldldt);
@@ -81,9 +82,10 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
 	int err = alloc_ldt(new, old->size, 0);
+
 	if (err < 0)
 		return err;
-	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
 	return 0;
 }
 
@@ -93,7 +95,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct mm_struct * old_mm;
+	struct mm_struct *old_mm;
 	int retval = 0;
 
 	mutex_init(&mm->context.lock);
@@ -108,13 +110,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 }
 
 /*
- * 
  * Don't touch the LDT register - we're already in the next thread.
  */
 void destroy_context(struct mm_struct *mm)
 {
 	if (mm->context.size) {
-		if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+		if ((unsigned)mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
 			kfree(mm->context.ldt);
@@ -122,19 +123,19 @@ void destroy_context(struct mm_struct *mm)
 	}
 }
 
-static int read_ldt(void __user * ptr, unsigned long bytecount)
+static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
 	int err;
 	unsigned long size;
-	struct mm_struct * mm = current->mm;
+	struct mm_struct *mm = current->mm;
 
 	if (!mm->context.size)
 		return 0;
-	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
-		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
+		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
 	mutex_lock(&mm->context.lock);
-	size = mm->context.size*LDT_ENTRY_SIZE;
+	size = mm->context.size * LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
@@ -146,7 +147,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
 		goto error_return;
 	if (size != bytecount) {
 		/* zero-fill the rest */
-		if (clear_user(ptr+size, bytecount-size) != 0) {
+		if (clear_user(ptr + size, bytecount - size) != 0) {
 			err = -EFAULT;
 			goto error_return;
 		}
@@ -156,21 +157,21 @@ error_return:
 	return err;
 }
 
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 {
-	/* Arbitrary number */ 
+	/* Arbitrary number */
 	/* x86-64 default LDT is all zeros */
-	if (bytecount > 128) 
-		bytecount = 128; 	
+	if (bytecount > 128)
+		bytecount = 128;
 	if (clear_user(ptr, bytecount))
 		return -EFAULT;
-	return bytecount; 
+	return bytecount;
 }
 
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct task_struct *me = current;
-	struct mm_struct * mm = me->mm;
+	struct mm_struct *mm = me->mm;
 	__u32 entry_1, entry_2, *lp;
 	int error;
 	struct user_desc ldt_info;
@@ -179,7 +180,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 
 	if (bytecount != sizeof(ldt_info))
 		goto out;
-	error = -EFAULT; 	
+	error = -EFAULT;
 	if (copy_from_user(&ldt_info, ptr, bytecount))
 		goto out;
 
@@ -195,15 +196,16 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 
 	mutex_lock(&mm->context.lock);
 	if (ldt_info.entry_number >= (unsigned)mm->context.size) {
-		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+		error = alloc_ldt(&current->mm->context,
+				  ldt_info.entry_number + 1, 1);
 		if (error < 0)
 			goto out_unlock;
 	}
 
-	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+	lp = (__u32 *)((ldt_info.entry_number << 3) + (char *)mm->context.ldt);
 
-   	/* Allow LDTs to be cleared by the user. */
-   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+	/* Allow LDTs to be cleared by the user. */
+	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
@@ -228,7 +230,8 @@ out:
 	return error;
 }
 
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+asmlinkage int sys_modify_ldt(int func, void __user *ptr,
+			      unsigned long bytecount)
 {
 	int ret = -ENOSYS;
 

From 4edc8f5454814201d2fb222354284365b4f0537c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:13 +0100
Subject: [PATCH 048/890] x86: clean up include/asm-x86/desc_64.h

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/desc_64.h | 95 +++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 44 deletions(-)

diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 7d9c938e69fd..bb2009ecbbca 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -1,4 +1,4 @@
-/* Written 2000 by Andi Kleen */ 
+/* Written 2000 by Andi Kleen */
 #ifndef __ARCH_DESC_H
 #define __ARCH_DESC_H
 
@@ -35,7 +35,7 @@ static inline unsigned long __store_tr(void)
  * something other than this.
  */
 extern struct desc_struct default_ldt[];
-extern struct gate_struct idt_table[]; 
+extern struct gate_struct idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 
 /* the cpu gdt accessor */
@@ -51,40 +51,45 @@ static inline void store_gdt(struct desc_ptr *ptr)
        asm("sgdt %w0":"=m" (*ptr));
 }
 
-static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)  
+static inline void _set_gate(void *adr, unsigned type, unsigned long func,
+			     unsigned dpl, unsigned ist)
 {
-	struct gate_struct s; 	
-	s.offset_low = PTR_LOW(func); 
+	struct gate_struct s;
+
+	s.offset_low = PTR_LOW(func);
 	s.segment = __KERNEL_CS;
-	s.ist = ist; 
+	s.ist = ist;
 	s.p = 1;
-	s.dpl = dpl; 
+	s.dpl = dpl;
 	s.zero0 = 0;
-	s.zero1 = 0; 
-	s.type = type; 
-	s.offset_middle = PTR_MIDDLE(func); 
-	s.offset_high = PTR_HIGH(func); 
-	/* does not need to be atomic because it is only done once at setup time */ 
-	memcpy(adr, &s, 16); 
-} 
+	s.zero1 = 0;
+	s.type = type;
+	s.offset_middle = PTR_MIDDLE(func);
+	s.offset_high = PTR_HIGH(func);
+	/*
+	 * does not need to be atomic because it is only done once at
+	 * setup time
+	 */
+	memcpy(adr, &s, 16);
+}
 
-static inline void set_intr_gate(int nr, void *func) 
-{ 
+static inline void set_intr_gate(int nr, void *func)
+{
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
-} 
+	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0);
+}
 
-static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
-{ 
+static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
+{
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 
-} 
+	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist);
+}
 
-static inline void set_system_gate(int nr, void *func) 
-{ 
+static inline void set_system_gate(int nr, void *func)
+{
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
-} 
+	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
+}
 
 static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
 {
@@ -101,24 +106,25 @@ static inline void store_idt(struct desc_ptr *dtr)
        asm("sidt %w0":"=m" (*dtr));
 }
 
-static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, 
-					 unsigned size) 
-{ 
+static inline void set_tssldt_descriptor(void *ptr, unsigned long tss,
+					 unsigned type, unsigned size)
+{
 	struct ldttss_desc d;
-	memset(&d,0,sizeof(d)); 
+
+	memset(&d, 0, sizeof(d));
 	d.limit0 = size & 0xFFFF;
-	d.base0 = PTR_LOW(tss); 
-	d.base1 = PTR_MIDDLE(tss) & 0xFF; 
+	d.base0 = PTR_LOW(tss);
+	d.base1 = PTR_MIDDLE(tss) & 0xFF;
 	d.type = type;
-	d.p = 1; 
+	d.p = 1;
 	d.limit1 = (size >> 16) & 0xF;
-	d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF; 
-	d.base3 = PTR_HIGH(tss); 
-	memcpy(ptr, &d, 16); 
+	d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
+	d.base3 = PTR_HIGH(tss);
+	memcpy(ptr, &d, 16);
 }
 
 static inline void set_tss_desc(unsigned cpu, void *addr)
-{ 
+{
 	/*
 	 * sizeof(unsigned long) coming from an extra "long" at the end
 	 * of the iobitmap. See tss_struct definition in processor.h
@@ -129,18 +135,18 @@ static inline void set_tss_desc(unsigned cpu, void *addr)
 	set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS],
 		(unsigned long)addr, DESC_TSS,
 		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
-} 
+}
 
 static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
-{ 
+{
 	set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
 			      DESC_LDT, size * 8 - 1);
 }
 
 #define LDT_entry_a(info) \
 	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-/* Don't allow setting of the lm bit. It is useless anyways because 
-   64bit system calls require __USER_CS. */ 
+/* Don't allow setting of the lm bit. It is useless anyways because
+   64bit system calls require __USER_CS. */
 #define LDT_entry_b(info) \
 	(((info)->base_addr & 0xff000000) | \
 	(((info)->base_addr & 0x00ff0000) >> 16) | \
@@ -172,12 +178,12 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
 		gdt[i] = t->tls_array[i];
-} 
+}
 
 /*
  * load one particular LDT into the current CPU
  */
-static inline void load_LDT_nolock (mm_context_t *pc, int cpu)
+static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
 {
 	int count = pc->size;
 
@@ -185,7 +191,7 @@ static inline void load_LDT_nolock (mm_context_t *pc, int cpu)
 		clear_LDT();
 		return;
 	}
-		
+
 	set_ldt_desc(cpu, pc->ldt, count);
 	load_LDT_desc();
 }
@@ -193,6 +199,7 @@ static inline void load_LDT_nolock (mm_context_t *pc, int cpu)
 static inline void load_LDT(mm_context_t *pc)
 {
 	int cpu = get_cpu();
+
 	load_LDT_nolock(pc, cpu);
 	put_cpu();
 }

From fc2d625c4fac18e672a3b7c61af5de22d7ab7d87 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:13 +0100
Subject: [PATCH 049/890] x86: introduce ldt_write accessor

Create a ldt write accessor like the 32 bit one.

Preparatory patch for merging ldt.c and anyway necessary for
64bit paravirt ops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ldt_64.c  | 8 +++-----
 include/asm-x86/desc_64.h | 9 +++++++++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c
index 6c41db367de8..d72dc7a0636f 100644
--- a/arch/x86/kernel/ldt_64.c
+++ b/arch/x86/kernel/ldt_64.c
@@ -172,7 +172,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct task_struct *me = current;
 	struct mm_struct *mm = me->mm;
-	__u32 entry_1, entry_2, *lp;
+	__u32 entry_1, entry_2;
 	int error;
 	struct user_desc ldt_info;
 
@@ -202,8 +202,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 			goto out_unlock;
 	}
 
-	lp = (__u32 *)((ldt_info.entry_number << 3) + (char *)mm->context.ldt);
-
 	/* Allow LDTs to be cleared by the user. */
 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
@@ -220,8 +218,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
 	/* Install the new entry ...  */
 install:
-	*lp	= entry_1;
-	*(lp+1)	= entry_2;
+	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1,
+			entry_2);
 	error = 0;
 
 out_unlock:
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index bb2009ecbbca..7d48df72aef2 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -38,6 +38,15 @@ extern struct desc_struct default_ldt[];
 extern struct gate_struct idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 
+static inline void write_ldt_entry(struct desc_struct *ldt,
+				   int entry, u32 entry_low, u32 entry_high)
+{
+	__u32 *lp = (__u32 *)((entry << 3) + (char *)ldt);
+
+	lp[0] = entry_low;
+	lp[1] = entry_high;
+}
+
 /* the cpu gdt accessor */
 #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
 

From 70f5088dd5e9fbd3a71b3a5b01395c676158194b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:13 +0100
Subject: [PATCH 050/890] x86: prepare arch/x86/kernel/ldt_32/64.c for merging

White space and coding style cleanups.

Change unsigned to int. There is no win when we compare mincount against pc->size,
which is an int as well. Casting pc->size to unsigned just might hide real problems.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ldt_32.c |  3 +--
 arch/x86/kernel/ldt_64.c | 24 +++++++++++-------------
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c
index e366c5fd0d19..bb15753abaf2 100644
--- a/arch/x86/kernel/ldt_32.c
+++ b/arch/x86/kernel/ldt_32.c
@@ -27,8 +27,7 @@ static void flush_ldt(void *null)
 
 static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 {
-	void *oldldt;
-	void *newldt;
+	void *oldldt, *newldt;
 	int oldsize;
 
 	if (mincount <= pc->size)
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c
index d72dc7a0636f..95903938e7ad 100644
--- a/arch/x86/kernel/ldt_64.c
+++ b/arch/x86/kernel/ldt_64.c
@@ -18,7 +18,7 @@
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
-#include <asm/proto.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *null)
@@ -28,13 +28,12 @@ static void flush_ldt(void *null)
 }
 #endif
 
-static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 {
-	void *oldldt;
-	void *newldt;
-	unsigned oldsize;
+	void *oldldt, *newldt;
+	int oldsize;
 
-	if (mincount <= (unsigned)pc->size)
+	if (mincount <= pc->size)
 		return 0;
 	oldsize = pc->size;
 	mincount = (mincount + 511) & (~511);
@@ -56,13 +55,14 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
 	wmb();
 	pc->size = mincount;
 	wmb();
+
 	if (reload) {
 #ifdef CONFIG_SMP
 		cpumask_t mask;
 
 		preempt_disable();
-		mask = cpumask_of_cpu(smp_processor_id());
 		load_LDT(pc);
+		mask = cpumask_of_cpu(smp_processor_id());
 		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
 			smp_call_function(flush_ldt, NULL, 1, 1);
 		preempt_enable();
@@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 void destroy_context(struct mm_struct *mm)
 {
 	if (mm->context.size) {
-		if ((unsigned)mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
+		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
 			kfree(mm->context.ldt);
@@ -170,18 +170,16 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
-	struct task_struct *me = current;
-	struct mm_struct *mm = me->mm;
+	struct mm_struct *mm = current->mm;
 	__u32 entry_1, entry_2;
 	int error;
 	struct user_desc ldt_info;
 
 	error = -EINVAL;
-
 	if (bytecount != sizeof(ldt_info))
 		goto out;
 	error = -EFAULT;
-	if (copy_from_user(&ldt_info, ptr, bytecount))
+	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
 		goto out;
 
 	error = -EINVAL;
@@ -195,7 +193,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	}
 
 	mutex_lock(&mm->context.lock);
-	if (ldt_info.entry_number >= (unsigned)mm->context.size) {
+	if (ldt_info.entry_number >= mm->context.size) {
 		error = alloc_ldt(&current->mm->context,
 				  ldt_info.entry_number + 1, 1);
 		if (error < 0)

From 77e463d1040d6310211ac5162729f5d4afc4dd8c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:14 +0100
Subject: [PATCH 051/890] x86: merge arch/x86/kernel/ldt_32/64.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile_32         |   2 +-
 arch/x86/kernel/Makefile_64         |   2 +-
 arch/x86/kernel/{ldt_64.c => ldt.c} |  25 ++-
 arch/x86/kernel/ldt_32.c            | 253 ----------------------------
 4 files changed, 22 insertions(+), 260 deletions(-)
 rename arch/x86/kernel/{ldt_64.c => ldt.c} (90%)
 delete mode 100644 arch/x86/kernel/ldt_32.c

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 0cc1981d1e38..31ff982bc26b 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -6,7 +6,7 @@ extra-y := head_32.o init_task.o vmlinux.lds
 CPPFLAGS_vmlinux.lds += -Ui386
 
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
-		ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
+		ptrace_32.o time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
 		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
 
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 08a68f0d8fda..9cb3df27c413 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -7,7 +7,7 @@ CPPFLAGS_vmlinux.lds += -Ux86_64
 EXTRA_AFLAGS	:= -traditional
 
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
-		ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \
+		ptrace_64.o time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \
 		x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
 		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt.c
similarity index 90%
rename from arch/x86/kernel/ldt_64.c
rename to arch/x86/kernel/ldt.c
index 95903938e7ad..a8cdca3615bf 100644
--- a/arch/x86/kernel/ldt_64.c
+++ b/arch/x86/kernel/ldt.c
@@ -50,7 +50,11 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	oldldt = pc->ldt;
 	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
 	       (mincount - oldsize) * LDT_ENTRY_SIZE);
+
+#ifdef CONFIG_X86_64
+	/* CHECKME: Do we really need this ? */
 	wmb();
+#endif
 	pc->ldt = newldt;
 	wmb();
 	pc->size = mincount;
@@ -110,11 +114,18 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 }
 
 /*
- * Don't touch the LDT register - we're already in the next thread.
+ * No need to lock the MM as we are the last user
+ *
+ * 64bit: Don't touch the LDT register - we're already in the next thread.
  */
 void destroy_context(struct mm_struct *mm)
 {
 	if (mm->context.size) {
+#ifdef CONFIG_X86_32
+		/* CHECKME: Can this ever happen ? */
+		if (mm == current->active_mm)
+			clear_LDT();
+#endif
 		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
@@ -159,10 +170,14 @@ error_return:
 
 static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 {
-	/* Arbitrary number */
-	/* x86-64 default LDT is all zeros */
-	if (bytecount > 128)
-		bytecount = 128;
+	/* CHECKME: Can we use _one_ random number ? */
+#ifdef CONFIG_X86_32
+	unsigned long size = 5 * sizeof(struct desc_struct);
+#else
+	unsigned long size = 128;
+#endif
+	if (bytecount > size)
+		bytecount = size;
 	if (clear_user(ptr, bytecount))
 		return -EFAULT;
 	return bytecount;
diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c
deleted file mode 100644
index bb15753abaf2..000000000000
--- a/arch/x86/kernel/ldt_32.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/mmu_context.h>
-
-#ifdef CONFIG_SMP
-static void flush_ldt(void *null)
-{
-	if (current->active_mm)
-		load_LDT(&current->active_mm->context);
-}
-#endif
-
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
-{
-	void *oldldt, *newldt;
-	int oldsize;
-
-	if (mincount <= pc->size)
-		return 0;
-	oldsize = pc->size;
-	mincount = (mincount + 511) & (~511);
-	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
-	else
-		newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL);
-
-	if (!newldt)
-		return -ENOMEM;
-
-	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-	oldldt = pc->ldt;
-	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-	       (mincount - oldsize) * LDT_ENTRY_SIZE);
-	pc->ldt = newldt;
-	wmb();
-	pc->size = mincount;
-	wmb();
-
-	if (reload) {
-#ifdef CONFIG_SMP
-		cpumask_t mask;
-
-		preempt_disable();
-		load_LDT(pc);
-		mask = cpumask_of_cpu(smp_processor_id());
-		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
-			smp_call_function(flush_ldt, NULL, 1, 1);
-		preempt_enable();
-#else
-		load_LDT(pc);
-#endif
-	}
-	if (oldsize) {
-		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(oldldt);
-		else
-			kfree(oldldt);
-	}
-	return 0;
-}
-
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-{
-	int err = alloc_ldt(new, old->size, 0);
-
-	if (err < 0)
-		return err;
-	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
-	return 0;
-}
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	struct mm_struct *old_mm;
-	int retval = 0;
-
-	mutex_init(&mm->context.lock);
-	mm->context.size = 0;
-	old_mm = current->mm;
-	if (old_mm && old_mm->context.size > 0) {
-		mutex_lock(&old_mm->context.lock);
-		retval = copy_ldt(&mm->context, &old_mm->context);
-		mutex_unlock(&old_mm->context.lock);
-	}
-	return retval;
-}
-
-/*
- * No need to lock the MM as we are the last user
- */
-void destroy_context(struct mm_struct *mm)
-{
-	if (mm->context.size) {
-		if (mm == current->active_mm)
-			clear_LDT();
-		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(mm->context.ldt);
-		else
-			kfree(mm->context.ldt);
-		mm->context.size = 0;
-	}
-}
-
-static int read_ldt(void __user *ptr, unsigned long bytecount)
-{
-	int err;
-	unsigned long size;
-	struct mm_struct *mm = current->mm;
-
-	if (!mm->context.size)
-		return 0;
-	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
-		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
-
-	mutex_lock(&mm->context.lock);
-	size = mm->context.size * LDT_ENTRY_SIZE;
-	if (size > bytecount)
-		size = bytecount;
-
-	err = 0;
-	if (copy_to_user(ptr, mm->context.ldt, size))
-		err = -EFAULT;
-	mutex_unlock(&mm->context.lock);
-	if (err < 0)
-		goto error_return;
-	if (size != bytecount) {
-		/* zero-fill the rest */
-		if (clear_user(ptr + size, bytecount - size) != 0) {
-			err = -EFAULT;
-			goto error_return;
-		}
-	}
-	return bytecount;
-error_return:
-	return err;
-}
-
-static int read_default_ldt(void __user *ptr, unsigned long bytecount)
-{
-	int err;
-	unsigned long size;
-
-	err = 0;
-	size = 5 * sizeof(struct desc_struct);
-	if (size > bytecount)
-		size = bytecount;
-
-	err = size;
-	if (clear_user(ptr, size))
-		err = -EFAULT;
-
-	return err;
-}
-
-static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-{
-	struct mm_struct *mm = current->mm;
-	__u32 entry_1, entry_2;
-	int error;
-	struct user_desc ldt_info;
-
-	error = -EINVAL;
-	if (bytecount != sizeof(ldt_info))
-		goto out;
-	error = -EFAULT;
-	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
-		goto out;
-
-	error = -EINVAL;
-	if (ldt_info.entry_number >= LDT_ENTRIES)
-		goto out;
-	if (ldt_info.contents == 3) {
-		if (oldmode)
-			goto out;
-		if (ldt_info.seg_not_present == 0)
-			goto out;
-	}
-
-	mutex_lock(&mm->context.lock);
-	if (ldt_info.entry_number >= mm->context.size) {
-		error = alloc_ldt(&current->mm->context,
-				  ldt_info.entry_number + 1, 1);
-		if (error < 0)
-			goto out_unlock;
-	}
-
-	/* Allow LDTs to be cleared by the user. */
-	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode || LDT_empty(&ldt_info)) {
-			entry_1 = 0;
-			entry_2 = 0;
-			goto install;
-		}
-	}
-
-	entry_1 = LDT_entry_a(&ldt_info);
-	entry_2 = LDT_entry_b(&ldt_info);
-	if (oldmode)
-		entry_2 &= ~(1 << 20);
-
-	/* Install the new entry ...  */
-install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1,
-			entry_2);
-	error = 0;
-
-out_unlock:
-	mutex_unlock(&mm->context.lock);
-out:
-	return error;
-}
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
-			      unsigned long bytecount)
-{
-	int ret = -ENOSYS;
-
-	switch (func) {
-	case 0:
-		ret = read_ldt(ptr, bytecount);
-		break;
-	case 1:
-		ret = write_ldt(ptr, bytecount, 1);
-		break;
-	case 2:
-		ret = read_default_ldt(ptr, bytecount);
-		break;
-	case 0x11:
-		ret = write_ldt(ptr, bytecount, 0);
-		break;
-	}
-	return ret;
-}

From 2d539553c96771bc8f77156f27500d35e1fe114c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:14 +0100
Subject: [PATCH 052/890] x86: unify include/asm-x86/apicdef_32/64.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apicdef.h    | 403 ++++++++++++++++++++++++++++++++++-
 include/asm-x86/apicdef_32.h | 375 --------------------------------
 include/asm-x86/apicdef_64.h | 392 ----------------------------------
 3 files changed, 399 insertions(+), 771 deletions(-)
 delete mode 100644 include/asm-x86/apicdef_32.h
 delete mode 100644 include/asm-x86/apicdef_64.h

diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 4542c220bf4d..83ac1e6e3625 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -1,5 +1,400 @@
-#ifdef CONFIG_X86_32
-# include "apicdef_32.h"
-#else
-# include "apicdef_64.h"
+#ifndef _ASM_X86_APICDEF_H
+#define _ASM_X86_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define	APIC_DEFAULT_PHYS_BASE	0xfee00000
+
+#define	APIC_ID		0x20
+
+#ifdef CONFIG_X86_64
+# define	APIC_ID_MASK		(0xFFu<<24)
+# define	GET_APIC_ID(x)		(((x)>>24)&0xFFu)
+# define	SET_APIC_ID(x)		(((x)<<24))
+#endif
+
+#define	APIC_LVR	0x30
+#define		APIC_LVR_MASK		0xFF00FF
+#define		GET_APIC_VERSION(x)	((x)&0xFFu)
+#define		GET_APIC_MAXLVT(x)	(((x)>>16)&0xFFu)
+#define		APIC_INTEGRATED(x)	((x)&0xF0u)
+#define		APIC_XAPIC(x)		((x) >= 0x14)
+#define	APIC_TASKPRI	0x80
+#define		APIC_TPRI_MASK		0xFFu
+#define	APIC_ARBPRI	0x90
+#define		APIC_ARBPRI_MASK	0xFFu
+#define	APIC_PROCPRI	0xA0
+#define	APIC_EOI	0xB0
+#define		APIC_EIO_ACK		0x0
+#define	APIC_RRR	0xC0
+#define	APIC_LDR	0xD0
+#define		APIC_LDR_MASK		(0xFFu<<24)
+#define		GET_APIC_LOGICAL_ID(x)	(((x)>>24)&0xFFu)
+#define		SET_APIC_LOGICAL_ID(x)	(((x)<<24))
+#define		APIC_ALL_CPUS		0xFFu
+#define	APIC_DFR	0xE0
+#define		APIC_DFR_CLUSTER		0x0FFFFFFFul
+#define		APIC_DFR_FLAT			0xFFFFFFFFul
+#define	APIC_SPIV	0xF0
+#define		APIC_SPIV_FOCUS_DISABLED	(1<<9)
+#define		APIC_SPIV_APIC_ENABLED		(1<<8)
+#define	APIC_ISR	0x100
+#define	APIC_ISR_NR     0x8     /* Number of 32 bit ISR registers. */
+#define	APIC_TMR	0x180
+#define	APIC_IRR	0x200
+#define	APIC_ESR	0x280
+#define		APIC_ESR_SEND_CS	0x00001
+#define		APIC_ESR_RECV_CS	0x00002
+#define		APIC_ESR_SEND_ACC	0x00004
+#define		APIC_ESR_RECV_ACC	0x00008
+#define		APIC_ESR_SENDILL	0x00020
+#define		APIC_ESR_RECVILL	0x00040
+#define		APIC_ESR_ILLREGA	0x00080
+#define	APIC_ICR	0x300
+#define		APIC_DEST_SELF		0x40000
+#define		APIC_DEST_ALLINC	0x80000
+#define		APIC_DEST_ALLBUT	0xC0000
+#define		APIC_ICR_RR_MASK	0x30000
+#define		APIC_ICR_RR_INVALID	0x00000
+#define		APIC_ICR_RR_INPROG	0x10000
+#define		APIC_ICR_RR_VALID	0x20000
+#define		APIC_INT_LEVELTRIG	0x08000
+#define		APIC_INT_ASSERT		0x04000
+#define		APIC_ICR_BUSY		0x01000
+#define		APIC_DEST_LOGICAL	0x00800
+#define		APIC_DEST_PHYSICAL	0x00000
+#define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_LOWEST		0x00100
+#define		APIC_DM_SMI		0x00200
+#define		APIC_DM_REMRD		0x00300
+#define		APIC_DM_NMI		0x00400
+#define		APIC_DM_INIT		0x00500
+#define		APIC_DM_STARTUP		0x00600
+#define		APIC_DM_EXTINT		0x00700
+#define		APIC_VECTOR_MASK	0x000FF
+#define	APIC_ICR2	0x310
+#define		GET_APIC_DEST_FIELD(x)	(((x)>>24)&0xFF)
+#define		SET_APIC_DEST_FIELD(x)	((x)<<24)
+#define	APIC_LVTT	0x320
+#define	APIC_LVTTHMR	0x330
+#define	APIC_LVTPC	0x340
+#define	APIC_LVT0	0x350
+#define		APIC_LVT_TIMER_BASE_MASK	(0x3<<18)
+#define		GET_APIC_TIMER_BASE(x)		(((x)>>18)&0x3)
+#define		SET_APIC_TIMER_BASE(x)		(((x)<<18))
+#define		APIC_TIMER_BASE_CLKIN		0x0
+#define		APIC_TIMER_BASE_TMBASE		0x1
+#define		APIC_TIMER_BASE_DIV		0x2
+#define		APIC_LVT_TIMER_PERIODIC		(1<<17)
+#define		APIC_LVT_MASKED			(1<<16)
+#define		APIC_LVT_LEVEL_TRIGGER		(1<<15)
+#define		APIC_LVT_REMOTE_IRR		(1<<14)
+#define		APIC_INPUT_POLARITY		(1<<13)
+#define		APIC_SEND_PENDING		(1<<12)
+#define		APIC_MODE_MASK			0x700
+#define		GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
+#define		SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
+#define			APIC_MODE_FIXED		0x0
+#define			APIC_MODE_NMI		0x4
+#define			APIC_MODE_EXTINT	0x7
+#define	APIC_LVT1	0x360
+#define	APIC_LVTERR	0x370
+#define	APIC_TMICT	0x380
+#define	APIC_TMCCT	0x390
+#define	APIC_TDCR	0x3E0
+#define		APIC_TDR_DIV_TMBASE	(1<<2)
+#define		APIC_TDR_DIV_1		0xB
+#define		APIC_TDR_DIV_2		0x0
+#define		APIC_TDR_DIV_4		0x1
+#define		APIC_TDR_DIV_8		0x2
+#define		APIC_TDR_DIV_16		0x3
+#define		APIC_TDR_DIV_32		0x8
+#define		APIC_TDR_DIV_64		0x9
+#define		APIC_TDR_DIV_128	0xA
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#ifdef CONFIG_X86_32
+# define MAX_IO_APICS 64
+#else
+# define MAX_IO_APICS 128
+# define MAX_LOCAL_APIC 256
+#endif
+
+/*
+ * All x86-64 systems are xAPIC compatible.
+ * In the following, "apicid" is a physical APIC ID.
+ */
+#define XAPIC_DEST_CPUS_SHIFT	4
+#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
+#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
+#define APIC_CLUSTER(apicid)	((apicid) & XAPIC_DEST_CLUSTER_MASK)
+#define APIC_CLUSTERID(apicid)	(APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT)
+#define APIC_CPUID(apicid)	((apicid) & XAPIC_DEST_CPUS_MASK)
+#define NUM_APIC_CLUSTERS	((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
+
+/*
+ * the local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * problem why we cannot use it right now is the P5 APIC, it has an
+ * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+struct local_apic {
+
+/*000*/	struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/	struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/	struct { /* APIC ID Register */
+		u32   __reserved_1	: 24,
+			phys_apic_id	:  4,
+			__reserved_2	:  4;
+		u32 __reserved[3];
+	} id;
+
+/*030*/	const
+	struct { /* APIC Version Register */
+		u32   version		:  8,
+			__reserved_1	:  8,
+			max_lvt		:  8,
+			__reserved_2	:  8;
+		u32 __reserved[3];
+	} version;
+
+/*040*/	struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/	struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/	struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/	struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/	struct { /* Task Priority Register */
+		u32   priority	:  8,
+			__reserved_1	: 24;
+		u32 __reserved_2[3];
+	} tpr;
+
+/*090*/	const
+	struct { /* Arbitration Priority Register */
+		u32   priority	:  8,
+			__reserved_1	: 24;
+		u32 __reserved_2[3];
+	} apr;
+
+/*0A0*/	const
+	struct { /* Processor Priority Register */
+		u32   priority	:  8,
+			__reserved_1	: 24;
+		u32 __reserved_2[3];
+	} ppr;
+
+/*0B0*/	struct { /* End Of Interrupt Register */
+		u32   eoi;
+		u32 __reserved[3];
+	} eoi;
+
+/*0C0*/	struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/	struct { /* Logical Destination Register */
+		u32   __reserved_1	: 24,
+			logical_dest	:  8;
+		u32 __reserved_2[3];
+	} ldr;
+
+/*0E0*/	struct { /* Destination Format Register */
+		u32   __reserved_1	: 28,
+			model		:  4;
+		u32 __reserved_2[3];
+	} dfr;
+
+/*0F0*/	struct { /* Spurious Interrupt Vector Register */
+		u32	spurious_vector	:  8,
+			apic_enabled	:  1,
+			focus_cpu	:  1,
+			__reserved_2	: 22;
+		u32 __reserved_3[3];
+	} svr;
+
+/*100*/	struct { /* In Service Register */
+/*170*/		u32 bitfield;
+		u32 __reserved[3];
+	} isr [8];
+
+/*180*/	struct { /* Trigger Mode Register */
+/*1F0*/		u32 bitfield;
+		u32 __reserved[3];
+	} tmr [8];
+
+/*200*/	struct { /* Interrupt Request Register */
+/*270*/		u32 bitfield;
+		u32 __reserved[3];
+	} irr [8];
+
+/*280*/	union { /* Error Status Register */
+		struct {
+			u32   send_cs_error			:  1,
+				receive_cs_error		:  1,
+				send_accept_error		:  1,
+				receive_accept_error		:  1,
+				__reserved_1			:  1,
+				send_illegal_vector		:  1,
+				receive_illegal_vector		:  1,
+				illegal_register_address	:  1,
+				__reserved_2			: 24;
+			u32 __reserved_3[3];
+		} error_bits;
+		struct {
+			u32 errors;
+			u32 __reserved_3[3];
+		} all_errors;
+	} esr;
+
+/*290*/	struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/	struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/	struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/	struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/	struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/	struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/	struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/	struct { /* Interrupt Command Register 1 */
+		u32   vector			:  8,
+			delivery_mode		:  3,
+			destination_mode	:  1,
+			delivery_status		:  1,
+			__reserved_1		:  1,
+			level			:  1,
+			trigger			:  1,
+			__reserved_2		:  2,
+			shorthand		:  2,
+			__reserved_3		:  12;
+		u32 __reserved_4[3];
+	} icr1;
+
+/*310*/	struct { /* Interrupt Command Register 2 */
+		union {
+			u32   __reserved_1	: 24,
+				phys_dest	:  4,
+				__reserved_2	:  4;
+			u32   __reserved_3	: 24,
+				logical_dest	:  8;
+		} dest;
+		u32 __reserved_4[3];
+	} icr2;
+
+/*320*/	struct { /* LVT - Timer */
+		u32   vector		:  8,
+			__reserved_1	:  4,
+			delivery_status	:  1,
+			__reserved_2	:  3,
+			mask		:  1,
+			timer_mode	:  1,
+			__reserved_3	: 14;
+		u32 __reserved_4[3];
+	} lvt_timer;
+
+/*330*/	struct { /* LVT - Thermal Sensor */
+		u32  vector		:  8,
+			delivery_mode	:  3,
+			__reserved_1	:  1,
+			delivery_status	:  1,
+			__reserved_2	:  3,
+			mask		:  1,
+			__reserved_3	: 15;
+		u32 __reserved_4[3];
+	} lvt_thermal;
+
+/*340*/	struct { /* LVT - Performance Counter */
+		u32   vector		:  8,
+			delivery_mode	:  3,
+			__reserved_1	:  1,
+			delivery_status	:  1,
+			__reserved_2	:  3,
+			mask		:  1,
+			__reserved_3	: 15;
+		u32 __reserved_4[3];
+	} lvt_pc;
+
+/*350*/	struct { /* LVT - LINT0 */
+		u32   vector		:  8,
+			delivery_mode	:  3,
+			__reserved_1	:  1,
+			delivery_status	:  1,
+			polarity	:  1,
+			remote_irr	:  1,
+			trigger		:  1,
+			mask		:  1,
+			__reserved_2	: 15;
+		u32 __reserved_3[3];
+	} lvt_lint0;
+
+/*360*/	struct { /* LVT - LINT1 */
+		u32   vector		:  8,
+			delivery_mode	:  3,
+			__reserved_1	:  1,
+			delivery_status	:  1,
+			polarity	:  1,
+			remote_irr	:  1,
+			trigger		:  1,
+			mask		:  1,
+			__reserved_2	: 15;
+		u32 __reserved_3[3];
+	} lvt_lint1;
+
+/*370*/	struct { /* LVT - Error */
+		u32   vector		:  8,
+			__reserved_1	:  4,
+			delivery_status	:  1,
+			__reserved_2	:  3,
+			mask		:  1,
+			__reserved_3	: 15;
+		u32 __reserved_4[3];
+	} lvt_error;
+
+/*380*/	struct { /* Timer Initial Count Register */
+		u32   initial_count;
+		u32 __reserved_2[3];
+	} timer_icr;
+
+/*390*/	const
+	struct { /* Timer Current Count Register */
+		u32   curr_count;
+		u32 __reserved_2[3];
+	} timer_ccr;
+
+/*3A0*/	struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/	struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/	struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/	struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/	struct { /* Timer Divide Configuration Register */
+		u32   divisor		:  4,
+			__reserved_1	: 28;
+		u32 __reserved_2[3];
+	} timer_dcr;
+
+/*3F0*/	struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#define BAD_APICID 0xFFu
+
 #endif
diff --git a/include/asm-x86/apicdef_32.h b/include/asm-x86/apicdef_32.h
deleted file mode 100644
index 9f6995341fdc..000000000000
--- a/include/asm-x86/apicdef_32.h
+++ /dev/null
@@ -1,375 +0,0 @@
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define		APIC_DEFAULT_PHYS_BASE	0xfee00000
- 
-#define		APIC_ID		0x20
-#define		APIC_LVR	0x30
-#define			APIC_LVR_MASK		0xFF00FF
-#define			GET_APIC_VERSION(x)	((x)&0xFF)
-#define			GET_APIC_MAXLVT(x)	(((x)>>16)&0xFF)
-#define			APIC_INTEGRATED(x)	((x)&0xF0)
-#define			APIC_XAPIC(x)		((x) >= 0x14)
-#define		APIC_TASKPRI	0x80
-#define			APIC_TPRI_MASK		0xFF
-#define		APIC_ARBPRI	0x90
-#define			APIC_ARBPRI_MASK	0xFF
-#define		APIC_PROCPRI	0xA0
-#define		APIC_EOI	0xB0
-#define			APIC_EIO_ACK		0x0		/* Write this to the EOI register */
-#define		APIC_RRR	0xC0
-#define		APIC_LDR	0xD0
-#define			APIC_LDR_MASK		(0xFF<<24)
-#define			GET_APIC_LOGICAL_ID(x)	(((x)>>24)&0xFF)
-#define			SET_APIC_LOGICAL_ID(x)	(((x)<<24))
-#define			APIC_ALL_CPUS		0xFF
-#define		APIC_DFR	0xE0
-#define			APIC_DFR_CLUSTER		0x0FFFFFFFul
-#define			APIC_DFR_FLAT			0xFFFFFFFFul
-#define		APIC_SPIV	0xF0
-#define			APIC_SPIV_FOCUS_DISABLED	(1<<9)
-#define			APIC_SPIV_APIC_ENABLED		(1<<8)
-#define		APIC_ISR	0x100
-#define         APIC_ISR_NR     0x8     /* Number of 32 bit ISR registers. */
-#define		APIC_TMR	0x180
-#define 	APIC_IRR	0x200
-#define 	APIC_ESR	0x280
-#define			APIC_ESR_SEND_CS	0x00001
-#define			APIC_ESR_RECV_CS	0x00002
-#define			APIC_ESR_SEND_ACC	0x00004
-#define			APIC_ESR_RECV_ACC	0x00008
-#define			APIC_ESR_SENDILL	0x00020
-#define			APIC_ESR_RECVILL	0x00040
-#define			APIC_ESR_ILLREGA	0x00080
-#define		APIC_ICR	0x300
-#define			APIC_DEST_SELF		0x40000
-#define			APIC_DEST_ALLINC	0x80000
-#define			APIC_DEST_ALLBUT	0xC0000
-#define			APIC_ICR_RR_MASK	0x30000
-#define			APIC_ICR_RR_INVALID	0x00000
-#define			APIC_ICR_RR_INPROG	0x10000
-#define			APIC_ICR_RR_VALID	0x20000
-#define			APIC_INT_LEVELTRIG	0x08000
-#define			APIC_INT_ASSERT		0x04000
-#define			APIC_ICR_BUSY		0x01000
-#define			APIC_DEST_LOGICAL	0x00800
-#define			APIC_DM_FIXED		0x00000
-#define			APIC_DM_LOWEST		0x00100
-#define			APIC_DM_SMI		0x00200
-#define			APIC_DM_REMRD		0x00300
-#define			APIC_DM_NMI		0x00400
-#define			APIC_DM_INIT		0x00500
-#define			APIC_DM_STARTUP		0x00600
-#define			APIC_DM_EXTINT		0x00700
-#define			APIC_VECTOR_MASK	0x000FF
-#define		APIC_ICR2	0x310
-#define			GET_APIC_DEST_FIELD(x)	(((x)>>24)&0xFF)
-#define			SET_APIC_DEST_FIELD(x)	((x)<<24)
-#define		APIC_LVTT	0x320
-#define		APIC_LVTTHMR	0x330
-#define		APIC_LVTPC	0x340
-#define		APIC_LVT0	0x350
-#define			APIC_LVT_TIMER_BASE_MASK	(0x3<<18)
-#define			GET_APIC_TIMER_BASE(x)		(((x)>>18)&0x3)
-#define			SET_APIC_TIMER_BASE(x)		(((x)<<18))
-#define			APIC_TIMER_BASE_CLKIN		0x0
-#define			APIC_TIMER_BASE_TMBASE		0x1
-#define			APIC_TIMER_BASE_DIV		0x2
-#define			APIC_LVT_TIMER_PERIODIC		(1<<17)
-#define			APIC_LVT_MASKED			(1<<16)
-#define			APIC_LVT_LEVEL_TRIGGER		(1<<15)
-#define			APIC_LVT_REMOTE_IRR		(1<<14)
-#define			APIC_INPUT_POLARITY		(1<<13)
-#define			APIC_SEND_PENDING		(1<<12)
-#define			APIC_MODE_MASK			0x700
-#define			GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
-#define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
-#define				APIC_MODE_FIXED		0x0
-#define				APIC_MODE_NMI		0x4
-#define				APIC_MODE_EXTINT	0x7
-#define 	APIC_LVT1	0x360
-#define		APIC_LVTERR	0x370
-#define		APIC_TMICT	0x380
-#define		APIC_TMCCT	0x390
-#define		APIC_TDCR	0x3E0
-#define			APIC_TDR_DIV_TMBASE	(1<<2)
-#define			APIC_TDR_DIV_1		0xB
-#define			APIC_TDR_DIV_2		0x0
-#define			APIC_TDR_DIV_4		0x1
-#define			APIC_TDR_DIV_8		0x2
-#define			APIC_TDR_DIV_16		0x3
-#define			APIC_TDR_DIV_32		0x8
-#define			APIC_TDR_DIV_64		0x9
-#define			APIC_TDR_DIV_128	0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#define MAX_IO_APICS 64
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-
-struct local_apic {
-
-/*000*/	struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/	struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/	struct { /* APIC ID Register */
-		u32   __reserved_1	: 24,
-			phys_apic_id	:  4,
-			__reserved_2	:  4;
-		u32 __reserved[3];
-	} id;
-
-/*030*/	const
-	struct { /* APIC Version Register */
-		u32   version		:  8,
-			__reserved_1	:  8,
-			max_lvt		:  8,
-			__reserved_2	:  8;
-		u32 __reserved[3];
-	} version;
-
-/*040*/	struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/	struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/	struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/	struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/	struct { /* Task Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} tpr;
-
-/*090*/	const
-	struct { /* Arbitration Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} apr;
-
-/*0A0*/	const
-	struct { /* Processor Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} ppr;
-
-/*0B0*/	struct { /* End Of Interrupt Register */
-		u32   eoi;
-		u32 __reserved[3];
-	} eoi;
-
-/*0C0*/	struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/	struct { /* Logical Destination Register */
-		u32   __reserved_1	: 24,
-			logical_dest	:  8;
-		u32 __reserved_2[3];
-	} ldr;
-
-/*0E0*/	struct { /* Destination Format Register */
-		u32   __reserved_1	: 28,
-			model		:  4;
-		u32 __reserved_2[3];
-	} dfr;
-
-/*0F0*/	struct { /* Spurious Interrupt Vector Register */
-		u32	spurious_vector	:  8,
-			apic_enabled	:  1,
-			focus_cpu	:  1,
-			__reserved_2	: 22;
-		u32 __reserved_3[3];
-	} svr;
-
-/*100*/	struct { /* In Service Register */
-/*170*/		u32 bitfield;
-		u32 __reserved[3];
-	} isr [8];
-
-/*180*/	struct { /* Trigger Mode Register */
-/*1F0*/		u32 bitfield;
-		u32 __reserved[3];
-	} tmr [8];
-
-/*200*/	struct { /* Interrupt Request Register */
-/*270*/		u32 bitfield;
-		u32 __reserved[3];
-	} irr [8];
-
-/*280*/	union { /* Error Status Register */
-		struct {
-			u32   send_cs_error			:  1,
-				receive_cs_error		:  1,
-				send_accept_error		:  1,
-				receive_accept_error		:  1,
-				__reserved_1			:  1,
-				send_illegal_vector		:  1,
-				receive_illegal_vector		:  1,
-				illegal_register_address	:  1,
-				__reserved_2			: 24;
-			u32 __reserved_3[3];
-		} error_bits;
-		struct {
-			u32 errors;
-			u32 __reserved_3[3];
-		} all_errors;
-	} esr;
-
-/*290*/	struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/	struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/	struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/	struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/	struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/	struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/	struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/	struct { /* Interrupt Command Register 1 */
-		u32   vector			:  8,
-			delivery_mode		:  3,
-			destination_mode	:  1,
-			delivery_status		:  1,
-			__reserved_1		:  1,
-			level			:  1,
-			trigger			:  1,
-			__reserved_2		:  2,
-			shorthand		:  2,
-			__reserved_3		:  12;
-		u32 __reserved_4[3];
-	} icr1;
-
-/*310*/	struct { /* Interrupt Command Register 2 */
-		union {
-			u32   __reserved_1	: 24,
-				phys_dest	:  4,
-				__reserved_2	:  4;
-			u32   __reserved_3	: 24,
-				logical_dest	:  8;
-		} dest;
-		u32 __reserved_4[3];
-	} icr2;
-
-/*320*/	struct { /* LVT - Timer */
-		u32   vector		:  8,
-			__reserved_1	:  4,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			timer_mode	:  1,
-			__reserved_3	: 14;
-		u32 __reserved_4[3];
-	} lvt_timer;
-
-/*330*/	struct { /* LVT - Thermal Sensor */
-		u32  vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_thermal;
-
-/*340*/	struct { /* LVT - Performance Counter */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_pc;
-
-/*350*/	struct { /* LVT - LINT0 */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			polarity	:  1,
-			remote_irr	:  1,
-			trigger		:  1,
-			mask		:  1,
-			__reserved_2	: 15;
-		u32 __reserved_3[3];
-	} lvt_lint0;
-
-/*360*/	struct { /* LVT - LINT1 */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			polarity	:  1,
-			remote_irr	:  1,
-			trigger		:  1,
-			mask		:  1,
-			__reserved_2	: 15;
-		u32 __reserved_3[3];
-	} lvt_lint1;
-
-/*370*/	struct { /* LVT - Error */
-		u32   vector		:  8,
-			__reserved_1	:  4,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_error;
-
-/*380*/	struct { /* Timer Initial Count Register */
-		u32   initial_count;
-		u32 __reserved_2[3];
-	} timer_icr;
-
-/*390*/	const
-	struct { /* Timer Current Count Register */
-		u32   curr_count;
-		u32 __reserved_2[3];
-	} timer_ccr;
-
-/*3A0*/	struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/	struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/	struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/	struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/	struct { /* Timer Divide Configuration Register */
-		u32   divisor		:  4,
-			__reserved_1	: 28;
-		u32 __reserved_2[3];
-	} timer_dcr;
-
-/*3F0*/	struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#endif
diff --git a/include/asm-x86/apicdef_64.h b/include/asm-x86/apicdef_64.h
deleted file mode 100644
index 1dd40067c67c..000000000000
--- a/include/asm-x86/apicdef_64.h
+++ /dev/null
@@ -1,392 +0,0 @@
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define		APIC_DEFAULT_PHYS_BASE	0xfee00000
- 
-#define		APIC_ID		0x20
-#define			APIC_ID_MASK		(0xFFu<<24)
-#define			GET_APIC_ID(x)		(((x)>>24)&0xFFu)
-#define			SET_APIC_ID(x)		(((x)<<24))
-#define		APIC_LVR	0x30
-#define			APIC_LVR_MASK		0xFF00FF
-#define			GET_APIC_VERSION(x)	((x)&0xFFu)
-#define			GET_APIC_MAXLVT(x)	(((x)>>16)&0xFFu)
-#define			APIC_INTEGRATED(x)	((x)&0xF0u)
-#define		APIC_TASKPRI	0x80
-#define			APIC_TPRI_MASK		0xFFu
-#define		APIC_ARBPRI	0x90
-#define			APIC_ARBPRI_MASK	0xFFu
-#define		APIC_PROCPRI	0xA0
-#define		APIC_EOI	0xB0
-#define			APIC_EIO_ACK		0x0		/* Write this to the EOI register */
-#define		APIC_RRR	0xC0
-#define		APIC_LDR	0xD0
-#define			APIC_LDR_MASK		(0xFFu<<24)
-#define			GET_APIC_LOGICAL_ID(x)	(((x)>>24)&0xFFu)
-#define			SET_APIC_LOGICAL_ID(x)	(((x)<<24))
-#define			APIC_ALL_CPUS		0xFFu
-#define		APIC_DFR	0xE0
-#define			APIC_DFR_CLUSTER		0x0FFFFFFFul
-#define			APIC_DFR_FLAT			0xFFFFFFFFul
-#define		APIC_SPIV	0xF0
-#define			APIC_SPIV_FOCUS_DISABLED	(1<<9)
-#define			APIC_SPIV_APIC_ENABLED		(1<<8)
-#define		APIC_ISR	0x100
-#define		APIC_ISR_NR	0x8	/* Number of 32 bit ISR registers. */
-#define		APIC_TMR	0x180
-#define 	APIC_IRR	0x200
-#define 	APIC_ESR	0x280
-#define			APIC_ESR_SEND_CS	0x00001
-#define			APIC_ESR_RECV_CS	0x00002
-#define			APIC_ESR_SEND_ACC	0x00004
-#define			APIC_ESR_RECV_ACC	0x00008
-#define			APIC_ESR_SENDILL	0x00020
-#define			APIC_ESR_RECVILL	0x00040
-#define			APIC_ESR_ILLREGA	0x00080
-#define		APIC_ICR	0x300
-#define			APIC_DEST_SELF		0x40000
-#define			APIC_DEST_ALLINC	0x80000
-#define			APIC_DEST_ALLBUT	0xC0000
-#define			APIC_ICR_RR_MASK	0x30000
-#define			APIC_ICR_RR_INVALID	0x00000
-#define			APIC_ICR_RR_INPROG	0x10000
-#define			APIC_ICR_RR_VALID	0x20000
-#define			APIC_INT_LEVELTRIG	0x08000
-#define			APIC_INT_ASSERT		0x04000
-#define			APIC_ICR_BUSY		0x01000
-#define			APIC_DEST_LOGICAL	0x00800
-#define			APIC_DEST_PHYSICAL	0x00000
-#define			APIC_DM_FIXED		0x00000
-#define			APIC_DM_LOWEST		0x00100
-#define			APIC_DM_SMI		0x00200
-#define			APIC_DM_REMRD		0x00300
-#define			APIC_DM_NMI		0x00400
-#define			APIC_DM_INIT		0x00500
-#define			APIC_DM_STARTUP		0x00600
-#define			APIC_DM_EXTINT		0x00700
-#define			APIC_VECTOR_MASK	0x000FF
-#define		APIC_ICR2	0x310
-#define			GET_APIC_DEST_FIELD(x)	(((x)>>24)&0xFF)
-#define			SET_APIC_DEST_FIELD(x)	((x)<<24)
-#define		APIC_LVTT	0x320
-#define		APIC_LVTTHMR	0x330
-#define		APIC_LVTPC	0x340
-#define		APIC_LVT0	0x350
-#define			APIC_LVT_TIMER_BASE_MASK	(0x3<<18)
-#define			GET_APIC_TIMER_BASE(x)		(((x)>>18)&0x3)
-#define			SET_APIC_TIMER_BASE(x)		(((x)<<18))
-#define			APIC_TIMER_BASE_CLKIN		0x0
-#define			APIC_TIMER_BASE_TMBASE		0x1
-#define			APIC_TIMER_BASE_DIV		0x2
-#define			APIC_LVT_TIMER_PERIODIC		(1<<17)
-#define			APIC_LVT_MASKED			(1<<16)
-#define			APIC_LVT_LEVEL_TRIGGER		(1<<15)
-#define			APIC_LVT_REMOTE_IRR		(1<<14)
-#define			APIC_INPUT_POLARITY		(1<<13)
-#define			APIC_SEND_PENDING		(1<<12)
-#define			APIC_MODE_MASK			0x700
-#define			GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
-#define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
-#define				APIC_MODE_FIXED		0x0
-#define				APIC_MODE_NMI		0x4
-#define				APIC_MODE_EXTINT	0x7
-#define 	APIC_LVT1	0x360
-#define		APIC_LVTERR	0x370
-#define		APIC_TMICT	0x380
-#define		APIC_TMCCT	0x390
-#define		APIC_TDCR	0x3E0
-#define			APIC_TDR_DIV_TMBASE	(1<<2)
-#define			APIC_TDR_DIV_1		0xB
-#define			APIC_TDR_DIV_2		0x0
-#define			APIC_TDR_DIV_4		0x1
-#define			APIC_TDR_DIV_8		0x2
-#define			APIC_TDR_DIV_16		0x3
-#define			APIC_TDR_DIV_32		0x8
-#define			APIC_TDR_DIV_64		0x9
-#define			APIC_TDR_DIV_128	0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#define MAX_IO_APICS 128
-#define MAX_LOCAL_APIC 256
-
-/*
- * All x86-64 systems are xAPIC compatible.
- * In the following, "apicid" is a physical APIC ID.
- */
-#define XAPIC_DEST_CPUS_SHIFT	4
-#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-#define APIC_CLUSTER(apicid)	((apicid) & XAPIC_DEST_CLUSTER_MASK)
-#define APIC_CLUSTERID(apicid)	(APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT)
-#define APIC_CPUID(apicid)	((apicid) & XAPIC_DEST_CPUS_MASK)
-#define NUM_APIC_CLUSTERS	((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-struct local_apic {
-
-/*000*/	struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/	struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/	struct { /* APIC ID Register */
-		u32   __reserved_1	: 24,
-			phys_apic_id	:  4,
-			__reserved_2	:  4;
-		u32 __reserved[3];
-	} id;
-
-/*030*/	const
-	struct { /* APIC Version Register */
-		u32   version		:  8,
-			__reserved_1	:  8,
-			max_lvt		:  8,
-			__reserved_2	:  8;
-		u32 __reserved[3];
-	} version;
-
-/*040*/	struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/	struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/	struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/	struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/	struct { /* Task Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} tpr;
-
-/*090*/	const
-	struct { /* Arbitration Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} apr;
-
-/*0A0*/	const
-	struct { /* Processor Priority Register */
-		u32   priority	:  8,
-			__reserved_1	: 24;
-		u32 __reserved_2[3];
-	} ppr;
-
-/*0B0*/	struct { /* End Of Interrupt Register */
-		u32   eoi;
-		u32 __reserved[3];
-	} eoi;
-
-/*0C0*/	struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/	struct { /* Logical Destination Register */
-		u32   __reserved_1	: 24,
-			logical_dest	:  8;
-		u32 __reserved_2[3];
-	} ldr;
-
-/*0E0*/	struct { /* Destination Format Register */
-		u32   __reserved_1	: 28,
-			model		:  4;
-		u32 __reserved_2[3];
-	} dfr;
-
-/*0F0*/	struct { /* Spurious Interrupt Vector Register */
-		u32	spurious_vector	:  8,
-			apic_enabled	:  1,
-			focus_cpu	:  1,
-			__reserved_2	: 22;
-		u32 __reserved_3[3];
-	} svr;
-
-/*100*/	struct { /* In Service Register */
-/*170*/		u32 bitfield;
-		u32 __reserved[3];
-	} isr [8];
-
-/*180*/	struct { /* Trigger Mode Register */
-/*1F0*/		u32 bitfield;
-		u32 __reserved[3];
-	} tmr [8];
-
-/*200*/	struct { /* Interrupt Request Register */
-/*270*/		u32 bitfield;
-		u32 __reserved[3];
-	} irr [8];
-
-/*280*/	union { /* Error Status Register */
-		struct {
-			u32   send_cs_error			:  1,
-				receive_cs_error		:  1,
-				send_accept_error		:  1,
-				receive_accept_error		:  1,
-				__reserved_1			:  1,
-				send_illegal_vector		:  1,
-				receive_illegal_vector		:  1,
-				illegal_register_address	:  1,
-				__reserved_2			: 24;
-			u32 __reserved_3[3];
-		} error_bits;
-		struct {
-			u32 errors;
-			u32 __reserved_3[3];
-		} all_errors;
-	} esr;
-
-/*290*/	struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/	struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/	struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/	struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/	struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/	struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/	struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/	struct { /* Interrupt Command Register 1 */
-		u32   vector			:  8,
-			delivery_mode		:  3,
-			destination_mode	:  1,
-			delivery_status		:  1,
-			__reserved_1		:  1,
-			level			:  1,
-			trigger			:  1,
-			__reserved_2		:  2,
-			shorthand		:  2,
-			__reserved_3		:  12;
-		u32 __reserved_4[3];
-	} icr1;
-
-/*310*/	struct { /* Interrupt Command Register 2 */
-		union {
-			u32   __reserved_1	: 24,
-				phys_dest	:  4,
-				__reserved_2	:  4;
-			u32   __reserved_3	: 24,
-				logical_dest	:  8;
-		} dest;
-		u32 __reserved_4[3];
-	} icr2;
-
-/*320*/	struct { /* LVT - Timer */
-		u32   vector		:  8,
-			__reserved_1	:  4,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			timer_mode	:  1,
-			__reserved_3	: 14;
-		u32 __reserved_4[3];
-	} lvt_timer;
-
-/*330*/	struct { /* LVT - Thermal Sensor */
-		u32  vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_thermal;
-
-/*340*/	struct { /* LVT - Performance Counter */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_pc;
-
-/*350*/	struct { /* LVT - LINT0 */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			polarity	:  1,
-			remote_irr	:  1,
-			trigger		:  1,
-			mask		:  1,
-			__reserved_2	: 15;
-		u32 __reserved_3[3];
-	} lvt_lint0;
-
-/*360*/	struct { /* LVT - LINT1 */
-		u32   vector		:  8,
-			delivery_mode	:  3,
-			__reserved_1	:  1,
-			delivery_status	:  1,
-			polarity	:  1,
-			remote_irr	:  1,
-			trigger		:  1,
-			mask		:  1,
-			__reserved_2	: 15;
-		u32 __reserved_3[3];
-	} lvt_lint1;
-
-/*370*/	struct { /* LVT - Error */
-		u32   vector		:  8,
-			__reserved_1	:  4,
-			delivery_status	:  1,
-			__reserved_2	:  3,
-			mask		:  1,
-			__reserved_3	: 15;
-		u32 __reserved_4[3];
-	} lvt_error;
-
-/*380*/	struct { /* Timer Initial Count Register */
-		u32   initial_count;
-		u32 __reserved_2[3];
-	} timer_icr;
-
-/*390*/	const
-	struct { /* Timer Current Count Register */
-		u32   curr_count;
-		u32 __reserved_2[3];
-	} timer_ccr;
-
-/*3A0*/	struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/	struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/	struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/	struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/	struct { /* Timer Divide Configuration Register */
-		u32   divisor		:  4,
-			__reserved_1	: 28;
-		u32 __reserved_2[3];
-	} timer_dcr;
-
-/*3F0*/	struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#define BAD_APICID 0xFFu
-
-#endif

From cff90dbfe41f2c8eabe2d0a7e379829f4a9980af Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:14 +0100
Subject: [PATCH 053/890] x86: prepare unification of
 include/asm-x86/apic_32/64.h

White space and coding style clenaup.

Move the K8 local apic defines to apicdef.h, where they belong

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apic_32.h | 49 ++++++++++++++++++---------------------
 include/asm-x86/apic_64.h | 39 +++++++++++++------------------
 include/asm-x86/apicdef.h | 10 +++++++-
 3 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h
index be158b27d54b..f909e2daf226 100644
--- a/include/asm-x86/apic_32.h
+++ b/include/asm-x86/apic_32.h
@@ -17,8 +17,6 @@
 #define APIC_VERBOSE 1
 #define APIC_DEBUG   2
 
-extern int apic_verbosity;
-
 /*
  * Define the default level of output to be very little
  * This can be turned up by using apic=verbose for more
@@ -35,6 +33,11 @@ extern void generic_apic_probe(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
+extern int apic_verbosity;
+extern int timer_over_8254;
+extern int local_apic_timer_c2_ok;
+extern int local_apic_timer_disabled;
+
 /*
  * Basic functions accessing APICs.
  */
@@ -65,9 +68,9 @@ static __inline fastcall unsigned long native_apic_read(unsigned long reg)
 	return *((volatile unsigned long *)(APIC_BASE+reg));
 }
 
-void apic_wait_icr_idle(void);
-unsigned long safe_apic_wait_icr_idle(void);
-int get_physical_broadcast(void);
+extern void apic_wait_icr_idle(void);
+extern unsigned long safe_apic_wait_icr_idle(void);
+extern int get_physical_broadcast(void);
 
 #ifdef CONFIG_X86_GOOD_APIC
 # define FORCE_READ_AROUND_WRITE 0
@@ -94,30 +97,24 @@ static inline void ack_APIC_irq(void)
 
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (int virt_wire_setup);
-extern void disable_local_APIC (void);
-extern void lapic_shutdown (void);
-extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (void);
-extern void setup_boot_APIC_clock (void);
-extern void setup_secondary_APIC_clock (void);
-extern int APIC_init_uniprocessor (void);
-
-extern void enable_NMI_through_LVT0 (void * dummy);
+extern void connect_bsp_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern int verify_local_APIC(void);
+extern void cache_APIC_registers(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+extern void smp_local_timer_interrupt(void);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+extern void enable_NMI_through_LVT0(void *dummy);
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
-extern int timer_over_8254;
-extern int local_apic_timer_c2_ok;
-
-extern int local_apic_timer_disabled;
-
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
index 2747a11a2b19..9a0ec02a49a1 100644
--- a/include/asm-x86/apic_64.h
+++ b/include/asm-x86/apic_64.h
@@ -64,22 +64,22 @@ static inline void ack_APIC_irq(void)
 	apic_write(APIC_EOI, 0);
 }
 
-extern int get_maxlvt (void);
-extern void clear_local_APIC (void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (int virt_wire_setup);
-extern void disable_local_APIC (void);
-extern void lapic_shutdown (void);
-extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (void);
-extern void setup_boot_APIC_clock (void);
-extern void setup_secondary_APIC_clock (void);
-extern int APIC_init_uniprocessor (void);
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern int verify_local_APIC(void);
+extern void cache_APIC_registers(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+extern void smp_local_timer_interrupt(void);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
 extern void setup_apic_routing(void);
 
 extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
@@ -87,13 +87,6 @@ extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
 
 extern int apic_is_clustered_box(void);
 
-#define K8_APIC_EXT_LVT_BASE    0x500
-#define K8_APIC_EXT_INT_MSG_FIX 0x0
-#define K8_APIC_EXT_INT_MSG_SMI 0x2
-#define K8_APIC_EXT_INT_MSG_NMI 0x4
-#define K8_APIC_EXT_INT_MSG_EXT 0x7
-#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD    0
-
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
 extern unsigned boot_cpu_id;
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 83ac1e6e3625..5f7abe9b5f87 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -98,7 +98,7 @@
 #define		APIC_SEND_PENDING		(1<<12)
 #define		APIC_MODE_MASK			0x700
 #define		GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
-#define		SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
+#define		SET_APIC_DELIVERY_MODE(x, y)	(((x)&~0x700)|((y)<<8))
 #define			APIC_MODE_FIXED		0x0
 #define			APIC_MODE_NMI		0x4
 #define			APIC_MODE_EXTINT	0x7
@@ -116,6 +116,14 @@
 #define		APIC_TDR_DIV_32		0x8
 #define		APIC_TDR_DIV_64		0x9
 #define		APIC_TDR_DIV_128	0xA
+
+#define K8_APIC_EXT_LVT_BASE		0x500
+#define K8_APIC_EXT_INT_MSG_FIX		0x0
+#define K8_APIC_EXT_INT_MSG_SMI		0x2
+#define K8_APIC_EXT_INT_MSG_NMI		0x4
+#define K8_APIC_EXT_INT_MSG_EXT		0x7
+#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD	0
+
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 
 #ifdef CONFIG_X86_32

From 37e650c7c8a27de533d409b53c29f4135dcc7af6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:14 +0100
Subject: [PATCH 054/890] x86: rename get_maxlvt to lapic_get_maxlvt

Use the same name for the 32 and 64 bit variant.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    | 12 ++++++------
 arch/x86/kernel/io_apic_64.c |  2 +-
 arch/x86/kernel/smpboot_64.c |  2 +-
 include/asm-x86/apic_64.h    |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index fa6cdee6d303..dfeda91fa80c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -160,7 +160,7 @@ void enable_NMI_through_LVT0 (void * dummy)
 	apic_write(APIC_LVT0, v);
 }
 
-int get_maxlvt(void)
+int lapic_get_maxlvt(void)
 {
 	unsigned int v, maxlvt;
 
@@ -194,7 +194,7 @@ void clear_local_APIC(void)
 	int maxlvt;
 	unsigned int v;
 
-	maxlvt = get_maxlvt();
+	maxlvt = lapic_get_maxlvt();
 
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
@@ -333,7 +333,7 @@ int __init verify_local_APIC(void)
 	reg1 = GET_APIC_VERSION(reg0);
 	if (reg1 == 0x00 || reg1 == 0xff)
 		return 0;
-	reg1 = get_maxlvt();
+	reg1 = lapic_get_maxlvt();
 	if (reg1 < 0x02 || reg1 == 0xff)
 		return 0;
 
@@ -519,7 +519,7 @@ void __cpuinit setup_local_APIC (void)
 
 	{
 		unsigned oldvalue;
-		maxlvt = get_maxlvt();
+		maxlvt = lapic_get_maxlvt();
 		oldvalue = apic_read(APIC_ESR);
 		value = ERROR_APIC_VECTOR;      // enables sending errors
 		apic_write(APIC_LVTERR, value);
@@ -571,7 +571,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	if (!apic_pm_state.active)
 		return 0;
 
-	maxlvt = get_maxlvt();
+	maxlvt = lapic_get_maxlvt();
 
 	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
@@ -605,7 +605,7 @@ static int lapic_resume(struct sys_device *dev)
 	if (!apic_pm_state.active)
 		return 0;
 
-	maxlvt = get_maxlvt();
+	maxlvt = lapic_get_maxlvt();
 
 	local_irq_save(flags);
 	rdmsr(MSR_IA32_APICBASE, l, h);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 23a3ac06a23e..d4f5286101a9 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1069,7 +1069,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
-	maxlvt = get_maxlvt();
+	maxlvt = lapic_get_maxlvt();
 
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index aaf4e1291217..8147b7d4db63 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -466,7 +466,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
 	 */
 	Dprintk("#startup loops: %d.\n", num_starts);
 
-	maxlvt = get_maxlvt();
+	maxlvt = lapic_get_maxlvt();
 
 	for (j = 1; j <= num_starts; j++) {
 		Dprintk("Sending STARTUP #%d.\n",j);
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
index 9a0ec02a49a1..b5f850f25114 100644
--- a/include/asm-x86/apic_64.h
+++ b/include/asm-x86/apic_64.h
@@ -64,7 +64,7 @@ static inline void ack_APIC_irq(void)
 	apic_write(APIC_EOI, 0);
 }
 
-extern int get_maxlvt(void);
+extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);

From 3c6bb07ac1b4174318606a26f0de8ceb9f6d8133 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:15 +0100
Subject: [PATCH 055/890] x86: use u32 for safe_apic_wait_icr_idle()

Preperatory patch for merging apic headers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    |  4 ++--
 arch/x86/kernel/smpboot_64.c | 11 ++++++-----
 include/asm-x86/apic_64.h    |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index dfeda91fa80c..3de3764a862c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -135,9 +135,9 @@ void apic_wait_icr_idle(void)
 		cpu_relax();
 }
 
-unsigned int safe_apic_wait_icr_idle(void)
+u32 safe_apic_wait_icr_idle(void)
 {
-	unsigned int send_status;
+	u32 send_status;
 	int timeout;
 
 	timeout = 0;
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 8147b7d4db63..b36d32ff0b39 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -384,19 +384,20 @@ static void inquire_remote_apic(int apicid)
 	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
 	char *names[] = { "ID", "VERSION", "SPIV" };
 	int timeout;
-	unsigned int status;
+	u32 status;
 
 	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk("... APIC #%d %s: ", apicid, names[i]);
+		printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
 		 */
 		status = safe_apic_wait_icr_idle();
 		if (status)
-			printk("a previous APIC delivery may have failed\n");
+			printk(KERN_CONT
+			       "a previous APIC delivery may have failed\n");
 
 		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
 		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -410,10 +411,10 @@ static void inquire_remote_apic(int apicid)
 		switch (status) {
 		case APIC_ICR_RR_VALID:
 			status = apic_read(APIC_RRR);
-			printk("%08x\n", status);
+			printk(KERN_CONT "%08x\n", status);
 			break;
 		default:
-			printk("failed\n");
+			printk(KERN_CONT "failed\n");
 		}
 	}
 }
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
index b5f850f25114..7bfad0224178 100644
--- a/include/asm-x86/apic_64.h
+++ b/include/asm-x86/apic_64.h
@@ -49,7 +49,7 @@ static __inline unsigned int apic_read(unsigned long reg)
 }
 
 extern void apic_wait_icr_idle(void);
-extern unsigned int safe_apic_wait_icr_idle(void);
+extern u32 safe_apic_wait_icr_idle(void);
 
 static inline void ack_APIC_irq(void)
 {

From 42e0a9aa5d467188687c6b705412578e53c14af6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:15 +0100
Subject: [PATCH 056/890] x86: use u32 for some lapic functions

Use u32 so 32 and 64bit have the same interface.

Andrew Morton: xen, lguest build fixes

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c  |  4 ++--
 arch/x86/lguest/boot.c     |  4 ++--
 arch/x86/xen/enlighten.c   |  4 ++--
 include/asm-x86/apic_32.h  | 16 +++++++---------
 include/asm-x86/apic_64.h  |  6 +++---
 include/asm-x86/paravirt.h | 12 ++++++------
 6 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a56c782653be..3a069acb270c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -135,9 +135,9 @@ void apic_wait_icr_idle(void)
 		cpu_relax();
 }
 
-unsigned long safe_apic_wait_icr_idle(void)
+u32 safe_apic_wait_icr_idle(void)
 {
-	unsigned long send_status;
+	u32 send_status;
 	int timeout;
 
 	timeout = 0;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 92c56117eae5..df04bf884dd4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -788,11 +788,11 @@ static void lguest_wbinvd(void)
  * code qualifies for Advanced.  It will also never interrupt anything.  It
  * does, however, allow us to get through the Linux boot code. */
 #ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, unsigned long v)
+static void lguest_apic_write(unsigned long reg, u32 v)
 {
 }
 
-static unsigned long lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(unsigned long reg)
 {
 	return 0;
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 79ad15252150..00829401389e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -521,12 +521,12 @@ static void xen_io_delay(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(unsigned long reg)
 {
 	return 0;
 }
 
-static void xen_apic_write(unsigned long reg, unsigned long val)
+static void xen_apic_write(unsigned long reg, u32 val)
 {
 	/* Warn to see if there's any stray references */
 	WARN_ON(1);
diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h
index f909e2daf226..649e9a6f6683 100644
--- a/include/asm-x86/apic_32.h
+++ b/include/asm-x86/apic_32.h
@@ -51,25 +51,23 @@ extern int local_apic_timer_disabled;
 #define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
-static __inline fastcall void native_apic_write(unsigned long reg,
-						unsigned long v)
+static __inline fastcall void native_apic_write(unsigned long reg, u32 v)
 {
-	*((volatile unsigned long *)(APIC_BASE+reg)) = v;
+	*((volatile u32 *)(APIC_BASE + reg)) = v;
 }
 
-static __inline fastcall void native_apic_write_atomic(unsigned long reg,
-						       unsigned long v)
+static __inline fastcall void native_apic_write_atomic(unsigned long reg, u32 v)
 {
-	xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+	(void) xchg((u32 *)(APIC_BASE + reg), v);
 }
 
-static __inline fastcall unsigned long native_apic_read(unsigned long reg)
+static __inline fastcall u32 native_apic_read(unsigned long reg)
 {
-	return *((volatile unsigned long *)(APIC_BASE+reg));
+	return *((volatile u32 *)(APIC_BASE + reg));
 }
 
 extern void apic_wait_icr_idle(void);
-extern unsigned long safe_apic_wait_icr_idle(void);
+extern u32 safe_apic_wait_icr_idle(void);
 extern int get_physical_broadcast(void);
 
 #ifdef CONFIG_X86_GOOD_APIC
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
index 7bfad0224178..9d0c06c4df91 100644
--- a/include/asm-x86/apic_64.h
+++ b/include/asm-x86/apic_64.h
@@ -38,14 +38,14 @@ struct pt_regs;
  * Basic functions accessing APICs.
  */
 
-static __inline void apic_write(unsigned long reg, unsigned int v)
+static __inline void apic_write(unsigned long reg, u32 v)
 {
 	*((volatile unsigned int *)(APIC_BASE+reg)) = v;
 }
 
-static __inline unsigned int apic_read(unsigned long reg)
+static __inline u32 apic_read(unsigned long reg)
 {
-	return *((volatile unsigned int *)(APIC_BASE+reg));
+	return *((volatile u32 *)(APIC_BASE+reg));
 }
 
 extern void apic_wait_icr_idle(void);
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index f59d370c5df4..19fd3e67b08c 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -150,9 +150,9 @@ struct pv_apic_ops {
 	 * Direct APIC operations, principally for VMI.  Ideally
 	 * these shouldn't be in this interface.
 	 */
-	void (*apic_write)(unsigned long reg, unsigned long v);
-	void (*apic_write_atomic)(unsigned long reg, unsigned long v);
-	unsigned long (*apic_read)(unsigned long reg);
+	void (*apic_write)(unsigned long reg, u32 v);
+	void (*apic_write_atomic)(unsigned long reg, u32 v);
+	u32 (*apic_read)(unsigned long reg);
 	void (*setup_boot_clock)(void);
 	void (*setup_secondary_clock)(void);
 
@@ -690,17 +690,17 @@ static inline void slow_down_io(void) {
 /*
  * Basic functions accessing APICs.
  */
-static inline void apic_write(unsigned long reg, unsigned long v)
+static inline void apic_write(unsigned long reg, u32 v)
 {
 	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
 }
 
-static inline void apic_write_atomic(unsigned long reg, unsigned long v)
+static inline void apic_write_atomic(unsigned long reg, u32 v)
 {
 	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
 }
 
-static inline unsigned long apic_read(unsigned long reg)
+static inline u32 apic_read(unsigned long reg)
 {
 	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
 }

From 67c5fc5c330f63360e26609534b219df1aaa03ca Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:15 +0100
Subject: [PATCH 057/890] x86: merge apic_32/64.h

Unify apic.h variants.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/apic.h    | 140 +++++++++++++++++++++++++++++++++++++-
 include/asm-x86/apic_32.h | 122 ---------------------------------
 include/asm-x86/apic_64.h |  95 --------------------------
 3 files changed, 137 insertions(+), 220 deletions(-)
 delete mode 100644 include/asm-x86/apic_32.h
 delete mode 100644 include/asm-x86/apic_64.h

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 9fbcc0bd2ac4..c064c1f84bab 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -1,5 +1,139 @@
-#ifdef CONFIG_X86_32
-# include "apic_32.h"
+#ifndef _ASM_X86_APIC_H
+#define _ASM_X86_APIC_H
+
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#define ARCH_APICTIMER_STOPS_ON_C3	1
+
+#define Dprintk(x...)
+
+/*
+ * Debugging macros
+ */
+#define APIC_QUIET   0
+#define APIC_VERBOSE 1
+#define APIC_DEBUG   2
+
+/*
+ * Define the default level of output to be very little
+ * This can be turned up by using apic=verbose for more
+ * information and apic=debug for _lots_ of information.
+ * apic_verbosity is defined in apic.c
+ */
+#define apic_printk(v, s, a...) do {       \
+		if ((v) <= apic_verbosity) \
+			printk(s, ##a);    \
+	} while (0)
+
+
+extern void generic_apic_probe(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+extern int apic_verbosity;
+extern int timer_over_8254;
+extern int local_apic_timer_c2_ok;
+extern int local_apic_timer_disabled;
+
+extern int apic_runs_main_timer;
+extern int ioapic_force;
+extern int disable_apic_timer;
+extern unsigned boot_cpu_id;
+
+/*
+ * Basic functions accessing APICs.
+ */
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
 #else
-# include "apic_64.h"
+#define apic_write native_apic_write
+#define apic_write_atomic native_apic_write_atomic
+#define apic_read native_apic_read
+#define setup_boot_clock setup_boot_APIC_clock
+#define setup_secondary_clock setup_secondary_APIC_clock
 #endif
+
+static inline fastcall void native_apic_write(unsigned long reg, u32 v)
+{
+	*((volatile u32 *)(APIC_BASE + reg)) = v;
+}
+
+static inline fastcall void native_apic_write_atomic(unsigned long reg, u32 v)
+{
+	(void) xchg((u32*)(APIC_BASE + reg), v);
+}
+
+static inline fastcall u32 native_apic_read(unsigned long reg)
+{
+	return *((volatile u32 *)(APIC_BASE + reg));
+}
+
+extern void apic_wait_icr_idle(void);
+extern u32 safe_apic_wait_icr_idle(void);
+extern int get_physical_broadcast(void);
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x, y) apic_write((x), (y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x, y) apic_write_atomic((x), (y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+	/*
+	 * ack_APIC_irq() actually gets compiled as a single instruction:
+	 * - a single rmw on Pentium/82489DX
+	 * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+	 * ... yummie.
+	 */
+
+	/* Docs say use 0 for future compatibility */
+	apic_write_around(APIC_EOI, 0);
+}
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern int verify_local_APIC(void);
+extern void cache_APIC_registers(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+extern void smp_local_timer_interrupt(void);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+extern void enable_NMI_through_LVT0(void *dummy);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern void setup_apic_routing(void);
+#endif
+
+extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
+				    unsigned char msg_type, unsigned char mask);
+
+extern int apic_is_clustered_box(void);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok		1
+
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h
deleted file mode 100644
index 649e9a6f6683..000000000000
--- a/include/asm-x86/apic_32.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <linux/pm.h>
-#include <linux/delay.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#define Dprintk(x...)
-
-/*
- * Debugging macros
- */
-#define APIC_QUIET   0
-#define APIC_VERBOSE 1
-#define APIC_DEBUG   2
-
-/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
- */
-#define apic_printk(v, s, a...) do {       \
-		if ((v) <= apic_verbosity) \
-			printk(s, ##a);    \
-	} while (0)
-
-
-extern void generic_apic_probe(void);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-extern int apic_verbosity;
-extern int timer_over_8254;
-extern int local_apic_timer_c2_ok;
-extern int local_apic_timer_disabled;
-
-/*
- * Basic functions accessing APICs.
- */
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define apic_write native_apic_write
-#define apic_write_atomic native_apic_write_atomic
-#define apic_read native_apic_read
-#define setup_boot_clock setup_boot_APIC_clock
-#define setup_secondary_clock setup_secondary_APIC_clock
-#endif
-
-static __inline fastcall void native_apic_write(unsigned long reg, u32 v)
-{
-	*((volatile u32 *)(APIC_BASE + reg)) = v;
-}
-
-static __inline fastcall void native_apic_write_atomic(unsigned long reg, u32 v)
-{
-	(void) xchg((u32 *)(APIC_BASE + reg), v);
-}
-
-static __inline fastcall u32 native_apic_read(unsigned long reg)
-{
-	return *((volatile u32 *)(APIC_BASE + reg));
-}
-
-extern void apic_wait_icr_idle(void);
-extern u32 safe_apic_wait_icr_idle(void);
-extern int get_physical_broadcast(void);
-
-#ifdef CONFIG_X86_GOOD_APIC
-# define FORCE_READ_AROUND_WRITE 0
-# define apic_read_around(x)
-# define apic_write_around(x,y) apic_write((x),(y))
-#else
-# define FORCE_READ_AROUND_WRITE 1
-# define apic_read_around(x) apic_read(x)
-# define apic_write_around(x,y) apic_write_atomic((x),(y))
-#endif
-
-static inline void ack_APIC_irq(void)
-{
-	/*
-	 * ack_APIC_irq() actually gets compiled as a single instruction:
-	 * - a single rmw on Pentium/82489DX
-	 * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
-	 * ... yummie.
-	 */
-
-	/* Docs say use 0 for future compatibility */
-	apic_write_around(APIC_EOI, 0);
-}
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void connect_bsp_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
-extern void cache_APIC_registers(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-extern void smp_local_timer_interrupt(void);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-extern void enable_NMI_through_LVT0(void *dummy);
-
-#define ARCH_APICTIMER_STOPS_ON_C3	1
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok		1
-
-#endif /* !CONFIG_X86_LOCAL_APIC */
-
-#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
deleted file mode 100644
index 9d0c06c4df91..000000000000
--- a/include/asm-x86/apic_64.h
+++ /dev/null
@@ -1,95 +0,0 @@
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <linux/pm.h>
-#include <linux/delay.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/system.h>
-
-#define Dprintk(x...)
-
-/*
- * Debugging macros
- */
-#define APIC_QUIET   0
-#define APIC_VERBOSE 1
-#define APIC_DEBUG   2
-
-extern int apic_verbosity;
-extern int apic_runs_main_timer;
-extern int ioapic_force;
-extern int disable_apic_timer;
-
-/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
- */
-#define apic_printk(v, s, a...) do {       \
-		if ((v) <= apic_verbosity) \
-			printk(s, ##a);    \
-	} while (0)
-
-struct pt_regs;
-
-/*
- * Basic functions accessing APICs.
- */
-
-static __inline void apic_write(unsigned long reg, u32 v)
-{
-	*((volatile unsigned int *)(APIC_BASE+reg)) = v;
-}
-
-static __inline u32 apic_read(unsigned long reg)
-{
-	return *((volatile u32 *)(APIC_BASE+reg));
-}
-
-extern void apic_wait_icr_idle(void);
-extern u32 safe_apic_wait_icr_idle(void);
-
-static inline void ack_APIC_irq(void)
-{
-	/*
-	 * ack_APIC_irq() actually gets compiled as a single instruction:
-	 * - a single rmw on Pentium/82489DX
-	 * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
-	 * ... yummie.
-	 */
-
-	/* Docs say use 0 for future compatibility */
-	apic_write(APIC_EOI, 0);
-}
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void connect_bsp_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
-extern void cache_APIC_registers(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-extern void smp_local_timer_interrupt(void);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-extern void setup_apic_routing(void);
-
-extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
-				    unsigned char msg_type, unsigned char mask);
-
-extern int apic_is_clustered_box(void);
-
-#define ARCH_APICTIMER_STOPS_ON_C3	1
-
-extern unsigned boot_cpu_id;
-extern int local_apic_timer_c2_ok;
-
-#endif /* __ASM_APIC_H */

From 63fefb7d5bac6e65b5df2b7eb4aa56bb4bbadd48 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:15 +0100
Subject: [PATCH 058/890] x86: remove duplicated declarations

Remove declarations which are made already in the appropriate header file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io_apic_64.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
index e2c13675ee4e..762a841b939e 100644
--- a/include/asm-x86/io_apic_64.h
+++ b/include/asm-x86/io_apic_64.h
@@ -129,10 +129,4 @@ extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int);
 
 extern int sis_apic_bug; /* dummy */ 
 
-void enable_NMI_through_LVT0 (void * dummy);
-
-extern spinlock_t i8259A_lock;
-
-extern int timer_over_8254;
-
 #endif

From 376ff0352c24a5fa47f1250dd60937b5a9077672 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:16 +0100
Subject: [PATCH 059/890] x86: move acpi and pci declarations

Move acpi/pci related declarations to the correct headers
and remove the duplicate.

Build fix from: Andrew Morton

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 3 +--
 arch/x86/kernel/setup_64.c | 2 --
 include/asm-x86/acpi_32.h  | 2 --
 include/asm-x86/pci.h      | 4 +++-
 include/asm-x86/proto.h    | 3 ---
 5 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 51bdc0b1b72e..236d30b264d8 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -44,6 +44,7 @@
 #include <linux/crash_dump.h>
 #include <linux/dmi.h>
 #include <linux/pfn.h>
+#include <linux/pci.h>
 
 #include <video/edid.h>
 
@@ -663,9 +664,7 @@ void __init setup_arch(char **cmdline_p)
 	acpi_boot_table_init();
 #endif
 
-#ifdef CONFIG_PCI
 	early_quirks();
-#endif
 
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index ec976edf0399..97a497652d20 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -407,9 +407,7 @@ void __init setup_arch(char **cmdline_p)
 	reserve_crashkernel();
 	paging_init();
 
-#ifdef CONFIG_PCI
 	early_quirks();
-#endif
 
 	/*
 	 * set this early, so we dont allocate cpu0
diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
index 723493e6c851..3cce5a0f2d9c 100644
--- a/include/asm-x86/acpi_32.h
+++ b/include/asm-x86/acpi_32.h
@@ -81,8 +81,6 @@ int __acpi_release_global_lock(unsigned int *lock);
         :"=r"(n_hi), "=r"(n_lo)     \
         :"0"(n_hi), "1"(n_lo))
 
-extern void early_quirks(void);
-
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
 extern int acpi_ioapic;
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index e88361966347..6983730d86fd 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -66,6 +66,7 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 
 
 #ifdef CONFIG_PCI
+extern void early_quirks(void);
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -73,9 +74,10 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#else
+static inline void early_quirks(void) { }
 #endif
 
-
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index dabba55f7ed8..64fe816ea37a 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -58,8 +58,6 @@ extern void show_registers(struct pt_regs *regs);
 
 extern void exception_table_check(void);
 
-extern void acpi_reserve_bootmem(void);
-
 extern void swap_low_mappings(void);
 
 extern void __show_regs(struct pt_regs * regs);
@@ -69,7 +67,6 @@ extern void syscall32_cpu_init(void);
 
 extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
 
-extern void early_quirks(void);
 extern void check_efer(void);
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);

From 8c61b900ebb8ec5918ffd776ba1a61a5f022566d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:16 +0100
Subject: [PATCH 060/890] x86: make early_indentify_cpu static

early_indentify_cpu is only used in setup_64.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_64.c | 4 +++-
 include/asm-x86/proto.h    | 2 --
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 97a497652d20..0e2bffe2e203 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -141,6 +141,8 @@ struct resource bss_resource = {
 	.flags = IORESOURCE_RAM,
 };
 
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
+
 #ifdef CONFIG_PROC_VMCORE
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel. This option will be passed
@@ -844,7 +846,7 @@ struct cpu_model_info {
 /* Do some early cpuid on the boot CPU to get some parameter that are
    needed before check_bugs. Everything advanced is in identify_cpu
    below. */
-void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 {
 	u32 tfms;
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 64fe816ea37a..9e3d51fd91a2 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -40,8 +40,6 @@ extern int nohpet;
 
 extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
 
-extern void early_identify_cpu(struct cpuinfo_x86 *c);
-
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);

From fc36367211931cd503f050e4ceff5d9864cec817 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:16 +0100
Subject: [PATCH 061/890] x86: move idle related declarations

Move idle related declarations to processor_64.h, where the
the others are as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor_64.h | 4 ++++
 include/asm-x86/proto.h        | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index e4f19970a82b..6b96e1f12a42 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -434,6 +434,10 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
 
+extern int force_mwait;
+
+extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
 #define stack_current() \
 ({								\
 	struct thread_info *ti;					\
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 9e3d51fd91a2..77b4177d17f5 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -67,8 +67,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 
 extern void check_efer(void);
 
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
-
 extern unsigned long table_start, table_end;
 
 extern int exception_trace;
@@ -80,8 +78,6 @@ extern int notsc_setup(char *);
 
 extern int gsi_irq_sharing(int gsi);
 
-extern int force_mwait;
-
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
 #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))

From c9ff03428f24219b927d9d9d3c0c581622967794 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:16 +0100
Subject: [PATCH 062/890] x86: move k8 related declarations

Move k8 related declarations to k8.h and fix numa_64.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/numa_64.c   | 1 +
 include/asm-x86/k8.h    | 1 +
 include/asm-x86/proto.h | 2 --
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3d6926ba8995..2d8fdc05f415 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -17,6 +17,7 @@
 #include <asm/dma.h>
 #include <asm/numa.h>
 #include <asm/acpi.h>
+#include <asm/k8.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h
index 699dd6961eda..452e2b696ff4 100644
--- a/include/asm-x86/k8.h
+++ b/include/asm-x86/k8.h
@@ -10,5 +10,6 @@ extern struct pci_dev **k8_northbridges;
 extern int num_k8_northbridges;
 extern int cache_k8_northbridges(void);
 extern void k8_flush_garts(void);
+extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
 #endif
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 77b4177d17f5..9b96dae5b7ac 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -40,8 +40,6 @@ extern int nohpet;
 
 extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
 
-extern int k8_scan_nodes(unsigned long start, unsigned long end);
-
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 extern unsigned long numa_free_all_bootmem(void);
 

From 718fc13b4675470ea191522ef98b02a55d990fa1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:17 +0100
Subject: [PATCH 063/890] x86: move debug related declarations to kdebug.h

Move them and fixup some users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820_64.c      | 1 +
 arch/x86/kernel/head64.c       | 1 +
 arch/x86/kernel/process_32.c   | 1 +
 arch/x86/mm/init_64.c          | 1 +
 include/asm-x86/arch_hooks.h   | 5 +----
 include/asm-x86/kdebug.h       | 5 +++++
 include/asm-x86/processor_32.h | 1 -
 include/asm-x86/proto.h        | 8 --------
 include/asm-x86/system_32.h    | 1 -
 9 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index d41cd2f01733..e510cfd5bb71 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -26,6 +26,7 @@
 #include <asm/proto.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
+#include <asm/kdebug.h>
 
 struct e820map e820;
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6b3469311e42..d156bfbffa9a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -19,6 +19,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/kdebug.h>
 
 static void __init zap_identity_mappings(void)
 {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a63d2d2556ee..a8cdd09ad53f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
+#include <asm/kdebug.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0f9c8c890658..9677abb6cf8a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -43,6 +43,7 @@
 #include <asm/proto.h>
 #include <asm/smp.h>
 #include <asm/sections.h>
+#include <asm/kdebug.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index a8c1fca9726d..768aee8a04ef 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -6,7 +6,7 @@
 /*
  *	linux/include/asm/arch_hooks.h
  *
- *	define the architecture specific hooks 
+ *	define the architecture specific hooks
  */
 
 /* these aren't arch hooks, they are generic routines
@@ -24,7 +24,4 @@ extern void trap_init_hook(void);
 extern void time_init_hook(void);
 extern void mca_nmi_hook(void);
 
-extern int setup_early_printk(char *);
-extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
-
 #endif
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index e2f9b62e535e..49e5c91d490c 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -22,10 +22,15 @@ enum die_val {
 	DIE_PAGE_FAULT,
 };
 
+extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
 extern void printk_address(unsigned long address);
 extern void die(const char *,struct pt_regs *,long);
 extern void __die(const char *,struct pt_regs *,long);
 extern void show_registers(struct pt_regs *regs);
+extern void __show_registers(struct pt_regs *, int all);
+extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long *);
+extern void __show_regs(struct pt_regs *regs);
+extern void show_regs(struct pt_regs *regs);
 extern void dump_pagetable(unsigned long);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long);
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 13976b086837..58880a1ddd65 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -423,7 +423,6 @@ extern void prepare_to_copy(struct task_struct *tsk);
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack);
 
 unsigned long get_wchan(struct task_struct *p);
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 9b96dae5b7ac..b10cd1b82089 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -38,8 +38,6 @@ extern u32 pmtmr_ioport;
 #endif
 extern int nohpet;
 
-extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
-
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 extern unsigned long numa_free_all_bootmem(void);
 
@@ -49,16 +47,10 @@ extern void load_gs_index(unsigned gs);
 
 extern unsigned long end_pfn_map; 
 
-extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long * rsp);
-extern void show_registers(struct pt_regs *regs);
-
 extern void exception_table_check(void);
 
 extern void swap_low_mappings(void);
 
-extern void __show_regs(struct pt_regs * regs);
-extern void show_regs(struct pt_regs * regs);
-
 extern void syscall32_cpu_init(void);
 
 extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index ef8468883bac..db6283eb5e46 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -315,6 +315,5 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
-void __show_registers(struct pt_regs *, int all);
 
 #endif

From af7a78e9258ffcca681e080cbc857f854869144f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:17 +0100
Subject: [PATCH 064/890] x86: move mce related declarations

Move the mce related declarations where they belong, fix the
users and remove 32bit dependency in mce.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_64.c |  1 +
 include/asm-x86/mce.h      | 14 +++++++-------
 include/asm-x86/proto.h    |  1 -
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 0e2bffe2e203..a84a4efc7fe6 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -59,6 +59,7 @@
 #include <asm/sections.h>
 #include <asm/dmi.h>
 #include <asm/cacheflush.h>
+#include <asm/mce.h>
 
 /*
  * Machine setup..
diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index df304fd89c27..e6ff507a73b0 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h
@@ -85,14 +85,7 @@ struct mce_log {
 #ifdef __KERNEL__
 
 #ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do {} while(0)
-#endif
-
 extern int mce_disabled;
-
 #else /* CONFIG_X86_32 */
 
 #include <asm/atomic.h>
@@ -121,6 +114,13 @@ extern int mce_notify_user(void);
 
 #endif /* !CONFIG_X86_32 */
 
+
+
+#ifdef CONFIG_X86_MCE
+extern void mcheck_init(struct cpuinfo_x86 *c);
+#else
+#define mcheck_init(c) do { } while (0)
+#endif
 extern void stop_mce(void);
 extern void restart_mce(void);
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index b10cd1b82089..02420dea73c6 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -13,7 +13,6 @@ extern void pda_init(int);
 
 extern void early_idt_handler(void);
 
-extern void mcheck_init(struct cpuinfo_x86 *c);
 extern void init_memory_mapping(unsigned long start, unsigned long end);
 
 extern void system_call(void); 

From aaa64e04f9af8c05a10ab3d67df44154742d15cf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:17 +0100
Subject: [PATCH 065/890] x86: move numa related declarations

More stuff shuffeled to the correct place

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c     | 1 +
 include/asm-x86/numa_64.h | 6 ++++++
 include/asm-x86/proto.h   | 5 -----
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9677abb6cf8a..0fbb657a8b19 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -44,6 +44,7 @@
 #include <asm/smp.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
+#include <asm/numa.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 0cc5c97a7fc9..e6bc0b5dfe3a 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -21,6 +21,12 @@ extern void srat_reserve_add_area(int nodeid);
 extern int hotadd_percent;
 
 extern unsigned char apicid_to_node[MAX_LOCAL_APIC];
+
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern unsigned long numa_free_all_bootmem(void);
+extern void setup_node_bootmem(int nodeid, unsigned long start,
+			       unsigned long end);
+
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 02420dea73c6..71a419c6e041 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -37,9 +37,6 @@ extern u32 pmtmr_ioport;
 #endif
 extern int nohpet;
 
-extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern unsigned long numa_free_all_bootmem(void);
-
 extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
 
 extern void load_gs_index(unsigned gs);
@@ -52,8 +49,6 @@ extern void swap_low_mappings(void);
 
 extern void syscall32_cpu_init(void);
 
-extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
-
 extern void check_efer(void);
 
 extern unsigned long table_start, table_end;

From a89518b882283fdb3e11fb6d1929dc874b380ca8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:17 +0100
Subject: [PATCH 066/890] x86: move page related declaration

end_pfn is in page.h, so end_pfn_map has a place there as well

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page_64.h | 1 +
 include/asm-x86/proto.h   | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c3b52bcb171e..6fdc904a5fa5 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -41,6 +41,7 @@
 #ifndef __ASSEMBLY__
 
 extern unsigned long end_pfn;
+extern unsigned long end_pfn_map;
 
 void clear_page(void *);
 void copy_page(void *, void *);
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 71a419c6e041..7ff3d765de05 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -41,8 +41,6 @@ extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
 
 extern void load_gs_index(unsigned gs);
 
-extern unsigned long end_pfn_map; 
-
 extern void exception_table_check(void);
 
 extern void swap_low_mappings(void);

From 40fec50ac4dd436b06630f415789e57200c5b06f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:18 +0100
Subject: [PATCH 067/890] x86: move pda related declaration

pda has its own header file as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pda.h   | 1 +
 include/asm-x86/proto.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 35962bbe5e72..3d9dd653628a 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -40,6 +40,7 @@ struct x8664_pda {
 
 extern struct x8664_pda *_cpu_pda[];
 extern struct x8664_pda boot_cpu_pda[];
+extern void pda_init(int);
 
 #define cpu_pda(i) (_cpu_pda[i])
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 7ff3d765de05..8ed1ebf666be 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -9,7 +9,6 @@ struct cpuinfo_x86;
 struct pt_regs;
 
 extern void start_kernel(void);
-extern void pda_init(int); 
 
 extern void early_idt_handler(void);
 

From 80ca9c98f50c11b63b87971594d7e38cba0bbcef Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:18 +0100
Subject: [PATCH 068/890] x86: move tsc related declarations

tsc has also it's own header file. Nuke the stupid 64 bit ifdef
while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/proto.h | 3 ---
 include/asm-x86/tsc.h   | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 8ed1ebf666be..a47e526716f4 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -51,11 +51,8 @@ extern void check_efer(void);
 extern unsigned long table_start, table_end;
 
 extern int exception_trace;
-extern unsigned cpu_khz;
-extern unsigned tsc_khz;
 
 extern int reboot_force;
-extern int notsc_setup(char *);
 
 extern int gsi_irq_sharing(int gsi);
 
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 6baab30dc2c8..d7b1c4e7a108 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -73,8 +73,7 @@ int check_tsc_unstable(void);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 
-#ifdef CONFIG_X86_64
 extern void tsc_calibrate(void);
-#endif
+extern int notsc_setup(char *);
 
 #endif

From 70a20025632ca320316b5068326784d07c8ff351 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:18 +0100
Subject: [PATCH 069/890] x86: move pmtmr related declarations

Move more stuff out of proto.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    | 1 +
 arch/x86/kernel/pmtimer_64.c | 4 ++--
 include/asm-x86/proto.h      | 9 ---------
 include/linux/acpi_pmtmr.h   | 2 ++
 4 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 3de3764a862c..0cb14d4c2c5c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c
index ae8f91214f15..b112406f1996 100644
--- a/arch/x86/kernel/pmtimer_64.c
+++ b/arch/x86/kernel/pmtimer_64.c
@@ -19,13 +19,13 @@
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/cpumask.h>
+#include <linux/acpi_pmtmr.h>
+
 #include <asm/io.h>
 #include <asm/proto.h>
 #include <asm/msr.h>
 #include <asm/vsyscall.h>
 
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
 static inline u32 cyc2us(u32 cycles)
 {
 	/* The Power Management Timer ticks at 3.579545 ticks per microsecond.
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index a47e526716f4..9074aa7ebc65 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -25,15 +25,6 @@ extern void ia32_sysenter_target(void);
 extern void config_acpi_tables(void);
 extern void ia32_syscall(void);
 
-extern int pmtimer_mark_offset(void);
-extern void pmtimer_resume(void);
-extern void pmtimer_wait(unsigned);
-extern unsigned int do_gettimeoffset_pm(void);
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#else
-#define pmtmr_ioport 0
-#endif
 extern int nohpet;
 
 extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h
index 1d0ef1ae8036..7e3d2859be50 100644
--- a/include/linux/acpi_pmtmr.h
+++ b/include/linux/acpi_pmtmr.h
@@ -25,6 +25,8 @@ static inline u32 acpi_pm_read_early(void)
 	return acpi_pm_read_verified() & ACPI_PM_MASK;
 }
 
+extern void pmtimer_wait(unsigned);
+
 #else
 
 static inline u32 acpi_pm_read_early(void)

From 6fc30f9e292d2275248b6ee5ca903248b66efca1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:18 +0100
Subject: [PATCH 070/890] x86: remove obsolete nohpet declaration

Lonely user is hpet.c, so no need to declare it elsewhere.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/proto.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 9074aa7ebc65..4eddf2761804 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -25,8 +25,6 @@ extern void ia32_sysenter_target(void);
 extern void config_acpi_tables(void);
 extern void ia32_syscall(void);
 
-extern int nohpet;
-
 extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
 
 extern void load_gs_index(unsigned gs);

From eaf76e8b93dd5e7a53e2cd6db53f3ca18ca0fe4c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:19 +0100
Subject: [PATCH 071/890] x86: remove duplicate start_kernel declaration

start_kernel is already declared in a generic header file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head64.c | 1 +
 include/asm-x86/proto.h  | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d156bfbffa9a..6643f3f994fb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/percpu.h>
+#include <linux/start_kernel.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 4eddf2761804..5a45478b5d3a 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -8,8 +8,6 @@
 struct cpuinfo_x86; 
 struct pt_regs;
 
-extern void start_kernel(void);
-
 extern void early_idt_handler(void);
 
 extern void init_memory_mapping(unsigned long start, unsigned long end);

From 84c873ed2a00eab3e8ac49dc7889d7aad142ce22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:19 +0100
Subject: [PATCH 072/890] x86: remove obsolte declarations from proto.h

Nuke duplicate and obsolete crap from this ugly dump bin.
There are still some entries left which need to be sorted out,
but I'm tired of that puzzle game right now.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/proto.h | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 5a45478b5d3a..3c6fb89f1120 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -5,44 +5,27 @@
 
 /* misc architecture specific prototypes */
 
-struct cpuinfo_x86; 
-struct pt_regs;
-
 extern void early_idt_handler(void);
 
 extern void init_memory_mapping(unsigned long start, unsigned long end);
 
-extern void system_call(void); 
-extern int kernel_syscall(void);
+extern void system_call(void);
 extern void syscall_init(void);
 
 extern void ia32_syscall(void);
-extern void ia32_cstar_target(void); 
-extern void ia32_sysenter_target(void); 
-
-extern void config_acpi_tables(void);
-extern void ia32_syscall(void);
+extern void ia32_cstar_target(void);
+extern void ia32_sysenter_target(void);
 
 extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
 
-extern void load_gs_index(unsigned gs);
-
-extern void exception_table_check(void);
-
-extern void swap_low_mappings(void);
-
 extern void syscall32_cpu_init(void);
 
 extern void check_efer(void);
 
 extern unsigned long table_start, table_end;
 
-extern int exception_trace;
-
 extern int reboot_force;
 
-extern int gsi_irq_sharing(int gsi);
-
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
 #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))

From 3e35a0e525253837fc0ea4d0e060de3302bd9537 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:19 +0100
Subject: [PATCH 073/890] x86: move ioapic code where it belongs

The commit 399287229c775a8962a852a761d65dc9475dec7c hacked the
ioapic resource mapping into apic.c for no good reason.
Move the code into io_apic_64.c where it belongs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    | 87 ----------------------------------
 arch/x86/kernel/io_apic_64.c | 90 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup_64.c   |  1 +
 include/asm-x86/io_apic_64.h |  2 +
 4 files changed, 93 insertions(+), 87 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0cb14d4c2c5c..e0191cdd6f7a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -49,7 +49,6 @@ static int apic_calibrate_pmtmr __initdata;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-static struct resource *ioapic_resources;
 static struct resource lapic_resource = {
 	.name = "Local APIC",
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -714,64 +713,6 @@ static int __init detect_init_APIC (void)
 	return 0;
 }
 
-#ifdef CONFIG_X86_IO_APIC
-static struct resource * __init ioapic_setup_resources(void)
-{
-#define IOAPIC_RESOURCE_NAME_SIZE 11
-	unsigned long n;
-	struct resource *res;
-	char *mem;
-	int i;
-
-	if (nr_ioapics <= 0)
-		return NULL;
-
-	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
-	n *= nr_ioapics;
-
-	mem = alloc_bootmem(n);
-	res = (void *)mem;
-
-	if (mem != NULL) {
-		memset(mem, 0, n);
-		mem += sizeof(struct resource) * nr_ioapics;
-
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
-	}
-
-	ioapic_resources = res;
-
-	return res;
-}
-
-static int __init ioapic_insert_resources(void)
-{
-	int i;
-	struct resource *r = ioapic_resources;
-
-	if (!r) {
-		printk("IO APIC resources could be not be allocated.\n");
-		return -1;
-	}
-
-	for (i = 0; i < nr_ioapics; i++) {
-		insert_resource(&iomem_resource, r);
-		r++;
-	}
-
-	return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
-
 void __init init_apic_mappings(void)
 {
 	unsigned long apic_phys;
@@ -801,34 +742,6 @@ void __init init_apic_mappings(void)
 	 * default configuration (or the MP table is broken).
 	 */
 	boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-
-	{
-		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-		int i;
-		struct resource *ioapic_res;
-
-		ioapic_res = ioapic_setup_resources();
-		for (i = 0; i < nr_ioapics; i++) {
-			if (smp_found_config) {
-				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
-			} else {
-				ioapic_phys = (unsigned long)
-					alloc_bootmem_pages(PAGE_SIZE);
-				ioapic_phys = __pa(ioapic_phys);
-			}
-			set_fixmap_nocache(idx, ioapic_phys);
-			apic_printk(APIC_VERBOSE,
-				    "mapped IOAPIC to %016lx (%016lx)\n",
-				    __fix_to_virt(idx), ioapic_phys);
-			idx++;
-
-			if (ioapic_res != NULL) {
-				ioapic_res->start = ioapic_phys;
-				ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-				ioapic_res++;
-			}
-		}
-	}
 }
 
 /*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index d4f5286101a9..c6de7854ac63 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -35,6 +35,7 @@
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
+#include <linux/bootmem.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -2288,3 +2289,92 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		memset(mem, 0, n);
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+
+void __init ioapic_init_mappings(void)
+{
+	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+	struct resource *ioapic_res;
+	int i;
+
+	ioapic_res = ioapic_setup_resources();
+	for (i = 0; i < nr_ioapics; i++) {
+		if (smp_found_config) {
+			ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+		} else {
+			ioapic_phys = (unsigned long)
+				alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = __pa(ioapic_phys);
+		}
+		set_fixmap_nocache(idx, ioapic_phys);
+		apic_printk(APIC_VERBOSE,
+			    "mapped IOAPIC to %016lx (%016lx)\n",
+			    __fix_to_virt(idx), ioapic_phys);
+		idx++;
+
+		if (ioapic_res != NULL) {
+			ioapic_res->start = ioapic_phys;
+			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+			ioapic_res++;
+		}
+	}
+}
+
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk(KERN_ERR
+		       "IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index a84a4efc7fe6..bcb5f3aaa097 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -433,6 +433,7 @@ void __init setup_arch(char **cmdline_p)
 	if (smp_found_config)
 		get_smp_config();
 	init_apic_mappings();
+	ioapic_init_mappings();
 
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
index 762a841b939e..3e3c2114d93a 100644
--- a/include/asm-x86/io_apic_64.h
+++ b/include/asm-x86/io_apic_64.h
@@ -127,6 +127,8 @@ extern int io_apic_get_redir_entries (int ioapic);
 extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int);
 #endif
 
+extern void ioapic_init_mappings(void);
+
 extern int sis_apic_bug; /* dummy */ 
 
 #endif

From 87ebecf14ca4f669cb52be46c954f3d9201394b8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:19 +0100
Subject: [PATCH 074/890] x86: move ack_bad_irq into irq code

Match i386, where we have this in the irq code. It belongs there.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 20 --------------------
 arch/x86/kernel/irq_64.c  | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index e0191cdd6f7a..032bf1e765df 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -169,26 +169,6 @@ int lapic_get_maxlvt(void)
 	return maxlvt;
 }
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But don't ack when the APIC is disabled. -AK
-	 */
-	if (!disable_apic)
-		ack_APIC_irq();
-}
-
 void clear_local_APIC(void)
 {
 	int maxlvt;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 6b5c730d67b9..6c3a3b6e5cf4 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -20,6 +20,26 @@
 
 atomic_t irq_err_count;
 
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 * But don't ack when the APIC is disabled. -AK
+	 */
+	if (!disable_apic)
+		ack_APIC_irq();
+}
+
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:

From 3a12d93dc0b46bc710317272bf91640d7a8b6f18 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:20 +0100
Subject: [PATCH 075/890] x86: make smp_local_timer_interrupt() static

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 2 +-
 include/asm-x86/apic.h    | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 032bf1e765df..5bf329ed9d78 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -923,7 +923,7 @@ void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
  * value into /proc/profile.
  */
 
-void smp_local_timer_interrupt(void)
+static void smp_local_timer_interrupt(void)
 {
 	int cpu = smp_processor_id();
 	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index c064c1f84bab..d0a221fa1fc3 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -112,7 +112,6 @@ extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
-extern void smp_local_timer_interrupt(void);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);

From 0e078e2f5060e06f9b3f32e55665ea835343440e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:20 +0100
Subject: [PATCH 076/890] x86: prepare merging arch/x86/kernel/apic_32/64.c

Shuffle code around, so we get a readable diff.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c |   92 ++--
 arch/x86/kernel/apic_64.c | 1035 ++++++++++++++++++++-----------------
 2 files changed, 597 insertions(+), 530 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 3a069acb270c..420c15842e44 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -563,6 +563,9 @@ static void local_apic_timer_interrupt(void)
 		return;
 	}
 
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
 	per_cpu(irq_stat, cpu).apic_timer_irqs++;
 
 	evt->event_handler(evt);
@@ -617,7 +620,7 @@ int setup_profiling_timer(unsigned int multiplier)
 void clear_local_APIC(void)
 {
 	int maxlvt = lapic_get_maxlvt();
-	unsigned long v;
+	u32 v;
 
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
@@ -1209,50 +1212,6 @@ int __init APIC_init_uniprocessor (void)
 	return 0;
 }
 
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
-
-static int __init parse_nolapic(char *arg)
-{
-	enable_local_apic = -1;
-	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-	return 0;
-}
-early_param("nolapic", parse_nolapic);
-
-static int __init parse_disable_lapic_timer(char *arg)
-{
-	local_apic_timer_disabled = 1;
-	return 0;
-}
-early_param("nolapic_timer", parse_disable_lapic_timer);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-	local_apic_timer_c2_ok = 1;
-	return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init apic_set_verbosity(char *str)
-{
-	if (strcmp("debug", str) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", str) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	return 1;
-}
-
-__setup("apic=", apic_set_verbosity);
-
-
 /*
  * Local APIC interrupts
  */
@@ -1565,3 +1524,46 @@ device_initcall(init_lapic_sysfs);
 static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
+
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+
+static int __init parse_nolapic(char *arg)
+{
+	enable_local_apic = -1;
+	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	return 0;
+}
+early_param("nolapic", parse_nolapic);
+
+static int __init parse_disable_lapic_timer(char *arg)
+{
+	local_apic_timer_disabled = 1;
+	return 0;
+}
+early_param("nolapic_timer", parse_disable_lapic_timer);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+	local_apic_timer_c2_ok = 1;
+	return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init apic_set_verbosity(char *str)
+{
+	if (strcmp("debug", str) == 0)
+		apic_verbosity = APIC_DEBUG;
+	else if (strcmp("verbose", str) == 0)
+		apic_verbosity = APIC_VERBOSE;
+	return 1;
+}
+__setup("apic=", apic_set_verbosity);
+
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5bf329ed9d78..915808bd8a2a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -44,6 +44,7 @@
 int apic_verbosity;
 int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
+int disable_apic;
 
 /* Local APIC timer works in C2? */
 int local_apic_timer_c2_ok;
@@ -60,10 +61,8 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-
 static void lapic_timer_broadcast(cpumask_t mask);
-
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen);
+static void apic_pm_activate(void);
 
 static struct clock_event_device lapic_clockevent = {
 	.name		= "lapic",
@@ -78,6 +77,130 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+	return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a seperate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+	return 1;
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
+static int modern_apic(void)
+{
+	/* AMD systems use old APIC versions, so check the CPU */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+	    boot_cpu_data.x86 >= 0xf)
+		return 1;
+	return lapic_get_version() >= 0x14;
+}
+
+void apic_wait_icr_idle(void)
+{
+	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+		cpu_relax();
+}
+
+u32 safe_apic_wait_icr_idle(void)
+{
+	u32 send_status;
+	int timeout;
+
+	timeout = 0;
+	do {
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+		if (!send_status)
+			break;
+		udelay(100);
+	} while (timeout++ < 1000);
+
+	return send_status;
+}
+
+/**
+ * enable_NMI_through_LVT0 - enable NMI through local vector table 0
+ */
+void enable_NMI_through_LVT0(void *dummy)
+{
+	unsigned int v;
+
+	/* unmask and set to NMI */
+	v = APIC_DM_NMI;
+	apic_write(APIC_LVT0, v);
+}
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+	unsigned int v, maxlvt;
+
+	v = apic_read(APIC_LVR);
+	maxlvt = GET_APIC_MAXLVT(v);
+	return maxlvt;
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+	unsigned int lvtt_value, tmp_value;
+
+	lvtt_value = LOCAL_TIMER_VECTOR;
+	if (!oneshot)
+		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	if (!irqen)
+		lvtt_value |= APIC_LVT_MASKED;
+
+	apic_write(APIC_LVTT, lvtt_value);
+
+	/*
+	 * Divide PICLK by 16
+	 */
+	tmp_value = apic_read(APIC_TDCR);
+	apic_write(APIC_TDCR, (tmp_value
+				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+				| APIC_TDR_DIV_16);
+
+	if (!oneshot)
+		apic_write(APIC_TMICT, clocks);
+}
+
+/*
+ * Setup extended LVT (K8 specific)
+ */
+void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
+			     unsigned char msg_type, unsigned char mask)
+{
+	unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+
+	apic_write(reg, v);
+}
+
+/*
+ * Program the next event, relative to now
+ */
 static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt)
 {
@@ -85,6 +208,9 @@ static int lapic_next_event(unsigned long delta,
 	return 0;
 }
 
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt)
 {
@@ -127,54 +253,236 @@ static void lapic_timer_broadcast(cpumask_t mask)
 #endif
 }
 
-static void apic_pm_activate(void);
-
-void apic_wait_icr_idle(void)
+/*
+ * Setup the local APIC timer for this CPU. Copy the initilized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void setup_APIC_timer(void)
 {
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+
+	memcpy(levt, &lapic_clockevent, sizeof(*levt));
+	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+
+	clockevents_register_device(levt);
 }
 
-u32 safe_apic_wait_icr_idle(void)
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+#define TICK_COUNT 100000000
+
+static void __init calibrate_APIC_clock(void)
 {
-	u32 send_status;
-	int timeout;
+	unsigned apic, apic_start;
+	unsigned long tsc, tsc_start;
+	int result;
 
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		udelay(100);
-	} while (timeout++ < 1000);
+	local_irq_disable();
 
-	return send_status;
+	/*
+	 * Put whatever arbitrary (but long enough) timeout
+	 * value into the APIC clock, we just want to get the
+	 * counter running for calibration.
+	 *
+	 * No interrupt enable !
+	 */
+	__setup_APIC_LVTT(250000000, 0, 0);
+
+	apic_start = apic_read(APIC_TMCCT);
+#ifdef CONFIG_X86_PM_TIMER
+	if (apic_calibrate_pmtmr && pmtmr_ioport) {
+		pmtimer_wait(5000);  /* 5ms wait */
+		apic = apic_read(APIC_TMCCT);
+		result = (apic_start - apic) * 1000L / 5;
+	} else
+#endif
+	{
+		rdtscll(tsc_start);
+
+		do {
+			apic = apic_read(APIC_TMCCT);
+			rdtscll(tsc);
+		} while ((tsc - tsc_start) < TICK_COUNT &&
+				(apic_start - apic) < TICK_COUNT);
+
+		result = (apic_start - apic) * 1000L * tsc_khz /
+					(tsc - tsc_start);
+	}
+
+	local_irq_enable();
+
+	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
+
+	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
+		result / 1000 / 1000, result / 1000 % 1000);
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = result / HZ;
 }
 
-void enable_NMI_through_LVT0 (void * dummy)
+void __init setup_boot_APIC_clock(void)
 {
-	unsigned int v;
+	/*
+	 * The local apic timer can be disabled via the kernel commandline.
+	 * Register the lapic timer as a dummy clock event source on SMP
+	 * systems, so the broadcast mechanism is used. On UP systems simply
+	 * ignore it.
+	 */
+	if (disable_apic_timer) {
+		printk(KERN_INFO "Disabling APIC timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
 
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-	apic_write(APIC_LVT0, v);
+	printk(KERN_INFO "Using local APIC timer interrupts.\n");
+	calibrate_APIC_clock();
+
+	/*
+	 * If nmi_watchdog is set to IO_APIC, we need the
+	 * PIT/HPET going.  Otherwise register lapic as a dummy
+	 * device.
+	 */
+	if (nmi_watchdog != NMI_IO_APIC)
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+	else
+		printk(KERN_WARNING "APIC timer registered as dummy,"
+		       " due to nmi_watchdog=1!\n");
+
+	setup_APIC_timer();
 }
 
-int lapic_get_maxlvt(void)
+/*
+ * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
+ * C1E flag only in the secondary CPU, so when we detect the wreckage
+ * we already have enabled the boot CPU local apic timer. Check, if
+ * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
+ * set the DUMMY flag again and force the broadcast mode in the
+ * clockevents layer.
+ */
+void __cpuinit check_boot_apic_timer_broadcast(void)
 {
-	unsigned int v, maxlvt;
+	if (!disable_apic_timer ||
+	    (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
+		return;
 
-	v = apic_read(APIC_LVR);
-	maxlvt = GET_APIC_MAXLVT(v);
-	return maxlvt;
+	printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
+	lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
+
+	local_irq_enable();
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
+	local_irq_disable();
 }
 
+void __cpuinit setup_secondary_APIC_clock(void)
+{
+	check_boot_apic_timer_broadcast();
+	setup_APIC_timer();
+}
+
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+	/*
+	 * Normally we should not be here till LAPIC has been initialized but
+	 * in some cases like kdump, its possible that there is a pending LAPIC
+	 * timer interrupt from previous kernel's context and is delivered in
+	 * new kernel the moment interrupts are enabled.
+	 *
+	 * Interrupts are enabled early and LAPIC is setup much later, hence
+	 * its possible that when we get here evt->event_handler is NULL.
+	 * Check for event_handler being NULL and discard the interrupt as
+	 * spurious.
+	 */
+	if (!evt->event_handler) {
+		printk(KERN_WARNING
+		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		/* Switch it off */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+		return;
+	}
+
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
+	add_pda(apic_timer_irqs, 1);
+
+	evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	exit_idle();
+	irq_enter();
+	local_apic_timer_interrupt();
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
+ * leftovers during boot.
+ */
 void clear_local_APIC(void)
 {
-	int maxlvt;
-	unsigned int v;
-
-	maxlvt = lapic_get_maxlvt();
+	int maxlvt = lapic_get_maxlvt();
+	u32 v;
 
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
@@ -213,45 +521,9 @@ void clear_local_APIC(void)
 	apic_read(APIC_ESR);
 }
 
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	/* Go back to Virtual Wire compatibility mode */
-	unsigned long value;
-
-	/* For the spurious interrupt use vector F, and enable it */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-	value |= 0xf;
-	apic_write(APIC_SPIV, value);
-
-	if (!virt_wire_setup) {
-		/*
-		 * For LVT0 make it edge triggered, active high,
-		 * external and enabled
-		 */
-		value = apic_read(APIC_LVT0);
-		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
-	} else {
-		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	}
-
-	/* For LVT1 make it edge triggered, active high, nmi and enabled */
-	value = apic_read(APIC_LVT1);
-	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
-}
-
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
 void disable_local_APIC(void)
 {
 	unsigned int value;
@@ -335,18 +607,20 @@ int __init verify_local_APIC(void)
 	 * compatibility mode, but most boxes are anymore.
 	 */
 	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0);
+	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
 	reg1 = apic_read(APIC_LVT1);
 	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
 
 	return 1;
 }
 
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
 void __init sync_Arb_IDs(void)
 {
 	/* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
-	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
-	if (ver >= 0x14)	/* P4 or higher */
+	if (modern_apic())
 		return;
 
 	/*
@@ -398,7 +672,10 @@ void __init init_bsp_APIC(void)
 	apic_write(APIC_LVT1, value);
 }
 
-void __cpuinit setup_local_APIC (void)
+/**
+ * setup_local_APIC - setup the local APIC
+ */
+void __cpuinit setup_local_APIC(void)
 {
 	unsigned int value, maxlvt;
 	int i, j;
@@ -520,6 +797,188 @@ void __cpuinit setup_local_APIC (void)
 	apic_pm_activate();
 }
 
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_id = 0;
+	return 0;
+}
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+	unsigned long apic_phys;
+
+	/*
+	 * If no local APIC can be found then set up a fake all
+	 * zeroes page to simulate the local APIC and another
+	 * one for the IO-APIC.
+	 */
+	if (!smp_found_config && detect_init_APIC()) {
+		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+		apic_phys = __pa(apic_phys);
+	} else
+		apic_phys = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+				APIC_BASE, apic_phys);
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor(void)
+{
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+
+	verify_local_APIC();
+
+	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
+
+	setup_local_APIC();
+
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+	else
+		nr_ioapics = 0;
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+	return 0;
+}
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+	unsigned int v;
+	exit_idle();
+	irq_enter();
+	/*
+	 * Check if this really is a spurious interrupt and ACK it
+	 * if it is a vectored one.  Just in case...
+	 * Spurious interrupts should not be ACKed.
+	 */
+	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+		ack_APIC_irq();
+
+	add_pda(irq_spurious_count, 1);
+	irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_error_interrupt(void)
+{
+	unsigned int v, v1;
+
+	exit_idle();
+	irq_enter();
+	/* First tickle the hardware, only then report what went on. -- REW */
+	v = apic_read(APIC_ESR);
+	apic_write(APIC_ESR, 0);
+	v1 = apic_read(APIC_ESR);
+	ack_APIC_irq();
+	atomic_inc(&irq_err_count);
+
+	/* Here is what the APIC error bits mean:
+	   0: Send CS error
+	   1: Receive CS error
+	   2: Send accept error
+	   3: Receive accept error
+	   4: Reserved
+	   5: Send illegal vector
+	   6: Received illegal vector
+	   7: Illegal register address
+	*/
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+		smp_processor_id(), v , v1);
+	irq_exit();
+}
+
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+	/* Go back to Virtual Wire compatibility mode */
+	unsigned long value;
+
+	/* For the spurious interrupt use vector F, and enable it */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+	value |= 0xf;
+	apic_write(APIC_SPIV, value);
+
+	if (!virt_wire_setup) {
+		/*
+		 * For LVT0 make it edge triggered, active high,
+		 * external and enabled
+		 */
+		value = apic_read(APIC_LVT0);
+		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+		apic_write(APIC_LVT0, value);
+	} else {
+		/* Disable LVT0 */
+		apic_write(APIC_LVT0, APIC_LVT_MASKED);
+	}
+
+	/* For LVT1 make it edge triggered, active high, nmi and enabled */
+	value = apic_read(APIC_LVT1);
+	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+	apic_write(APIC_LVT1, value);
+}
+
+/*
+ * Power management
+ */
 #ifdef CONFIG_PM
 
 static struct {
@@ -653,337 +1112,6 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
-static int __init apic_set_verbosity(char *str)
-{
-	if (str == NULL)  {
-		skip_ioapic_setup = 0;
-		ioapic_force = 1;
-		return 0;
-	}
-	if (strcmp("debug", str) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", str) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-				" use apic=verbose or apic=debug\n", str);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-
-static int __init detect_init_APIC (void)
-{
-	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
-		return -1;
-	}
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_id = 0;
-	return 0;
-}
-
-void __init init_apic_mappings(void)
-{
-	unsigned long apic_phys;
-
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
-	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
-		apic_phys = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-				APIC_BASE, apic_phys);
-
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-}
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-	unsigned int lvtt_value, tmp_value;
-
-	lvtt_value = LOCAL_TIMER_VECTOR;
-	if (!oneshot)
-		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-	if (!irqen)
-		lvtt_value |= APIC_LVT_MASKED;
-
-	apic_write(APIC_LVTT, lvtt_value);
-
-	/*
-	 * Divide PICLK by 16
-	 */
-	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR, (tmp_value
-				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
-				| APIC_TDR_DIV_16);
-
-	if (!oneshot)
-		apic_write(APIC_TMICT, clocks);
-}
-
-static void setup_APIC_timer(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-	clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static void __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = result / HZ;
-}
-
-void __init setup_boot_APIC_clock (void)
-{
-	/*
-	 * The local apic timer can be disabled via the kernel commandline.
-	 * Register the lapic timer as a dummy clock event source on SMP
-	 * systems, so the broadcast mechanism is used. On UP systems simply
-	 * ignore it.
-	 */
-	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
-			setup_APIC_timer();
-		return;
-	}
-
-	printk(KERN_INFO "Using local APIC timer interrupts.\n");
-	calibrate_APIC_clock();
-
-	/*
-	 * If nmi_watchdog is set to IO_APIC, we need the
-	 * PIT/HPET going.  Otherwise register lapic as a dummy
-	 * device.
-	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
-		       " due to nmi_watchdog=1!\n");
-
-	setup_APIC_timer();
-}
-
-/*
- * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
- * C1E flag only in the secondary CPU, so when we detect the wreckage
- * we already have enabled the boot CPU local apic timer. Check, if
- * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
- * set the DUMMY flag again and force the broadcast mode in the
- * clockevents layer.
- */
-void __cpuinit check_boot_apic_timer_broadcast(void)
-{
-	if (!disable_apic_timer ||
-	    (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
-		return;
-
-	printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
-	lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
-
-	local_irq_enable();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
-	local_irq_disable();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-	check_boot_apic_timer_broadcast();
-	setup_APIC_timer();
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
-			     unsigned char msg_type, unsigned char mask)
-{
-	unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-	apic_write(reg, v);
-}
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-static void smp_local_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-	/*
-	 * Normally we should not be here till LAPIC has been initialized but
-	 * in some cases like kdump, its possible that there is a pending LAPIC
-	 * timer interrupt from previous kernel's context and is delivered in
-	 * new kernel the moment interrupts are enabled.
-	 *
-	 * Interrupts are enabled early and LAPIC is setup much later, hence
-	 * its possible that when we get here evt->event_handler is NULL.
-	 * Check for event_handler being NULL and discard the interrupt as
-	 * spurious.
-	 */
-	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-		/* Switch it off */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-		return;
-	}
-
-	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-	add_pda(apic_timer_irqs, 1);
-
-	evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 */
-	ack_APIC_irq();
-	/*
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-	exit_idle();
-	irq_enter();
-	smp_local_timer_interrupt();
-	irq_exit();
-	set_irq_regs(old_regs);
-}
-
 /*
  * apic_is_clustered_box() -- Check if we can expect good TSC
  *
@@ -1032,91 +1160,28 @@ __cpuinit int apic_is_clustered_box(void)
 }
 
 /*
- * This interrupt should _never_ happen with our APIC/SMP architecture
+ * APIC command line parameters
  */
-asmlinkage void smp_spurious_interrupt(void)
+static int __init apic_set_verbosity(char *str)
 {
-	unsigned int v;
-	exit_idle();
-	irq_enter();
-	/*
-	 * Check if this really is a spurious interrupt and ACK it
-	 * if it is a vectored one.  Just in case...
-	 * Spurious interrupts should not be ACKed.
-	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
-
-	add_pda(irq_spurious_count, 1);
-	irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-asmlinkage void smp_error_interrupt(void)
-{
-	unsigned int v, v1;
-
-	exit_idle();
-	irq_enter();
-	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
-	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
-	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
-
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
-	irq_exit();
-}
-
-int disable_apic;
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
-{
-	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
-		return -1;
+	if (str == NULL)  {
+		skip_ioapic_setup = 0;
+		ioapic_force = 1;
+		return 0;
 	}
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
-		return -1;
+	if (strcmp("debug", str) == 0)
+		apic_verbosity = APIC_DEBUG;
+	else if (strcmp("verbose", str) == 0)
+		apic_verbosity = APIC_VERBOSE;
+	else {
+		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+				" use apic=verbose or apic=debug\n", str);
+		return -EINVAL;
 	}
 
-	verify_local_APIC();
-
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
-
-	setup_local_APIC();
-
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else
-		nr_ioapics = 0;
-	setup_boot_APIC_clock();
-	check_nmi_watchdog();
 	return 0;
 }
+early_param("apic", apic_set_verbosity);
 
 static __init int setup_disableapic(char *str)
 {

From 2f2239d1d53b06dd475634f79e1905f3775fcedc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:20 +0100
Subject: [PATCH 077/890] x86: prepare merging futex_32/64.h

Replace .quad/.long with a define and use the same asm syntax
for i386 and x86.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/futex_32.h | 64 ++++++++++++++++++--------------------
 include/asm-x86/futex_64.h | 24 +++++++-------
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/include/asm-x86/futex_32.h b/include/asm-x86/futex_32.h
index 438ef0ec7101..c80013e19c21 100644
--- a/include/asm-x86/futex_32.h
+++ b/include/asm-x86/futex_32.h
@@ -4,6 +4,8 @@
 #ifdef __KERNEL__
 
 #include <linux/futex.h>
+
+#include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/system.h>
 #include <asm/processor.h>
@@ -17,13 +19,13 @@
 	jmp	2b\n\
 	.previous\n\
 	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.long	1b,3b\n\
+	.align	8\n"						\
+	_ASM_PTR "1b,3b\n					\
 	.previous"						\
 	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
 	: "i" (-EFAULT), "0" (oparg), "1" (0))
 
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
   __asm__ __volatile (						\
 "1:	movl	%2, %0\n\
 	movl	%0, %3\n"					\
@@ -35,8 +37,8 @@
 	jmp	3b\n\
 	.previous\n\
 	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.long	1b,4b,2b,4b\n\
+	.align	8\n"						\
+	_ASM_PTR "1b,4b,2b,4b\n					\
 	.previous"						\
 	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
 	  "=&r" (tem)						\
@@ -56,36 +58,32 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
+#ifndef CONFIG_X86_BSWAP
+	if (op == FUTEX_OP_SET && boot_cpu_data.x86 == 3)
+		return -ENOSYS;
+#endif
+
 	pagefault_disable();
 
-	if (op == FUTEX_OP_SET)
+	switch (op) {
+	case FUTEX_OP_SET:
 		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
-	else {
-#ifndef CONFIG_X86_BSWAP
-		if (boot_cpu_data.x86 == 3)
-			ret = -ENOSYS;
-		else
-#endif
-		switch (op) {
-		case FUTEX_OP_ADD:
-			__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret,
-					   oldval, uaddr, oparg);
-			break;
-		case FUTEX_OP_OR:
-			__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr,
-					   oparg);
-			break;
-		case FUTEX_OP_ANDN:
-			__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr,
-					   ~oparg);
-			break;
-		case FUTEX_OP_XOR:
-			__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr,
-					   oparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
+				   uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
 	}
 
 	pagefault_enable();
@@ -120,7 +118,7 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 
 		"	.section __ex_table, \"a\"		\n"
 		"	.align  8				\n"
-		"	.long   1b,3b				\n"
+			_ASM_PTR " 1b,3b			\n"
 		"	.previous				\n"
 
 		: "=a" (oldval), "+m" (*uaddr)
diff --git a/include/asm-x86/futex_64.h b/include/asm-x86/futex_64.h
index 5cdfb08013c3..02964d225d18 100644
--- a/include/asm-x86/futex_64.h
+++ b/include/asm-x86/futex_64.h
@@ -4,6 +4,8 @@
 #ifdef __KERNEL__
 
 #include <linux/futex.h>
+
+#include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -16,13 +18,13 @@
 	jmp	2b\n\
 	.previous\n\
 	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,3b\n\
+	.align	8\n"						\
+	_ASM_PTR "1b,3b\n					\
 	.previous"						\
-	: "=r" (oldval), "=r" (ret), "=m" (*uaddr)		\
-	: "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
+	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
+	: "i" (-EFAULT), "0" (oparg), "1" (0))
 
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
   __asm__ __volatile (						\
 "1:	movl	%2, %0\n\
 	movl	%0, %3\n"					\
@@ -34,12 +36,12 @@
 	jmp	3b\n\
 	.previous\n\
 	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,4b,2b,4b\n\
+	.align	8\n"						\
+	_ASM_PTR "1b,4b,2b,4b\n					\
 	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "=m" (*uaddr),		\
+	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
 	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
+	: "r" (oparg), "i" (-EFAULT), "1" (0))
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -110,10 +112,10 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 
 		"	.section __ex_table, \"a\"		\n"
 		"	.align  8				\n"
-		"	.quad   1b,3b				\n"
+			_ASM_PTR " 1b,3b			\n"
 		"	.previous				\n"
 
-		: "=a" (oldval), "=m" (*uaddr)
+		: "=a" (oldval), "+m" (*uaddr)
 		: "i" (-EFAULT), "r" (newval), "0" (oldval)
 		: "memory"
 	);

From 2f18e47c89f1a29cc815cd225b72dd7fd86acffe Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:20 +0100
Subject: [PATCH 078/890] x86: merge futex_32/64.h

Finally merge them together.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/futex.h    | 138 +++++++++++++++++++++++++++++++++++--
 include/asm-x86/futex_32.h | 133 -----------------------------------
 include/asm-x86/futex_64.h | 127 ----------------------------------
 3 files changed, 134 insertions(+), 264 deletions(-)
 delete mode 100644 include/asm-x86/futex_32.h
 delete mode 100644 include/asm-x86/futex_64.h

diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index 1f4610e0c613..62828d63f1b1 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -1,5 +1,135 @@
-#ifdef CONFIG_X86_32
-# include "futex_32.h"
-#else
-# include "futex_64.h"
+#ifndef _ASM_X86_FUTEX_H
+#define _ASM_X86_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+
+#include <asm/asm.h>
+#include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
+  __asm__ __volatile(						\
+"1:	" insn "\n"						\
+"2:	.section .fixup,\"ax\"\n				\
+3:	mov	%3, %1\n					\
+	jmp	2b\n						\
+	.previous\n						\
+	.section __ex_table,\"a\"\n				\
+	.align	8\n"						\
+	_ASM_PTR "1b,3b\n					\
+	.previous"						\
+	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
+	: "i" (-EFAULT), "0" (oparg), "1" (0))
+
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
+  __asm__ __volatile(						\
+"1:	movl	%2, %0\n					\
+	movl	%0, %3\n"					\
+	insn "\n"						\
+"2:	" LOCK_PREFIX "cmpxchgl %3, %2\n			\
+	jnz	1b\n						\
+3:	.section .fixup,\"ax\"\n				\
+4:	mov	%5, %1\n					\
+	jmp	3b\n						\
+	.previous\n						\
+	.section __ex_table,\"a\"\n				\
+	.align	8\n"						\
+	_ASM_PTR "1b,4b,2b,4b\n					\
+	.previous"						\
+	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
+	  "=&r" (tem)						\
+	: "r" (oparg), "i" (-EFAULT), "1" (0))
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret, tem;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
+	/* Real i386 machines can only support FUTEX_OP_SET */
+	if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3)
+		return -ENOSYS;
+#endif
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
+				   uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	__asm__ __volatile__(
+		"1:	" LOCK_PREFIX "cmpxchgl %3, %1		\n"
+
+		"2:	.section .fixup, \"ax\"			\n"
+		"3:	mov     %2, %0				\n"
+		"	jmp     2b				\n"
+		"	.previous				\n"
+
+		"	.section __ex_table, \"a\"		\n"
+		"	.align  8				\n"
+			_ASM_PTR " 1b,3b			\n"
+		"	.previous				\n"
+
+		: "=a" (oldval), "+m" (*uaddr)
+		: "i" (-EFAULT), "r" (newval), "0" (oldval)
+		: "memory"
+	);
+
+	return oldval;
+}
+
+#endif
 #endif
diff --git a/include/asm-x86/futex_32.h b/include/asm-x86/futex_32.h
deleted file mode 100644
index c80013e19c21..000000000000
--- a/include/asm-x86/futex_32.h
+++ /dev/null
@@ -1,133 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-
-#include <asm/asm.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	" insn "\n"						\
-"2:	.section .fixup,\"ax\"\n\
-3:	mov	%3, %1\n\
-	jmp	2b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n"						\
-	_ASM_PTR "1b,3b\n					\
-	.previous"						\
-	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
-	: "i" (-EFAULT), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
-  __asm__ __volatile (						\
-"1:	movl	%2, %0\n\
-	movl	%0, %3\n"					\
-	insn "\n"						\
-"2:	" LOCK_PREFIX "cmpxchgl %3, %2\n\
-	jnz	1b\n\
-3:	.section .fixup,\"ax\"\n\
-4:	mov	%5, %1\n\
-	jmp	3b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n"						\
-	_ASM_PTR "1b,4b,2b,4b\n					\
-	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
-	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-#ifndef CONFIG_X86_BSWAP
-	if (op == FUTEX_OP_SET && boot_cpu_data.x86 == 3)
-		return -ENOSYS;
-#endif
-
-	pagefault_disable();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
-				   uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	pagefault_enable();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
-{
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	__asm__ __volatile__(
-		"1:	" LOCK_PREFIX "cmpxchgl %3, %1		\n"
-
-		"2:	.section .fixup, \"ax\"			\n"
-		"3:	mov     %2, %0				\n"
-		"	jmp     2b				\n"
-		"	.previous				\n"
-
-		"	.section __ex_table, \"a\"		\n"
-		"	.align  8				\n"
-			_ASM_PTR " 1b,3b			\n"
-		"	.previous				\n"
-
-		: "=a" (oldval), "+m" (*uaddr)
-		: "i" (-EFAULT), "r" (newval), "0" (oldval)
-		: "memory"
-	);
-
-	return oldval;
-}
-
-#endif
-#endif
diff --git a/include/asm-x86/futex_64.h b/include/asm-x86/futex_64.h
deleted file mode 100644
index 02964d225d18..000000000000
--- a/include/asm-x86/futex_64.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-
-#include <asm/asm.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	" insn "\n"						\
-"2:	.section .fixup,\"ax\"\n\
-3:	mov	%3, %1\n\
-	jmp	2b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n"						\
-	_ASM_PTR "1b,3b\n					\
-	.previous"						\
-	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
-	: "i" (-EFAULT), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
-  __asm__ __volatile (						\
-"1:	movl	%2, %0\n\
-	movl	%0, %3\n"					\
-	insn "\n"						\
-"2:	" LOCK_PREFIX "cmpxchgl %3, %2\n\
-	jnz	1b\n\
-3:	.section .fixup,\"ax\"\n\
-4:	mov	%5, %1\n\
-	jmp	3b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n"						\
-	_ASM_PTR "1b,4b,2b,4b\n					\
-	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
-	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	pagefault_disable();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
-				   uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	pagefault_enable();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
-{
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	__asm__ __volatile__(
-		"1:	" LOCK_PREFIX "cmpxchgl %3, %1		\n"
-
-		"2:	.section .fixup, \"ax\"			\n"
-		"3:	mov     %2, %0				\n"
-		"	jmp     2b				\n"
-		"	.previous				\n"
-
-		"	.section __ex_table, \"a\"		\n"
-		"	.align  8				\n"
-			_ASM_PTR " 1b,3b			\n"
-		"	.previous				\n"
-
-		: "=a" (oldval), "+m" (*uaddr)
-		: "i" (-EFAULT), "r" (newval), "0" (oldval)
-		: "memory"
-	);
-
-	return oldval;
-}
-
-#endif
-#endif

From f28b8d608829aecd7343015a0df89f6b6e89d391 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:20 +0100
Subject: [PATCH 079/890] x86: merge include/asm-x86/dma.h

Almost identical.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/dma.h    | 320 ++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/dma_32.h | 297 ------------------------------------
 include/asm-x86/dma_64.h | 304 -------------------------------------
 3 files changed, 317 insertions(+), 604 deletions(-)
 delete mode 100644 include/asm-x86/dma_32.h
 delete mode 100644 include/asm-x86/dma_64.h

diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h
index 9f936c61a4e5..e9733ce89880 100644
--- a/include/asm-x86/dma.h
+++ b/include/asm-x86/dma.h
@@ -1,5 +1,319 @@
-#ifdef CONFIG_X86_32
-# include "dma_32.h"
+/*
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_X86_DMA_H
+#define _ASM_X86_DMA_H
+
+#include <linux/spinlock.h>	/* And spinlocks */
+#include <asm/io.h>		/* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb	outb_p
 #else
-# include "dma_64.h"
+#define dma_outb	outb
 #endif
+
+#define dma_inb		inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ *  controller 1: channels 0-3, byte operations, ports 00-1F
+ *  controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ *  - ALL registers are 8 bits only, regardless of transfer size
+ *  - channel 4 is not used - cascades 1 into 2.
+ *  - channels 0-3 are byte - addresses/counts are for physical bytes
+ *  - channels 5-7 are word - addresses/counts are for physical words
+ *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ *  - transfer count loaded to registers is 1 less than actual count
+ *  - controller 2 offsets are all even (2x offsets for controller 1)
+ *  - page registers for 5-7 don't use data bit 0, represent 128K pages
+ *  - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ *  Address mapping for channels 0-3:
+ *
+ *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *   P7  ...  P0  A7 ... A0  A7 ... A0
+ * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
+ *
+ *  Address mapping for channels 5-7:
+ *
+ *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
+ *    |  ...  |   \   \   ... \  \  \  ... \  \
+ *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
+ *    |  ...  |     \   \   ... \  \  \  ... \
+ *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0
+ * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation.
+ *
+ */
+
+#define MAX_DMA_CHANNELS	8
+
+#ifdef CONFIG_X86_32
+
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+#else
+
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN   ((16*1024*1024) >> PAGE_SHIFT)
+
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
+
+/* Compat define for old dma zone */
+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
+
+#endif
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG		0x08	/* command register (w) */
+#define DMA1_STAT_REG		0x08	/* status register (r) */
+#define DMA1_REQ_REG		0x09    /* request register (w) */
+#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
+#define DMA1_MODE_REG		0x0B	/* mode register (w) */
+#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG		0x0D    /* Temporary Register (r) */
+#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
+#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
+#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
+
+#define DMA2_CMD_REG		0xD0	/* command register (w) */
+#define DMA2_STAT_REG		0xD0	/* status register (r) */
+#define DMA2_REQ_REG		0xD2    /* request register (w) */
+#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
+#define DMA2_MODE_REG		0xD6	/* mode register (w) */
+#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG		0xDA    /* Temporary Register (r) */
+#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
+#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
+#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
+
+#define DMA_ADDR_0		0x00    /* DMA address registers */
+#define DMA_ADDR_1		0x02
+#define DMA_ADDR_2		0x04
+#define DMA_ADDR_3		0x06
+#define DMA_ADDR_4		0xC0
+#define DMA_ADDR_5		0xC4
+#define DMA_ADDR_6		0xC8
+#define DMA_ADDR_7		0xCC
+
+#define DMA_CNT_0		0x01    /* DMA count registers */
+#define DMA_CNT_1		0x03
+#define DMA_CNT_2		0x05
+#define DMA_CNT_3		0x07
+#define DMA_CNT_4		0xC2
+#define DMA_CNT_5		0xC6
+#define DMA_CNT_6		0xCA
+#define DMA_CNT_7		0xCE
+
+#define DMA_PAGE_0		0x87    /* DMA page registers */
+#define DMA_PAGE_1		0x83
+#define DMA_PAGE_2		0x81
+#define DMA_PAGE_3		0x82
+#define DMA_PAGE_5		0x8B
+#define DMA_PAGE_6		0x89
+#define DMA_PAGE_7		0x8A
+
+/* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_READ		0x44
+/* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE		0x48
+/* pass thru DREQ->HRQ, DACK<-HLDA only */
+#define DMA_MODE_CASCADE	0xC0
+
+#define DMA_AUTOINIT		0x10
+
+
+extern spinlock_t  dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dma_spin_lock, flags);
+	return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+	spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+	if (dmanr <= 3)
+		dma_outb(dmanr, DMA1_MASK_REG);
+	else
+		dma_outb(dmanr & 3, DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+	if (dmanr <= 3)
+		dma_outb(dmanr | 4, DMA1_MASK_REG);
+	else
+		dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+	if (dmanr <= 3)
+		dma_outb(0, DMA1_CLEAR_FF_REG);
+	else
+		dma_outb(0, DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+	if (dmanr <= 3)
+		dma_outb(mode | dmanr, DMA1_MODE_REG);
+	else
+		dma_outb(mode | (dmanr & 3), DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+	switch (dmanr) {
+	case 0:
+		dma_outb(pagenr, DMA_PAGE_0);
+		break;
+	case 1:
+		dma_outb(pagenr, DMA_PAGE_1);
+		break;
+	case 2:
+		dma_outb(pagenr, DMA_PAGE_2);
+		break;
+	case 3:
+		dma_outb(pagenr, DMA_PAGE_3);
+		break;
+	case 5:
+		dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+		break;
+	case 6:
+		dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+		break;
+	case 7:
+		dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+		break;
+	}
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+	set_dma_page(dmanr, a>>16);
+	if (dmanr <= 3)  {
+		dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
+		dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
+	}  else  {
+	    dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+	    dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+	}
+}
+
+
+/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+	count--;
+	if (dmanr <= 3)  {
+	    dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+	    dma_outb((count >> 8) & 0xff,
+		     ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+	} else {
+	    dma_outb((count >> 1) & 0xff,
+		     ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+	    dma_outb((count >> 9) & 0xff,
+		     ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+	}
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+	unsigned int io_port;
+	/* using short to get 16-bit wrap around */
+	unsigned short count;
+
+	io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE
+		: ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE;
+
+	count = 1 + dma_inb(io_port);
+	count += dma_inb(io_port) << 8;
+
+	return (dmanr <= 3) ? count : (count << 1);
+}
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char *device_id);
+extern void free_dma(unsigned int dmanr);
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy	(0)
+#endif
+
+#endif /* _ASM_X86_DMA_H */
diff --git a/include/asm-x86/dma_32.h b/include/asm-x86/dma_32.h
deleted file mode 100644
index d23aac8e1a50..000000000000
--- a/include/asm-x86/dma_32.h
+++ /dev/null
@@ -1,297 +0,0 @@
-/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <linux/spinlock.h>	/* And spinlocks */
-#include <asm/io.h>		/* need byte IO */
-#include <linux/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb	outb_p
-#else
-#define dma_outb	outb
-#endif
-
-#define dma_inb		inb
-
-/*
- * NOTES about DMA transfers:
- *
- *  controller 1: channels 0-3, byte operations, ports 00-1F
- *  controller 2: channels 4-7, word operations, ports C0-DF
- *
- *  - ALL registers are 8 bits only, regardless of transfer size
- *  - channel 4 is not used - cascades 1 into 2.
- *  - channels 0-3 are byte - addresses/counts are for physical bytes
- *  - channels 5-7 are word - addresses/counts are for physical words
- *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- *  - transfer count loaded to registers is 1 less than actual count
- *  - controller 2 offsets are all even (2x offsets for controller 1)
- *  - page registers for 5-7 don't use data bit 0, represent 128K pages
- *  - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.  
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- *  Address mapping for channels 0-3:
- *
- *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *   P7  ...  P0  A7 ... A0  A7 ... A0   
- * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
- *
- *  Address mapping for channels 5-7:
- *
- *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
- *    |  ...  |   \   \   ... \  \  \  ... \  \
- *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
- *    |  ...  |     \   \   ... \  \  \  ... \
- *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0   
- * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
- *
- */
-
-#define MAX_DMA_CHANNELS	8
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG		0x08	/* command register (w) */
-#define DMA1_STAT_REG		0x08	/* status register (r) */
-#define DMA1_REQ_REG            0x09    /* request register (w) */
-#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
-#define DMA1_MODE_REG		0x0B	/* mode register (w) */
-#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
-#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
-#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
-#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
-
-#define DMA2_CMD_REG		0xD0	/* command register (w) */
-#define DMA2_STAT_REG		0xD0	/* status register (r) */
-#define DMA2_REQ_REG            0xD2    /* request register (w) */
-#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
-#define DMA2_MODE_REG		0xD6	/* mode register (w) */
-#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
-#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
-#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
-#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
-
-#define DMA_ADDR_0              0x00    /* DMA address registers */
-#define DMA_ADDR_1              0x02
-#define DMA_ADDR_2              0x04
-#define DMA_ADDR_3              0x06
-#define DMA_ADDR_4              0xC0
-#define DMA_ADDR_5              0xC4
-#define DMA_ADDR_6              0xC8
-#define DMA_ADDR_7              0xCC
-
-#define DMA_CNT_0               0x01    /* DMA count registers */
-#define DMA_CNT_1               0x03
-#define DMA_CNT_2               0x05
-#define DMA_CNT_3               0x07
-#define DMA_CNT_4               0xC2
-#define DMA_CNT_5               0xC6
-#define DMA_CNT_6               0xCA
-#define DMA_CNT_7               0xCE
-
-#define DMA_PAGE_0              0x87    /* DMA page registers */
-#define DMA_PAGE_1              0x83
-#define DMA_PAGE_2              0x81
-#define DMA_PAGE_3              0x82
-#define DMA_PAGE_5              0x8B
-#define DMA_PAGE_6              0x89
-#define DMA_PAGE_7              0x8A
-
-#define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0   /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT	0x10
-
-
-extern spinlock_t  dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&dma_spin_lock, flags);
-	return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
-	spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(dmanr,  DMA1_MASK_REG);
-	else
-		dma_outb(dmanr & 3,  DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(dmanr | 4,  DMA1_MASK_REG);
-	else
-		dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(0,  DMA1_CLEAR_FF_REG);
-	else
-		dma_outb(0,  DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
-	if (dmanr<=3)
-		dma_outb(mode | dmanr,  DMA1_MODE_REG);
-	else
-		dma_outb(mode | (dmanr&3),  DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
-	switch(dmanr) {
-		case 0:
-			dma_outb(pagenr, DMA_PAGE_0);
-			break;
-		case 1:
-			dma_outb(pagenr, DMA_PAGE_1);
-			break;
-		case 2:
-			dma_outb(pagenr, DMA_PAGE_2);
-			break;
-		case 3:
-			dma_outb(pagenr, DMA_PAGE_3);
-			break;
-		case 5:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_5);
-			break;
-		case 6:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_6);
-			break;
-		case 7:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_7);
-			break;
-	}
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
-	set_dma_page(dmanr, a>>16);
-	if (dmanr <= 3)  {
-	    dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-            dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-	}  else  {
-	    dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	    dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	}
-}
-
-
-/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
-        count--;
-	if (dmanr <= 3)  {
-	    dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-	    dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-        } else {
-	    dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-	    dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-        }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
-	unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
-					 : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
-	/* using short to get 16-bit wrap around */
-	unsigned short count;
-
-	count = 1 + dma_inb(io_port);
-	count += dma_inb(io_port) << 8;
-	
-	return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id);	/* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr);	/* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy 	(0)
-#endif
-
-#endif /* _ASM_DMA_H */
diff --git a/include/asm-x86/dma_64.h b/include/asm-x86/dma_64.h
deleted file mode 100644
index a37c16f06289..000000000000
--- a/include/asm-x86/dma_64.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <linux/spinlock.h>	/* And spinlocks */
-#include <asm/io.h>		/* need byte IO */
-#include <linux/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb	outb_p
-#else
-#define dma_outb	outb
-#endif
-
-#define dma_inb		inb
-
-/*
- * NOTES about DMA transfers:
- *
- *  controller 1: channels 0-3, byte operations, ports 00-1F
- *  controller 2: channels 4-7, word operations, ports C0-DF
- *
- *  - ALL registers are 8 bits only, regardless of transfer size
- *  - channel 4 is not used - cascades 1 into 2.
- *  - channels 0-3 are byte - addresses/counts are for physical bytes
- *  - channels 5-7 are word - addresses/counts are for physical words
- *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- *  - transfer count loaded to registers is 1 less than actual count
- *  - controller 2 offsets are all even (2x offsets for controller 1)
- *  - page registers for 5-7 don't use data bit 0, represent 128K pages
- *  - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.  
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- *  Address mapping for channels 0-3:
- *
- *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *   P7  ...  P0  A7 ... A0  A7 ... A0   
- * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
- *
- *  Address mapping for channels 5-7:
- *
- *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
- *    |  ...  |   \   \   ... \  \  \  ... \  \
- *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
- *    |  ...  |     \   \   ... \  \  \  ... \
- *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0   
- * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
- *
- */
-
-#define MAX_DMA_CHANNELS	8
-
-
-/* 16MB ISA DMA zone */
-#define MAX_DMA_PFN   ((16*1024*1024) >> PAGE_SHIFT)
-
-/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
-
-/* Compat define for old dma zone */
-#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG		0x08	/* command register (w) */
-#define DMA1_STAT_REG		0x08	/* status register (r) */
-#define DMA1_REQ_REG            0x09    /* request register (w) */
-#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
-#define DMA1_MODE_REG		0x0B	/* mode register (w) */
-#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
-#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
-#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
-#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
-
-#define DMA2_CMD_REG		0xD0	/* command register (w) */
-#define DMA2_STAT_REG		0xD0	/* status register (r) */
-#define DMA2_REQ_REG            0xD2    /* request register (w) */
-#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
-#define DMA2_MODE_REG		0xD6	/* mode register (w) */
-#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
-#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
-#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
-#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
-
-#define DMA_ADDR_0              0x00    /* DMA address registers */
-#define DMA_ADDR_1              0x02
-#define DMA_ADDR_2              0x04
-#define DMA_ADDR_3              0x06
-#define DMA_ADDR_4              0xC0
-#define DMA_ADDR_5              0xC4
-#define DMA_ADDR_6              0xC8
-#define DMA_ADDR_7              0xCC
-
-#define DMA_CNT_0               0x01    /* DMA count registers */
-#define DMA_CNT_1               0x03
-#define DMA_CNT_2               0x05
-#define DMA_CNT_3               0x07
-#define DMA_CNT_4               0xC2
-#define DMA_CNT_5               0xC6
-#define DMA_CNT_6               0xCA
-#define DMA_CNT_7               0xCE
-
-#define DMA_PAGE_0              0x87    /* DMA page registers */
-#define DMA_PAGE_1              0x83
-#define DMA_PAGE_2              0x81
-#define DMA_PAGE_3              0x82
-#define DMA_PAGE_5              0x8B
-#define DMA_PAGE_6              0x89
-#define DMA_PAGE_7              0x8A
-
-#define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0   /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT	0x10
-
-
-extern spinlock_t  dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&dma_spin_lock, flags);
-	return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
-	spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(dmanr,  DMA1_MASK_REG);
-	else
-		dma_outb(dmanr & 3,  DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(dmanr | 4,  DMA1_MASK_REG);
-	else
-		dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(0,  DMA1_CLEAR_FF_REG);
-	else
-		dma_outb(0,  DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
-	if (dmanr<=3)
-		dma_outb(mode | dmanr,  DMA1_MODE_REG);
-	else
-		dma_outb(mode | (dmanr&3),  DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
-	switch(dmanr) {
-		case 0:
-			dma_outb(pagenr, DMA_PAGE_0);
-			break;
-		case 1:
-			dma_outb(pagenr, DMA_PAGE_1);
-			break;
-		case 2:
-			dma_outb(pagenr, DMA_PAGE_2);
-			break;
-		case 3:
-			dma_outb(pagenr, DMA_PAGE_3);
-			break;
-		case 5:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_5);
-			break;
-		case 6:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_6);
-			break;
-		case 7:
-			dma_outb(pagenr & 0xfe, DMA_PAGE_7);
-			break;
-	}
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
-	set_dma_page(dmanr, a>>16);
-	if (dmanr <= 3)  {
-	    dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-            dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-	}  else  {
-	    dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	    dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	}
-}
-
-
-/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
-        count--;
-	if (dmanr <= 3)  {
-	    dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-	    dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-        } else {
-	    dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-	    dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-        }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
-	unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
-					 : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
-	/* using short to get 16-bit wrap around */
-	unsigned short count;
-
-	count = 1 + dma_inb(io_port);
-	count += dma_inb(io_port) << 8;
-	
-	return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id);	/* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr);	/* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy 	(0)
-#endif
-
-#endif /* _ASM_DMA_H */

From 3ebc51d7c95425c3b4667fa042576fb1c6e2ea16 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:21 +0100
Subject: [PATCH 080/890] x86: merge include/asm-x86/scatterlist.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/scatterlist.h    | 38 ++++++++++++++++++++++++++++----
 include/asm-x86/scatterlist_32.h | 28 -----------------------
 include/asm-x86/scatterlist_64.h | 29 ------------------------
 3 files changed, 34 insertions(+), 61 deletions(-)
 delete mode 100644 include/asm-x86/scatterlist_32.h
 delete mode 100644 include/asm-x86/scatterlist_64.h

diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index 3a1e76257a27..d13c197866d6 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h
@@ -1,5 +1,35 @@
-#ifdef CONFIG_X86_32
-# include "scatterlist_32.h"
-#else
-# include "scatterlist_64.h"
+#ifndef _ASM_X86_SCATTERLIST_H
+#define _ASM_X86_SCATTERLIST_H
+
+#include <asm/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long	sg_magic;
+#endif
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+#ifdef CONFIG_X86_64
+	unsigned int	dma_length;
+#endif
+};
+
+#define ARCH_HAS_SG_CHAIN
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+/*
+ * These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#ifdef CONFIG_X86_32
+# define sg_dma_len(sg)		((sg)->length)
+#else
+# define sg_dma_len(sg)		((sg)->dma_length)
+#endif
+
 #endif
diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h
deleted file mode 100644
index 0e7d997a34be..000000000000
--- a/include/asm-x86/scatterlist_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _I386_SCATTERLIST_H
-#define _I386_SCATTERLIST_H
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-    unsigned long	sg_magic;
-#endif
-    unsigned long	page_link;
-    unsigned int	offset;
-    dma_addr_t		dma_address;
-    unsigned int	length;
-};
-
-#define ARCH_HAS_SG_CHAIN
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg)	((sg)->dma_address)
-#define sg_dma_len(sg)		((sg)->length)
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h
deleted file mode 100644
index 1847c72befeb..000000000000
--- a/include/asm-x86/scatterlist_64.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _X8664_SCATTERLIST_H
-#define _X8664_SCATTERLIST_H
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-    unsigned long	sg_magic;
-#endif
-    unsigned long	page_link;
-    unsigned int	offset;
-    unsigned int	length;
-    dma_addr_t		dma_address;
-    unsigned int        dma_length;
-};
-
-#define ARCH_HAS_SG_CHAIN
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->dma_length)
-
-#endif 

From f2f58178f497ca56501d44d79982621e19c5007f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:23 +0100
Subject: [PATCH 081/890] x86: simplify set_bitmap in ioport_32.c

Simplify set_bitmap(). This is not in a hotpath and we really can use the
straight forward loop through those bits. A similar implementation is used
in the 64 bit code as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ioport_32.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport_32.c
index 4ed48dc8df1e..c5a64011764f 100644
--- a/arch/x86/kernel/ioport_32.c
+++ b/arch/x86/kernel/ioport_32.c
@@ -16,36 +16,16 @@
 #include <linux/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+static void set_bitmap(unsigned long *bitmap, unsigned int base,
+		       unsigned int extent, int new_value)
 {
-	unsigned long mask;
-	unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
-	unsigned int low_index = base & (BITS_PER_LONG-1);
-	int length = low_index + extent;
+	unsigned int i;
 
-	if (low_index != 0) {
-		mask = (~0UL << low_index);
-		if (length < BITS_PER_LONG)
-			mask &= ~(~0UL << length);
+	for (i = base; i < base + extent; i++) {
 		if (new_value)
-			*bitmap_base++ |= mask;
+			__set_bit(i, bitmap);
 		else
-			*bitmap_base++ &= ~mask;
-		length -= BITS_PER_LONG;
-	}
-
-	mask = (new_value ? ~0UL : 0UL);
-	while (length >= BITS_PER_LONG) {
-		*bitmap_base++ = mask;
-		length -= BITS_PER_LONG;
-	}
-
-	if (length > 0) {
-		mask = ~(~0UL << length);
-		if (new_value)
-			*bitmap_base++ |= mask;
-		else
-			*bitmap_base++ &= ~mask;
+			__clear_bit(i, bitmap);
 	}
 }
 

From 9a211abeaab74e2634669a64ebd82fac5d94d276 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:24 +0100
Subject: [PATCH 082/890] x86: clean up ioport_32.c

Remove unused variables, rename the "unused" argument to regp. It is used !
Codingstyle fixes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ioport_32.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport_32.c
index c5a64011764f..c281ffa18259 100644
--- a/arch/x86/kernel/ioport_32.c
+++ b/arch/x86/kernel/ioport_32.c
@@ -29,16 +29,14 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
 	}
 }
 
-
 /*
  * this changes the io permissions bitmap in the current task.
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
-	unsigned long i, max_long, bytes, bytes_updated;
 	struct thread_struct * t = &current->thread;
 	struct tss_struct * tss;
-	unsigned long *bitmap;
+	unsigned long i, max_long;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -51,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 * this is why we delay this operation until now:
 	 */
 	if (!t->io_bitmap_ptr) {
-		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+		unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+
 		if (!bitmap)
 			return -ENOMEM;
 
@@ -80,10 +79,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 		if (t->io_bitmap_ptr[i] != ~0UL)
 			max_long = i;
 
-	bytes = (max_long + 1) * sizeof(long);
-	bytes_updated = max(bytes, t->io_bitmap_max);
-
-	t->io_bitmap_max = bytes;
+	t->io_bitmap_max = (max_long + 1) * sizeof(unsigned long);
 
 	/*
 	 * Sets the lazy trigger so that the next I/O operation will
@@ -110,9 +106,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
  * code.
  */
 
-asmlinkage long sys_iopl(unsigned long unused)
+asmlinkage long sys_iopl(unsigned long regsp)
 {
-	volatile struct pt_regs * regs = (struct pt_regs *) &unused;
+	volatile struct pt_regs *regs = (struct pt_regs *)&regsp;
 	unsigned int level = regs->ebx;
 	unsigned int old = (regs->eflags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
@@ -124,8 +120,10 @@ asmlinkage long sys_iopl(unsigned long unused)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
+
 	t->iopl = level << 12;
 	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
 	set_iopl_mask(t->iopl);
+
 	return 0;
 }

From ed4aed98da8d042716d327a0b538dd8002c0a767 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:24 +0100
Subject: [PATCH 083/890] x86: clean up arch/x86/kernel/vsmp_64.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsmp_64.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 414caf0c5f9a..d971210a6d36 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -25,21 +25,24 @@ static int __init vsmp_init(void)
 		return 0;
 
 	/* Check if we are running on a ScaleMP vSMP box */
-	if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) ||
-	    (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
+	if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) !=
+	     PCI_VENDOR_ID_SCALEMP) ||
+	    (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) !=
+	     PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
 		return 0;
 
 	/* set vSMP magic bits to indicate vSMP capable kernel */
 	address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8);
 	cap = readl(address);
 	ctl = readl(address + 4);
-	printk("vSMP CTL: capabilities:0x%08x  control:0x%08x\n", cap, ctl);
+	printk(KERN_INFO "vSMP CTL: capabilities:0x%08x  control:0x%08x\n",
+	       cap, ctl);
 	if (cap & ctl & (1 << 4)) {
 		/* Turn on vSMP IRQ fastpath handling (see system.h) */
 		ctl &= ~(1 << 4);
 		writel(ctl, address + 4);
 		ctl = readl(address + 4);
-		printk("vSMP CTL: control set to:0x%08x\n", ctl);
+		printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
 	}
 
 	iounmap(address);

From f8eeae682166e3b965dcdaf499932f4b63cc5b5d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:25 +0100
Subject: [PATCH 084/890] x86: clean up arch/x86/mm/mmap_32/64.c

White space and coding style clenaup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmap_32.c |  4 ++--
 arch/x86/mm/mmap_64.c | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/mmap_32.c b/arch/x86/mm/mmap_32.c
index 552e08473755..d7dd0962a6d7 100644
--- a/arch/x86/mm/mmap_32.c
+++ b/arch/x86/mm/mmap_32.c
@@ -64,8 +64,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
 	if (sysctl_legacy_va_layout ||
-			(current->personality & ADDR_COMPAT_LAYOUT) ||
-			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+	    (current->personality & ADDR_COMPAT_LAYOUT) ||
+	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
index 80bba0dc000e..ffb71a31bb6e 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap_64.c
@@ -16,11 +16,14 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 #endif
 	mm->mmap_base = TASK_UNMAPPED_BASE;
 	if (current->flags & PF_RANDOMIZE) {
-		/* Add 28bit randomness which is about 40bits of address space
-		   because mmap base has to be page aligned.
- 		   or ~1/128 of the total user VM
-	   	   (total user address space is 47bits) */
+		/*
+		 * Add 28bit randomness which is about 40bits of
+		 * address space because mmap base has to be page
+		 * aligned.  or ~1/128 of the total user VM (total
+		 * user address space is 47bits)
+		 */
 		unsigned rnd = get_random_int() & 0xfffffff;
+
 		mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
 	}
 	mm->get_unmapped_area = arch_get_unmapped_area;

From 4ec08da02f0fda16fbc8dfa040568facaa576790 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:26 +0100
Subject: [PATCH 085/890] x86: remove the duplicated arch/x86/ia32/mmap32.c

Use mmap_32.c in arch/x86/mm instead

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/Makefile  |  3 +-
 arch/x86/ia32/mmap32.c  | 79 -----------------------------------------
 arch/x86/mm/Makefile_64 |  2 +-
 arch/x86/mm/mmap_32.c   |  4 +++
 4 files changed, 6 insertions(+), 82 deletions(-)
 delete mode 100644 arch/x86/ia32/mmap32.c

diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e2edda255a84..a3c997e9f39a 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,8 +3,7 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
-	ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \
-	mmap32.o
+	ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/ia32/mmap32.c b/arch/x86/ia32/mmap32.c
deleted file mode 100644
index 7649370a3144..000000000000
--- a/arch/x86/ia32/mmap32.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *  linux/arch/x86_64/ia32/mm/mmap.c
- *
- *  flexible mmap layout support
- *
- * Based on the i386 version which was
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole.
- */
-#define MIN_GAP (128 * 1024 * 1024)
-#define MAX_GAP (TASK_SIZE / 6 * 5)
-
-static inline unsigned long mmap_base(struct mm_struct *mm)
-{
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-	unsigned long random_factor = 0;
-
-	if (current->flags & PF_RANDOMIZE)
-		random_factor = get_random_int() % (1024 * 1024);
-
-	if (gap < MIN_GAP)
-		gap = MIN_GAP;
-	else if (gap > MAX_GAP)
-		gap = MAX_GAP;
-
-	return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void ia32_pick_mmap_layout(struct mm_struct *mm)
-{
-	/*
-	 * Fall back to the standard layout if the personality
-	 * bit is set, or if the expected stack growth is unlimited:
-	 */
-	if (sysctl_legacy_va_layout ||
-	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
-		mm->mmap_base = TASK_UNMAPPED_BASE;
-		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
-	} else {
-		mm->mmap_base = mmap_base(mm);
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
-	}
-}
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index 6bcb47945b87..d18baed54fc6 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -7,4 +7,4 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA) += srat_64.o
-
+obj-$(CONFIG_IA32_EMULATION) += mmap_32.o
diff --git a/arch/x86/mm/mmap_32.c b/arch/x86/mm/mmap_32.c
index d7dd0962a6d7..d490662e4af4 100644
--- a/arch/x86/mm/mmap_32.c
+++ b/arch/x86/mm/mmap_32.c
@@ -57,7 +57,11 @@ static inline unsigned long mmap_base(struct mm_struct *mm)
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
+#ifdef CONFIG_X86_32
 void arch_pick_mmap_layout(struct mm_struct *mm)
+#else
+void ia32_pick_mmap_layout(struct mm_struct *mm)
+#endif
 {
 	/*
 	 * Fall back to the standard layout if the personality

From 6ce60b07e670e800c4c5cfe984ed5188e7a64135 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:26 +0100
Subject: [PATCH 086/890] x86: unify mc146818rtc.h - prepare for sharing rtc
 code

Unify mc146818rtc.h by adding the rtc_cmos_read/write functions to
time_64.c. This is a preparatory patch to finaly share the rtc code,
which is unsurprisingly similar.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c        |  21 +++++++
 include/asm-x86/mc146818rtc.h    | 100 +++++++++++++++++++++++++++++--
 include/asm-x86/mc146818rtc_32.h |  97 ------------------------------
 include/asm-x86/mc146818rtc_64.h |  29 ---------
 4 files changed, 117 insertions(+), 130 deletions(-)
 delete mode 100644 include/asm-x86/mc146818rtc_32.h
 delete mode 100644 include/asm-x86/mc146818rtc_64.h

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 368b1942b39a..0a01504586a5 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -69,6 +69,27 @@ unsigned long profile_pc(struct pt_regs *regs)
 }
 EXPORT_SYMBOL(profile_pc);
 
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+	unsigned char val;
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	val = inb_p(RTC_PORT(1));
+	lock_cmos_suffix(addr);
+	return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	outb_p(val, RTC_PORT(1));
+	lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
  * ms after the second nowtime has started, because when nowtime is written
diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index 5c2bb66caf17..9d39436df231 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h
@@ -1,5 +1,97 @@
-#ifdef CONFIG_X86_32
-# include "mc146818rtc_32.h"
-#else
-# include "mc146818rtc_64.h"
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <linux/mc146818rtc.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)	(0x70 + (x))
+#define RTC_ALWAYS_BCD	1	/* RTC operates in binary mode */
 #endif
+
+#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG)
+/*
+ * This lock provides nmi access to the CMOS/RTC registers.  It has some
+ * special properties.  It is owned by a CPU and stores the index register
+ * currently being accessed (if owned).  The idea here is that it works
+ * like a normal lock (normally).  However, in an NMI, the NMI code will
+ * first check to see if its CPU owns the lock, meaning that the NMI
+ * interrupted during the read/write of the device.  If it does, it goes ahead
+ * and performs the access and then restores the index register.  If it does
+ * not, it locks normally.
+ *
+ * Note that since we are working with NMIs, we need this lock even in
+ * a non-SMP machine just to mark that the lock is owned.
+ *
+ * This only works with compare-and-swap.  There is no other way to
+ * atomically claim the lock and set the owner.
+ */
+#include <linux/smp.h>
+extern volatile unsigned long cmos_lock;
+
+/*
+ * All of these below must be called with interrupts off, preempt
+ * disabled, etc.
+ */
+
+static inline void lock_cmos(unsigned char reg)
+{
+	unsigned long new;
+	new = ((smp_processor_id()+1) << 8) | reg;
+	for (;;) {
+		if (cmos_lock) {
+			cpu_relax();
+			continue;
+		}
+		if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0)
+			return;
+	}
+}
+
+static inline void unlock_cmos(void)
+{
+	cmos_lock = 0;
+}
+static inline int do_i_have_lock_cmos(void)
+{
+	return (cmos_lock >> 8) == (smp_processor_id()+1);
+}
+static inline unsigned char current_lock_cmos_reg(void)
+{
+	return cmos_lock & 0xff;
+}
+#define lock_cmos_prefix(reg) \
+	do {					\
+		unsigned long cmos_flags;	\
+		local_irq_save(cmos_flags);	\
+		lock_cmos(reg)
+#define lock_cmos_suffix(reg) \
+		unlock_cmos();			\
+		local_irq_restore(cmos_flags);	\
+	} while (0)
+#else
+#define lock_cmos_prefix(reg) do {} while (0)
+#define lock_cmos_suffix(reg) do {} while (0)
+#define lock_cmos(reg)
+#define unlock_cmos()
+#define do_i_have_lock_cmos() 0
+#define current_lock_cmos_reg() 0
+#endif
+
+/*
+ * The yet supported machines all access the RTC index register via
+ * an ISA port access but the way to access the date register differs ...
+ */
+#define CMOS_READ(addr) rtc_cmos_read(addr)
+#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr)
+unsigned char rtc_cmos_read(unsigned char addr);
+void rtc_cmos_write(unsigned char val, unsigned char addr);
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/mc146818rtc_32.h b/include/asm-x86/mc146818rtc_32.h
deleted file mode 100644
index 1613b42eaf58..000000000000
--- a/include/asm-x86/mc146818rtc_32.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
-
-#ifndef RTC_PORT
-#define RTC_PORT(x)	(0x70 + (x))
-#define RTC_ALWAYS_BCD	1	/* RTC operates in binary mode */
-#endif
-
-#ifdef __HAVE_ARCH_CMPXCHG
-/*
- * This lock provides nmi access to the CMOS/RTC registers.  It has some
- * special properties.  It is owned by a CPU and stores the index register
- * currently being accessed (if owned).  The idea here is that it works
- * like a normal lock (normally).  However, in an NMI, the NMI code will
- * first check to see if its CPU owns the lock, meaning that the NMI
- * interrupted during the read/write of the device.  If it does, it goes ahead
- * and performs the access and then restores the index register.  If it does
- * not, it locks normally.
- *
- * Note that since we are working with NMIs, we need this lock even in
- * a non-SMP machine just to mark that the lock is owned.
- *
- * This only works with compare-and-swap.  There is no other way to
- * atomically claim the lock and set the owner.
- */
-#include <linux/smp.h>
-extern volatile unsigned long cmos_lock;
-
-/*
- * All of these below must be called with interrupts off, preempt
- * disabled, etc.
- */
-
-static inline void lock_cmos(unsigned char reg)
-{
-	unsigned long new;
-	new = ((smp_processor_id()+1) << 8) | reg;
-	for (;;) {
-		if (cmos_lock) {
-			cpu_relax();
-			continue;
-		}
-		if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0)
-			return;
-	}
-}
-
-static inline void unlock_cmos(void)
-{
-	cmos_lock = 0;
-}
-static inline int do_i_have_lock_cmos(void)
-{
-	return (cmos_lock >> 8) == (smp_processor_id()+1);
-}
-static inline unsigned char current_lock_cmos_reg(void)
-{
-	return cmos_lock & 0xff;
-}
-#define lock_cmos_prefix(reg) \
-	do {					\
-		unsigned long cmos_flags;	\
-		local_irq_save(cmos_flags);	\
-		lock_cmos(reg)
-#define lock_cmos_suffix(reg) \
-		unlock_cmos();			\
-		local_irq_restore(cmos_flags);	\
-	} while (0)
-#else
-#define lock_cmos_prefix(reg) do {} while (0)
-#define lock_cmos_suffix(reg) do {} while (0)
-#define lock_cmos(reg)
-#define unlock_cmos()
-#define do_i_have_lock_cmos() 0
-#define current_lock_cmos_reg() 0
-#endif
-
-/*
- * The yet supported machines all access the RTC index register via
- * an ISA port access but the way to access the date register differs ...
- */
-#define CMOS_READ(addr) rtc_cmos_read(addr)
-#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr)
-unsigned char rtc_cmos_read(unsigned char addr);
-void rtc_cmos_write(unsigned char val, unsigned char addr);
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/mc146818rtc_64.h b/include/asm-x86/mc146818rtc_64.h
deleted file mode 100644
index d6e3009430c1..000000000000
--- a/include/asm-x86/mc146818rtc_64.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-
-#ifndef RTC_PORT
-#define RTC_PORT(x)	(0x70 + (x))
-#define RTC_ALWAYS_BCD	1	/* RTC operates in binary mode */
-#endif
-
-/*
- * The yet supported machines all access the RTC index register via
- * an ISA port access but the way to access the date register differs ...
- */
-#define CMOS_READ(addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-outb_p((val),RTC_PORT(1)); \
-})
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */

From fe599f9fbc5d470ec5b55d08f2bbb991ddecbbc8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:26 +0100
Subject: [PATCH 087/890] x86: isolate the rtc code for sharing

The mach-default/mach_time.h code inline is moved to arch/x86/kernel/rtc.c
and the header files are adjusted.

Shrink the 3 dozen includes to the ones we really need.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile_32                   |   2 +-
 .../mach_time.h => arch/x86/kernel/rtc.c      |  83 +++++++++++---
 arch/x86/kernel/time_32.c                     | 105 +-----------------
 include/asm-x86/mc146818rtc.h                 |   3 +
 include/asm-x86/time.h                        |   2 +-
 5 files changed, 78 insertions(+), 117 deletions(-)
 rename include/asm-x86/mach-default/mach_time.h => arch/x86/kernel/rtc.c (64%)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 31ff982bc26b..eb2da53578d7 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		ptrace_32.o time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
 		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
-		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
+		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/include/asm-x86/mach-default/mach_time.h b/arch/x86/kernel/rtc.c
similarity index 64%
rename from include/asm-x86/mach-default/mach_time.h
rename to arch/x86/kernel/rtc.c
index 31eb5de6f3dc..45bf54d9f4c5 100644
--- a/include/asm-x86/mach-default/mach_time.h
+++ b/arch/x86/kernel/rtc.c
@@ -1,18 +1,10 @@
 /*
- *  include/asm-i386/mach-default/mach_time.h
- *
- *  Machine specific set RTC function for generic.
- *  Split out from time.c by Osamu Tomita <tomita@cinet.co.jp>
+ * RTC related functions
  */
-#ifndef _MACH_TIME_H
-#define _MACH_TIME_H
-
+#include <linux/bcd.h>
 #include <linux/mc146818rtc.h>
 
-/* for check timing call set_rtc_mmss() 500ms     */
-/* used in arch/i386/time.c::do_timer_interrupt() */
-#define USEC_AFTER	500000
-#define USEC_BEFORE	500000
+#include <asm/time.h>
 
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
@@ -24,7 +16,7 @@
  * BUG: This routine does not handle hour overflow properly; it just
  *      sets the minutes. Usually you'll only notice that after reboot!
  */
-static inline int mach_set_rtc_mmss(unsigned long nowtime)
+int mach_set_rtc_mmss(unsigned long nowtime)
 {
 	int retval = 0;
 	int real_seconds, real_minutes, cmos_minutes;
@@ -79,7 +71,7 @@ static inline int mach_set_rtc_mmss(unsigned long nowtime)
 	return retval;
 }
 
-static inline unsigned long mach_get_cmos_time(void)
+unsigned long mach_get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 
@@ -108,4 +100,67 @@ static inline unsigned long mach_get_cmos_time(void)
 	return mktime(year, mon, day, hour, min, sec);
 }
 
-#endif /* !_MACH_TIME_H */
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
+/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+	unsigned char val;
+
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	val = inb_p(RTC_PORT(1));
+	lock_cmos_suffix(addr);
+	return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	outb_p(val, RTC_PORT(1));
+	lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+static int set_rtc_mmss(unsigned long nowtime)
+{
+	int retval;
+	unsigned long flags;
+
+	/* gets recalled with irq locally disabled */
+	/* XXX - does irqsave resolve this? -johnstul */
+	spin_lock_irqsave(&rtc_lock, flags);
+	retval = set_wallclock(nowtime);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	return retval;
+}
+
+/* not static: needed by APM */
+unsigned long read_persistent_clock(void)
+{
+	unsigned long retval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	retval = get_wallclock();
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	return retval;
+}
+
+int update_persistent_clock(struct timespec now)
+{
+	return set_rtc_mmss(now.tv_sec);
+}
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 8a322c96bc23..e9ead762abe8 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -28,98 +28,21 @@
  *	serialize accesses to xtime/lost_ticks).
  */
 
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
+#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/efi.h>
 #include <linux/mca.h>
 
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/msr.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/timer.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
 #include <asm/time.h>
 
-#include "mach_time.h"
-
-#include <linux/timex.h>
-
-#include <asm/hpet.h>
-
-#include <asm/arch_hooks.h>
-
 #include "io_ports.h"
-
-#include <asm/i8259.h>
-
 #include "do_timer.h"
 
 unsigned int cpu_khz;	/* Detected as we calibrate the TSC */
 EXPORT_SYMBOL(cpu_khz);
 
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-/*
- * This is a special lock that is owned by the CPU and holds the index
- * register we are working with.  It is required for NMI access to the
- * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
- */
-volatile unsigned long cmos_lock = 0;
-EXPORT_SYMBOL(cmos_lock);
-
-/* Routines for accessing the CMOS RAM/RTC. */
-unsigned char rtc_cmos_read(unsigned char addr)
-{
-	unsigned char val;
-	lock_cmos_prefix(addr);
-	outb_p(addr, RTC_PORT(0));
-	val = inb_p(RTC_PORT(1));
-	lock_cmos_suffix(addr);
-	return val;
-}
-EXPORT_SYMBOL(rtc_cmos_read);
-
-void rtc_cmos_write(unsigned char val, unsigned char addr)
-{
-	lock_cmos_prefix(addr);
-	outb_p(addr, RTC_PORT(0));
-	outb_p(val, RTC_PORT(1));
-	lock_cmos_suffix(addr);
-}
-EXPORT_SYMBOL(rtc_cmos_write);
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
-	int retval;
-	unsigned long flags;
-
-	/* gets recalled with irq locally disabled */
-	/* XXX - does irqsave resolve this? -johnstul */
-	spin_lock_irqsave(&rtc_lock, flags);
-	retval = set_wallclock(nowtime);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return retval;
-}
-
-
 int timer_ack;
 
 unsigned long profile_pc(struct pt_regs *regs)
@@ -137,7 +60,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 		/* Return address is either directly at stack pointer
 		   or above a saved eflags. Eflags has bits 22-31 zero,
 		   kernel addresses don't. */
- 		if (sp[0] >> 22)
+		if (sp[0] >> 22)
 			return sp[0];
 		if (sp[1] >> 22)
 			return sp[1];
@@ -193,26 +116,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-/* not static: needed by APM */
-unsigned long read_persistent_clock(void)
-{
-	unsigned long retval;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-
-	retval = get_wallclock();
-
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
-	return set_rtc_mmss(now.tv_sec);
-}
-
 extern void (*late_time_init)(void);
 /* Duplicate of time_init() below, with hpet_enable part added */
 void __init hpet_time_init(void)
diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index 9d39436df231..cdd9f965835a 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h
@@ -92,6 +92,9 @@ static inline unsigned char current_lock_cmos_reg(void)
 unsigned char rtc_cmos_read(unsigned char addr);
 void rtc_cmos_write(unsigned char val, unsigned char addr);
 
+extern int mach_set_rtc_mmss(unsigned long nowtime);
+extern unsigned long mach_get_cmos_time(void);
+
 #define RTC_IRQ 8
 
 #endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index eac011366dc2..b3f94cd81ac6 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -2,7 +2,7 @@
 #define _ASMi386_TIME_H
 
 #include <linux/efi.h>
-#include "mach_time.h"
+#include <asm/mc146818rtc.h>
 
 static inline unsigned long native_get_wallclock(void)
 {

From 1122b134bcd6e77c5a4117952b8cbc55c8d018bc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:27 +0100
Subject: [PATCH 088/890] x86: share rtc code

Remove the rtc code from time_64.c and add the extra bits to the
i386 path. The ACPI century check is probably valid for i386 as
well, but this is material for a separate patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile_64 |   2 +-
 arch/x86/kernel/rtc.c       |  92 ++++++++++++++-------
 arch/x86/kernel/time_64.c   | 157 ------------------------------------
 3 files changed, 62 insertions(+), 189 deletions(-)

diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 9cb3df27c413..ae95d21ea885 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
 		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
-		i8253.o io_delay.o
+		i8253.o io_delay.o rtc.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 45bf54d9f4c5..d040840ff1b6 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -1,11 +1,32 @@
 /*
  * RTC related functions
  */
+#include <linux/acpi.h>
 #include <linux/bcd.h>
 #include <linux/mc146818rtc.h>
 
 #include <asm/time.h>
 
+#ifdef CONFIG_X86_32
+# define CMOS_YEARS_OFFS 1900
+/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+#else
+/*
+ * x86-64 systems only exists since 2002.
+ * This will work up to Dec 31, 2100
+ */
+# define CMOS_YEARS_OFFS 2000
+#endif
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  * called 500 ms after the second nowtime has started, because when
@@ -22,10 +43,12 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 	int real_seconds, real_minutes, cmos_minutes;
 	unsigned char save_control, save_freq_select;
 
-	save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
+	 /* tell the clock it's being set */
+	save_control = CMOS_READ(RTC_CONTROL);
 	CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
 
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
+	/* stop and reset prescaler */
+	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
 	CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
 
 	cmos_minutes = CMOS_READ(RTC_MINUTES);
@@ -40,8 +63,9 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 	 */
 	real_seconds = nowtime % 60;
 	real_minutes = nowtime / 60;
+	/* correct for half hour time zone */
 	if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
-		real_minutes += 30;		/* correct for half hour time zone */
+		real_minutes += 30;
 	real_minutes %= 60;
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
@@ -73,18 +97,32 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
 unsigned long mach_get_cmos_time(void)
 {
-	unsigned int year, mon, day, hour, min, sec;
+	unsigned int year, mon, day, hour, min, sec, century = 0;
 
-	do {
-		sec = CMOS_READ(RTC_SECONDS);
-		min = CMOS_READ(RTC_MINUTES);
-		hour = CMOS_READ(RTC_HOURS);
-		day = CMOS_READ(RTC_DAY_OF_MONTH);
-		mon = CMOS_READ(RTC_MONTH);
-		year = CMOS_READ(RTC_YEAR);
-	} while (sec != CMOS_READ(RTC_SECONDS));
+	/*
+	 * If UIP is clear, then we have >= 244 microseconds before
+	 * RTC registers will be updated.  Spec sheet says that this
+	 * is the reliable way to read RTC - registers. If UIP is set
+	 * then the register access might be invalid.
+	 */
+	while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+		cpu_relax();
 
-	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+	sec = CMOS_READ(RTC_SECONDS);
+	min = CMOS_READ(RTC_MINUTES);
+	hour = CMOS_READ(RTC_HOURS);
+	day = CMOS_READ(RTC_DAY_OF_MONTH);
+	mon = CMOS_READ(RTC_MONTH);
+	year = CMOS_READ(RTC_YEAR);
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64)
+	/* CHECKME: Is this really 64bit only ??? */
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+	    acpi_gbl_FADT.century)
+		century = CMOS_READ(acpi_gbl_FADT.century);
+#endif
+
+	if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) {
 		BCD_TO_BIN(sec);
 		BCD_TO_BIN(min);
 		BCD_TO_BIN(hour);
@@ -93,24 +131,19 @@ unsigned long mach_get_cmos_time(void)
 		BCD_TO_BIN(year);
 	}
 
-	year += 1900;
-	if (year < 1970)
-		year += 100;
+	if (century) {
+		BCD_TO_BIN(century);
+		year += century * 100;
+		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
+	} else {
+		year += CMOS_YEARS_OFFS;
+		if (year < 1970)
+			year += 100;
+	}
 
 	return mktime(year, mon, day, hour, min, sec);
 }
 
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-/*
- * This is a special lock that is owned by the CPU and holds the index
- * register we are working with.  It is required for NMI access to the
- * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
- */
-volatile unsigned long cmos_lock = 0;
-EXPORT_SYMBOL(cmos_lock);
-
 /* Routines for accessing the CMOS RAM/RTC. */
 unsigned char rtc_cmos_read(unsigned char addr)
 {
@@ -138,8 +171,6 @@ static int set_rtc_mmss(unsigned long nowtime)
 	int retval;
 	unsigned long flags;
 
-	/* gets recalled with irq locally disabled */
-	/* XXX - does irqsave resolve this? -johnstul */
 	spin_lock_irqsave(&rtc_lock, flags);
 	retval = set_wallclock(nowtime);
 	spin_unlock_irqrestore(&rtc_lock, flags);
@@ -150,8 +181,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 /* not static: needed by APM */
 unsigned long read_persistent_clock(void)
 {
-	unsigned long retval;
-	unsigned long flags;
+	unsigned long retval, flags;
 
 	spin_lock_irqsave(&rtc_lock, flags);
 	retval = get_wallclock();
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 0a01504586a5..64cd03ed9bfc 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -46,9 +46,6 @@
 #include <asm/nmi.h>
 #include <asm/vgtod.h>
 
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 
 unsigned long profile_pc(struct pt_regs *regs)
@@ -69,103 +66,6 @@ unsigned long profile_pc(struct pt_regs *regs)
 }
 EXPORT_SYMBOL(profile_pc);
 
-/* Routines for accessing the CMOS RAM/RTC. */
-unsigned char rtc_cmos_read(unsigned char addr)
-{
-	unsigned char val;
-	lock_cmos_prefix(addr);
-	outb_p(addr, RTC_PORT(0));
-	val = inb_p(RTC_PORT(1));
-	lock_cmos_suffix(addr);
-	return val;
-}
-EXPORT_SYMBOL(rtc_cmos_read);
-
-void rtc_cmos_write(unsigned char val, unsigned char addr)
-{
-	lock_cmos_prefix(addr);
-	outb_p(addr, RTC_PORT(0));
-	outb_p(val, RTC_PORT(1));
-	lock_cmos_suffix(addr);
-}
-EXPORT_SYMBOL(rtc_cmos_write);
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
- * ms after the second nowtime has started, because when nowtime is written
- * into the registers of the CMOS clock, it will jump to the next second
- * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
- * sheet for details.
- */
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
-	int retval = 0;
-	int real_seconds, real_minutes, cmos_minutes;
-	unsigned char control, freq_select;
-	unsigned long flags;
-
-/*
- * set_rtc_mmss is called when irqs are enabled, so disable irqs here
- */
-	spin_lock_irqsave(&rtc_lock, flags);
-/*
- * Tell the clock it's being set and stop it.
- */
-	control = CMOS_READ(RTC_CONTROL);
-	CMOS_WRITE(control | RTC_SET, RTC_CONTROL);
-
-	freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
-
-	cmos_minutes = CMOS_READ(RTC_MINUTES);
-		BCD_TO_BIN(cmos_minutes);
-
-/*
- * since we're only adjusting minutes and seconds, don't interfere with hour
- * overflow. This avoids messing with unknown time zones but requires your RTC
- * not to be off by more than 15 minutes. Since we're calling it only when
- * our clock is externally synchronized using NTP, this shouldn't be a problem.
- */
-
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
-		real_minutes += 30;		/* correct for half hour time zone */
-	real_minutes %= 60;
-
-	if (abs(real_minutes - cmos_minutes) >= 30) {
-		printk(KERN_WARNING "time.c: can't update CMOS clock "
-		       "from %d to %d\n", cmos_minutes, real_minutes);
-		retval = -1;
-	} else {
-		BIN_TO_BCD(real_seconds);
-		BIN_TO_BCD(real_minutes);
-		CMOS_WRITE(real_seconds, RTC_SECONDS);
-		CMOS_WRITE(real_minutes, RTC_MINUTES);
-	}
-
-/*
- * The following flags have to be released exactly in this order, otherwise the
- * DS12887 (popular MC146818A clone with integrated battery and quartz) will
- * not reset the oscillator and will not update precisely 500 ms later. You
- * won't find this mentioned in the Dallas Semiconductor data sheets, but who
- * believes data sheets anyway ... -- Markus Kuhn
- */
-
-	CMOS_WRITE(control, RTC_CONTROL);
-	CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
-
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
-	return set_rtc_mmss(now.tv_sec);
-}
-
 static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 {
 	add_pda(irq0_irqs, 1);
@@ -175,63 +75,6 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-unsigned long read_persistent_clock(void)
-{
-	unsigned int year, mon, day, hour, min, sec;
-	unsigned long flags;
-	unsigned century = 0;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	/*
-	 * if UIP is clear, then we have >= 244 microseconds before RTC
-	 * registers will be updated.  Spec sheet says that this is the
-	 * reliable way to read RTC - registers invalid (off bus) during update
-	 */
-	while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
-		cpu_relax();
-
-
-	/* now read all RTC registers while stable with interrupts disabled */
-	sec = CMOS_READ(RTC_SECONDS);
-	min = CMOS_READ(RTC_MINUTES);
-	hour = CMOS_READ(RTC_HOURS);
-	day = CMOS_READ(RTC_DAY_OF_MONTH);
-	mon = CMOS_READ(RTC_MONTH);
-	year = CMOS_READ(RTC_YEAR);
-#ifdef CONFIG_ACPI
-	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
-				acpi_gbl_FADT.century)
-		century = CMOS_READ(acpi_gbl_FADT.century);
-#endif
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	/*
-	 * We know that x86-64 always uses BCD format, no need to check the
-	 * config register.
-	 */
-
-	BCD_TO_BIN(sec);
-	BCD_TO_BIN(min);
-	BCD_TO_BIN(hour);
-	BCD_TO_BIN(day);
-	BCD_TO_BIN(mon);
-	BCD_TO_BIN(year);
-
-	if (century) {
-		BCD_TO_BIN(century);
-		year += century * 100;
-		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
-	} else {
-		/*
-		 * x86-64 systems only exists since 2002.
-		 * This will work up to Dec 31, 2100
-		 */
-		year += 2000;
-	}
-
-	return mktime(year, mon, day, hour, min, sec);
-}
-
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000

From 081e10b96e971da2eba05ab1ecbf2c051fa119f6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:27 +0100
Subject: [PATCH 089/890] x86: clean up arch/x86/kernel/time_64.c includes

Reduce the lets include all to the minimum.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 64cd03ed9bfc..f88bf6b802e3 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -11,38 +11,14 @@
  *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
  */
 
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/time.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/kallsyms.h>
-#include <linux/acpi.h>
 #include <linux/clockchips.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/time.h>
 
-#ifdef CONFIG_ACPI
-#include <acpi/achware.h>	/* for PM timer frequency */
-#include <acpi/acpi_bus.h>
-#endif
 #include <asm/i8253.h>
-#include <asm/pgtable.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/proto.h>
 #include <asm/hpet.h>
-#include <asm/sections.h>
-#include <linux/hpet.h>
-#include <asm/apic.h>
-#include <asm/hpet.h>
-#include <asm/mpspec.h>
 #include <asm/nmi.h>
 #include <asm/vgtod.h>
 

From 16da2f93054fc379522b93afc71d49751bd8be2b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:27 +0100
Subject: [PATCH 090/890] x86: smp_64.c: remove unused exports and cleanup
 while at it

The exports are nowhere used. There is even no reason why they were
ever introduced.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smp_64.c | 74 +++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 03fa6ed559c6..62b0f2a1b1e8 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -29,7 +29,7 @@
 #include <asm/idle.h>
 
 /*
- *	Smarter SMP flushing macros. 
+ *	Smarter SMP flushing macros.
  *		c/o Linus Torvalds.
  *
  *	These mean you can really definitely utterly forget about
@@ -37,15 +37,15 @@
  *
  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
  *
- * 	More scalable flush, from Andi Kleen
+ *	More scalable flush, from Andi Kleen
  *
- * 	To avoid global state use 8 different call vectors.
- * 	Each CPU uses a specific vector to trigger flushes on other
- * 	CPUs. Depending on the received vector the target CPUs look into
+ *	To avoid global state use 8 different call vectors.
+ *	Each CPU uses a specific vector to trigger flushes on other
+ *	CPUs. Depending on the received vector the target CPUs look into
  *	the right per cpu variable for the flush data.
  *
- * 	With more than 8 CPUs they are hashed to the 8 available
- * 	vectors. The limited global vector space forces us to this right now.
+ *	With more than 8 CPUs they are hashed to the 8 available
+ *	vectors. The limited global vector space forces us to this right now.
  *	In future when interrupts are split into per CPU domains this could be
  *	fixed, at the cost of triggering multiple IPIs in some cases.
  */
@@ -67,7 +67,7 @@ union smp_flush_state {
 static DEFINE_PER_CPU(union smp_flush_state, flush_state);
 
 /*
- * We cannot call mmdrop() because we are in interrupt context, 
+ * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
 static inline void leave_mm(int cpu)
@@ -85,25 +85,25 @@ static inline void leave_mm(int cpu)
  * 1) switch_mm() either 1a) or 1b)
  * 1a) thread switch to a different mm
  * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * 	Stop ipi delivery for the old mm. This is not synchronized with
- * 	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * 	for the wrong mm, and in the worst case we perform a superfluous
- * 	tlb flush.
+ *	Stop ipi delivery for the old mm. This is not synchronized with
+ *	the other cpus, but smp_invalidate_interrupt ignore flush ipis
+ *	for the wrong mm, and in the worst case we perform a superfluous
+ *	tlb flush.
  * 1a2) set cpu mmu_state to TLBSTATE_OK
- * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
  *	was in lazy tlb mode.
  * 1a3) update cpu active_mm
- * 	Now cpu0 accepts tlb flushes for the new mm.
+ *	Now cpu0 accepts tlb flushes for the new mm.
  * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * 	Now the other cpus will send tlb flush ipis.
+ *	Now the other cpus will send tlb flush ipis.
  * 1a4) change cr3.
  * 1b) thread switch without mm change
  *	cpu active_mm is correct, cpu0 already handles
  *	flush ipis.
  * 1b1) set cpu mmu_state to TLBSTATE_OK
  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * 	Atomically set the bit [other cpus will start sending flush ipis],
- * 	and test the bit.
+ *	Atomically set the bit [other cpus will start sending flush ipis],
+ *	and test the bit.
  * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
  * 2) switch %%esp, ie current
  *
@@ -142,7 +142,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 
 	if (!cpu_isset(cpu, f->flush_cpumask))
 		goto out;
-		/* 
+		/*
 		 * This was a BUG() but until someone can quote me the
 		 * line from the intel manual that guarantees an IPI to
 		 * multiple CPUs is retried _only_ on the erroring CPUs
@@ -150,7 +150,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 		 *
 		 * BUG();
 		 */
-		 
+
 	if (f->flush_mm == read_pda(active_mm)) {
 		if (read_pda(mmu_state) == TLBSTATE_OK) {
 			if (f->flush_va == FLUSH_ALL)
@@ -176,9 +176,11 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
 	f = &per_cpu(flush_state, sender);
 
-	/* Could avoid this lock when
-	   num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
-	   probably not worth checking this for a cache-hot lock. */
+	/*
+	 * Could avoid this lock when
+	 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+	 * probably not worth checking this for a cache-hot lock.
+	 */
 	spin_lock(&f->tlbstate_lock);
 
 	f->flush_mm = mm;
@@ -202,14 +204,14 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 int __cpuinit init_smp_flush(void)
 {
 	int i;
+
 	for_each_cpu_mask(i, cpu_possible_map) {
 		spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
 	}
 	return 0;
 }
-
 core_initcall(init_smp_flush);
-	
+
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
@@ -224,7 +226,6 @@ void flush_tlb_current_task(void)
 		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 	preempt_enable();
 }
-EXPORT_SYMBOL(flush_tlb_current_task);
 
 void flush_tlb_mm (struct mm_struct * mm)
 {
@@ -245,7 +246,6 @@ void flush_tlb_mm (struct mm_struct * mm)
 
 	preempt_enable();
 }
-EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
@@ -259,8 +259,8 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 	if (current->active_mm == mm) {
 		if(current->mm)
 			__flush_tlb_one(va);
-		 else
-		 	leave_mm(smp_processor_id());
+		else
+			leave_mm(smp_processor_id());
 	}
 
 	if (!cpus_empty(cpu_mask))
@@ -268,7 +268,6 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
 	preempt_enable();
 }
-EXPORT_SYMBOL(flush_tlb_page);
 
 static void do_flush_tlb_all(void* info)
 {
@@ -325,11 +324,9 @@ void unlock_ipi_call_lock(void)
  * this function sends a 'generic call function' IPI to all other CPU
  * of the system defined in the mask.
  */
-
-static int
-__smp_call_function_mask(cpumask_t mask,
-			 void (*func)(void *), void *info,
-			 int wait)
+static int __smp_call_function_mask(cpumask_t mask,
+				    void (*func)(void *), void *info,
+				    int wait)
 {
 	struct call_data_struct data;
 	cpumask_t allbutself;
@@ -417,11 +414,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
  */
 
 int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
-	int nonatomic, int wait)
+			      int nonatomic, int wait)
 {
 	/* prevent preemption and reschedule on another processor */
-	int ret;
-	int me = get_cpu();
+	int ret, me = get_cpu();
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
@@ -471,9 +467,9 @@ static void stop_this_cpu(void *dummy)
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
 	disable_local_APIC();
-	for (;;) 
+	for (;;)
 		halt();
-} 
+}
 
 void smp_send_stop(void)
 {

From 02456c708eab4515121e5e581422fa3be14368d1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:27 +0100
Subject: [PATCH 091/890] x86: nuke a ton of dead hpet code

No users, just ballast

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/hpet.c  | 75 --------------------------------------------
 include/linux/hpet.h |  3 --
 2 files changed, 78 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 22f5fd02ea87..465ad35ed38f 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -600,63 +600,6 @@ static int hpet_is_known(struct hpet_data *hdp)
 	return 0;
 }
 
-EXPORT_SYMBOL(hpet_alloc);
-EXPORT_SYMBOL(hpet_register);
-EXPORT_SYMBOL(hpet_unregister);
-EXPORT_SYMBOL(hpet_control);
-
-int hpet_register(struct hpet_task *tp, int periodic)
-{
-	unsigned int i;
-	u64 mask;
-	struct hpet_timer __iomem *timer;
-	struct hpet_dev *devp;
-	struct hpets *hpetp;
-
-	switch (periodic) {
-	case 1:
-		mask = Tn_PER_INT_CAP_MASK;
-		break;
-	case 0:
-		mask = 0;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	tp->ht_opaque = NULL;
-
-	spin_lock_irq(&hpet_task_lock);
-	spin_lock(&hpet_lock);
-
-	for (devp = NULL, hpetp = hpets; hpetp && !devp; hpetp = hpetp->hp_next)
-		for (timer = hpetp->hp_hpet->hpet_timers, i = 0;
-		     i < hpetp->hp_ntimer; i++, timer++) {
-			if ((readq(&timer->hpet_config) & Tn_PER_INT_CAP_MASK)
-			    != mask)
-				continue;
-
-			devp = &hpetp->hp_dev[i];
-
-			if (devp->hd_flags & HPET_OPEN || devp->hd_task) {
-				devp = NULL;
-				continue;
-			}
-
-			tp->ht_opaque = devp;
-			devp->hd_task = tp;
-			break;
-		}
-
-	spin_unlock(&hpet_lock);
-	spin_unlock_irq(&hpet_task_lock);
-
-	if (tp->ht_opaque)
-		return 0;
-	else
-		return -EBUSY;
-}
-
 static inline int hpet_tpcheck(struct hpet_task *tp)
 {
 	struct hpet_dev *devp;
@@ -706,24 +649,6 @@ int hpet_unregister(struct hpet_task *tp)
 	return 0;
 }
 
-int hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
-{
-	struct hpet_dev *devp;
-	int err;
-
-	if ((err = hpet_tpcheck(tp)))
-		return err;
-
-	spin_lock_irq(&hpet_lock);
-	devp = tp->ht_opaque;
-	if (devp->hd_task != tp) {
-		spin_unlock_irq(&hpet_lock);
-		return -ENXIO;
-	}
-	spin_unlock_irq(&hpet_lock);
-	return hpet_ioctl_common(devp, cmd, arg, 1);
-}
-
 static ctl_table hpet_table[] = {
 	{
 	 .ctl_name = CTL_UNNUMBERED,
diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index e3c0b2aa944c..9cd94bfd07e5 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -115,9 +115,6 @@ static inline void hpet_reserve_timer(struct hpet_data *hd, int timer)
 }
 
 int hpet_alloc(struct hpet_data *);
-int hpet_register(struct hpet_task *, int);
-int hpet_unregister(struct hpet_task *);
-int hpet_control(struct hpet_task *, unsigned int, unsigned long);
 
 #endif /* __KERNEL__ */
 

From a72368dd37f6ae333fbab03598e46a995d91decc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:28 +0100
Subject: [PATCH 092/890] x86: remove dead code and exports

No users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_64.c | 7 -------
 include/asm-x86/idle.h       | 1 -
 2 files changed, 8 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 2c9e59448f4c..98d85952f574 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -72,13 +72,6 @@ void idle_notifier_register(struct notifier_block *n)
 {
 	atomic_notifier_chain_register(&idle_notifier, n);
 }
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
 
 void enter_idle(void)
 {
diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h
index 6bd47dcf2067..d240e5b30a45 100644
--- a/include/asm-x86/idle.h
+++ b/include/asm-x86/idle.h
@@ -6,7 +6,6 @@
 
 struct notifier_block;
 void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
 
 void enter_idle(void);
 void exit_idle(void);

From 3abf024d2abb79614d8c4cb25a70d5596f77d0ad Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:28 +0100
Subject: [PATCH 093/890] x86: nuke a ton of unused exports

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perfctr-watchdog.c | 1 -
 arch/x86/kernel/e820_64.c              | 1 -
 arch/x86/kernel/init_task.c            | 1 -
 arch/x86/kernel/pci-swiotlb_64.c       | 1 -
 arch/x86/kernel/smpboot_32.c           | 1 -
 arch/x86/kernel/smpboot_64.c           | 2 --
 arch/x86/kernel/stacktrace.c           | 1 -
 arch/x86/kernel/traps_64.c             | 8 --------
 arch/x86/mach-voyager/voyager_smp.c    | 1 -
 arch/x86/mm/numa_64.c                  | 1 -
 include/asm-x86/bug.h                  | 3 ---
 include/asm-x86/mmu_context_64.h       | 2 +-
 12 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index c02541e6e653..9b838324b818 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int msr)
 	clear_bit(counter, evntsel_nmi_owner);
 }
 
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 EXPORT_SYMBOL(reserve_perfctr_nmi);
 EXPORT_SYMBOL(release_perfctr_nmi);
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index e510cfd5bb71..0128b0b0e5b5 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -34,7 +34,6 @@ struct e820map e820;
  * PFN of last memory page.
  */
 unsigned long end_pfn;
-EXPORT_SYMBOL(end_pfn);
 
 /*
  * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 468c9c437842..5b3ce7934363 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
 
 /*
  * Initial thread structure.
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 102866d729a5..82a0a674a003 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -10,7 +10,6 @@
 #include <asm/dma.h>
 
 int swiotlb __read_mostly;
-EXPORT_SYMBOL(swiotlb);
 
 const struct dma_mapping_ops swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 4ea80cbe52e5..753d7acf4dac 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(cpu_online_map);
 
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index b36d32ff0b39..7552db9ee9ff 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -78,8 +78,6 @@ EXPORT_SYMBOL(cpu_online_map);
  */
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
-
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 55771fd7e545..c571edd11878 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -69,7 +69,6 @@ void save_stack_trace(struct stack_trace *trace)
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
-EXPORT_SYMBOL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index cc68b92316cd..aa248d754533 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -461,14 +461,6 @@ int is_valid_bugaddr(unsigned long rip)
 	return ud2 == 0x0b0f;
 }
 
-#ifdef CONFIG_BUG
-void out_of_line_bug(void)
-{ 
-	BUG(); 
-} 
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
 static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 73c435ce10fd..981def2b4e9b 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -227,7 +227,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 /* This is for the new dynamic CPU boot code */
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_callout_map);
 cpumask_t cpu_possible_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(cpu_possible_map);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d8fdc05f415..e7f3f4e9ec85 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -624,7 +624,6 @@ void __init init_cpu_to_node(void)
 
 EXPORT_SYMBOL(cpu_to_node);
 EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode);
 EXPORT_SYMBOL(node_data);
 
 #ifdef CONFIG_DISCONTIGMEM
diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h
index fd8bdc639c48..8d477a201392 100644
--- a/include/asm-x86/bug.h
+++ b/include/asm-x86/bug.h
@@ -33,9 +33,6 @@
 	} while(0)
 #endif
 
-void out_of_line_bug(void);
-#else /* CONFIG_BUG */
-static inline void out_of_line_bug(void) { }
 #endif /* !CONFIG_BUG */
 
 #include <asm-generic/bug.h>
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 0cce83a78378..29f95c3acc28 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -49,7 +49,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	else {
 		write_pda(mmu_state, TLBSTATE_OK);
 		if (read_pda(active_mm) != next)
-			out_of_line_bug();
+			BUG();
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled 
 			 * tlb flush IPI delivery. We must reload CR3

From 99122a3fe74c4918cdc86a8d6d63b40240fa8881 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 30 Jan 2008 13:30:28 +0100
Subject: [PATCH 094/890] x86: remove more bogus filenames in comments.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/acpi_32.h                 | 4 +---
 include/asm-x86/acpi_64.h                 | 4 +---
 include/asm-x86/checksum_64.h             | 2 +-
 include/asm-x86/i387_32.h                 | 2 --
 include/asm-x86/i387_64.h                 | 2 --
 include/asm-x86/ia32_unistd.h             | 2 +-
 include/asm-x86/ide.h                     | 2 --
 include/asm-x86/irqflags_32.h             | 2 --
 include/asm-x86/irqflags_64.h             | 2 --
 include/asm-x86/mach-default/apm.h        | 2 --
 include/asm-x86/mach-default/mach_timer.h | 2 --
 include/asm-x86/mach-default/mach_traps.h | 2 --
 include/asm-x86/nmi_32.h                  | 3 ---
 include/asm-x86/nmi_64.h                  | 3 ---
 include/asm-x86/processor_32.h            | 2 --
 include/asm-x86/processor_64.h            | 2 --
 include/asm-x86/rio.h                     | 4 ++--
 include/asm-x86/rwsem.h                   | 2 +-
 include/asm-x86/topology_32.h             | 2 --
 include/asm-x86/xor_32.h                  | 2 --
 include/asm-x86/xor_64.h                  | 2 --
 21 files changed, 7 insertions(+), 43 deletions(-)

diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
index 3cce5a0f2d9c..c8afeea0b80b 100644
--- a/include/asm-x86/acpi_32.h
+++ b/include/asm-x86/acpi_32.h
@@ -1,9 +1,7 @@
 /*
- *  asm-i386/acpi.h
- *
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
-  *
+ *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  *  This program is free software; you can redistribute it and/or modify
diff --git a/include/asm-x86/acpi_64.h b/include/asm-x86/acpi_64.h
index 98173357dd89..58114ed08024 100644
--- a/include/asm-x86/acpi_64.h
+++ b/include/asm-x86/acpi_64.h
@@ -1,9 +1,7 @@
 /*
- *  asm-x86_64/acpi.h
- *
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
-  *
+ *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  *  This program is free software; you can redistribute it and/or modify
diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h
index 419fe88a0342..e5f79997decc 100644
--- a/include/asm-x86/checksum_64.h
+++ b/include/asm-x86/checksum_64.h
@@ -4,7 +4,7 @@
 /* 
  * Checksums for x86-64 
  * Copyright 2002 by Andi Kleen, SuSE Labs 
- * with some code from asm-i386/checksum.h
+ * with some code from asm-x86/checksum.h
  */ 
 
 #include <linux/compiler.h>
diff --git a/include/asm-x86/i387_32.h b/include/asm-x86/i387_32.h
index cdd1e248e3b4..9ac2502cdd3d 100644
--- a/include/asm-x86/i387_32.h
+++ b/include/asm-x86/i387_32.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-i386/i387.h
- *
  * Copyright (C) 1994 Linus Torvalds
  *
  * Pentium III FXSR, SSE support
diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h
index 3a4ffba3d6bc..12edb9f2d755 100644
--- a/include/asm-x86/i387_64.h
+++ b/include/asm-x86/i387_64.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-x86_64/i387.h
- *
  * Copyright (C) 1994 Linus Torvalds
  *
  * Pentium III FXSR, SSE support
diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
index 5b52ce507338..61cea9e7c5c1 100644
--- a/include/asm-x86/ia32_unistd.h
+++ b/include/asm-x86/ia32_unistd.h
@@ -5,7 +5,7 @@
  * This file contains the system call numbers of the ia32 port,
  * this is for the kernel only.
  * Only add syscalls here where some part of the kernel needs to know
- * the number. This should be otherwise in sync with asm-i386/unistd.h. -AK
+ * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
  */
 
 #define __NR_ia32_restart_syscall 0
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index 42130adf9c7c..c2552d8bebf7 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -1,6 +1,4 @@
 /*
- *  linux/include/asm-i386/ide.h
- *
  *  Copyright (C) 1994-1996  Linus Torvalds & authors
  */
 
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index 4c7720089cb5..98b21b9bdce8 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-i386/irqflags.h
- *
  * IRQ flags handling
  *
  * This file gets included from lowlevel asm headers too, to provide
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index bb9163bb29d1..38c07db733cf 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-x86_64/irqflags.h
- *
  * IRQ flags handling
  *
  * This file gets included from lowlevel asm headers too, to provide
diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h
index 1f730b8bd1fd..989f34c37d32 100644
--- a/include/asm-x86/mach-default/apm.h
+++ b/include/asm-x86/mach-default/apm.h
@@ -1,6 +1,4 @@
 /*
- *  include/asm-i386/mach-default/apm.h
- *
  *  Machine specific APM BIOS functions for generic.
  *  Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp>
  */
diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h
index 807992fd4171..4b76e536cd98 100644
--- a/include/asm-x86/mach-default/mach_timer.h
+++ b/include/asm-x86/mach-default/mach_timer.h
@@ -1,6 +1,4 @@
 /*
- *  include/asm-i386/mach-default/mach_timer.h
- *
  *  Machine specific calibrate_tsc() for generic.
  *  Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp>
  */
diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h
index 625438b8a6eb..2fe7705c0484 100644
--- a/include/asm-x86/mach-default/mach_traps.h
+++ b/include/asm-x86/mach-default/mach_traps.h
@@ -1,6 +1,4 @@
 /*
- *  include/asm-i386/mach-default/mach_traps.h
- *
  *  Machine specific NMI handling for generic.
  *  Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp>
  */
diff --git a/include/asm-x86/nmi_32.h b/include/asm-x86/nmi_32.h
index 70a958a8e381..7206c7e8a388 100644
--- a/include/asm-x86/nmi_32.h
+++ b/include/asm-x86/nmi_32.h
@@ -1,6 +1,3 @@
-/*
- *  linux/include/asm-i386/nmi.h
- */
 #ifndef ASM_NMI_H
 #define ASM_NMI_H
 
diff --git a/include/asm-x86/nmi_64.h b/include/asm-x86/nmi_64.h
index 65b6acf3bb59..92f36fb1dc62 100644
--- a/include/asm-x86/nmi_64.h
+++ b/include/asm-x86/nmi_64.h
@@ -1,6 +1,3 @@
-/*
- *  linux/include/asm-i386/nmi.h
- */
 #ifndef ASM_NMI_H
 #define ASM_NMI_H
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 58880a1ddd65..3b51a18f16d5 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-i386/processor.h
- *
  * Copyright (C) 1994 Linus Torvalds
  */
 
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 6b96e1f12a42..6c0d96a7af63 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-x86_64/processor.h
- *
  * Copyright (C) 1994 Linus Torvalds
  */
 
diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h
index c7350f6d2015..97cdcc9887ba 100644
--- a/include/asm-x86/rio.h
+++ b/include/asm-x86/rio.h
@@ -1,6 +1,6 @@
 /*
- * Derived from include/asm-i386/mach-summit/mach_mpparse.h
- *          and include/asm-i386/mach-default/bios_ebda.h
+ * Derived from include/asm-x86/mach-summit/mach_mpparse.h
+ *          and include/asm-x86/mach-default/bios_ebda.h
  *
  * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  */
diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index 041906f3c6df..a7e7e14cb430 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h
@@ -2,7 +2,7 @@
  *
  * Written by David Howells (dhowells@redhat.com).
  *
- * Derived from asm-i386/semaphore.h
+ * Derived from asm-x86/semaphore.h
  *
  *
  * The MSW of the count is the negated number of active writers and waiting
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index 9040f5a61278..fdca80a851e2 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -1,6 +1,4 @@
 /*
- * linux/include/asm-i386/topology.h
- *
  * Written by: Matthew Dobson, IBM Corporation
  *
  * Copyright (C) 2002, IBM Corp.
diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h
index 23c86cef3b25..a41ef1bdd424 100644
--- a/include/asm-x86/xor_32.h
+++ b/include/asm-x86/xor_32.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-i386/xor.h
- *
  * Optimized RAID-5 checksumming functions for MMX and SSE.
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h
index f942fcc21831..1eee7fcb2420 100644
--- a/include/asm-x86/xor_64.h
+++ b/include/asm-x86/xor_64.h
@@ -1,6 +1,4 @@
 /*
- * include/asm-x86_64/xor.h
- *
  * Optimized RAID-5 checksumming functions for MMX and SSE.
  *
  * This program is free software; you can redistribute it and/or modify

From f0cd0af1b004f601f3cf96d9e001ffad9207f642 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:29 +0100
Subject: [PATCH 095/890] x86: unexport __{read,write}_lock_failed

This patch removes the unused exports for __{read,write}_lock_failed.

Signed-off-by: Adrian Bunk <bunk@kernel.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i386_ksyms_32.c  | 7 -------
 arch/x86/kernel/x8664_ksyms_64.c | 7 -------
 2 files changed, 14 deletions(-)

diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 02112fcc0de7..061627806a2d 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -22,12 +22,5 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(strstr);
 
-#ifdef CONFIG_SMP
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 77c25b307635..e28b533674dd 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -34,13 +34,6 @@ EXPORT_SYMBOL(__copy_from_user_inatomic);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 
-#ifdef CONFIG_SMP
-extern void  __write_lock_failed(rwlock_t *rw);
-extern void  __read_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
 /* Export string functions. We normally rely on gcc builtin for most of these,
    but gcc sometimes decides not to inline them. */    
 #undef memcpy

From f20ebee41882d28c965166e56c1331fbd28778bb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:29 +0100
Subject: [PATCH 096/890] x86: move 8259 defines to i8259.h

Move the i8259 defines and remove the now io_ports.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c               |  2 --
 arch/x86/kernel/apm_32.c                |  2 --
 arch/x86/kernel/i8259_32.c              |  2 --
 arch/x86/kernel/io_apic_32.c            |  2 --
 arch/x86/kernel/time_32.c               |  1 -
 arch/x86/kernel/vmiclock_32.c           |  1 -
 include/asm-x86/i8259.h                 | 17 ++++++++++++++++-
 include/asm-x86/mach-default/io_ports.h | 25 -------------------------
 8 files changed, 16 insertions(+), 36 deletions(-)
 delete mode 100644 include/asm-x86/mach-default/io_ports.h

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 420c15842e44..56352c11a896 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -43,8 +43,6 @@
 #include <mach_apicdef.h>
 #include <mach_ipi.h>
 
-#include "io_ports.h"
-
 /*
  * Sanity check
  */
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index af045ca0f653..7496c2e4b6ae 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -235,8 +235,6 @@
 #include <asm/paravirt.h>
 #include <asm/reboot.h>
 
-#include "io_ports.h"
-
 #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
 extern int (*console_blank_hook)(int);
 #endif
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 5f3496d01984..3321ce669295 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -21,8 +21,6 @@
 #include <asm/arch_hooks.h>
 #include <asm/i8259.h>
 
-#include <io_ports.h>
-
 /*
  * This is the 'legacy' 8259A Programmable Interrupt Controller,
  * present in the majority of PC/AT boxes.
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index ab77f1905469..75bf8dc77650 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -48,8 +48,6 @@
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 
-#include "io_ports.h"
-
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 atomic_t irq_mis_count;
 
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index e9ead762abe8..2dcbb81b4cd3 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -37,7 +37,6 @@
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#include "io_ports.h"
 #include "do_timer.h"
 
 unsigned int cpu_khz;	/* Detected as we calibrate the TSC */
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index b1b5ab08b26e..57f9ef5a324c 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -35,7 +35,6 @@
 #include <asm/i8253.h>
 
 #include <irq_vectors.h>
-#include "io_ports.h"
 
 #define VMI_ONESHOT  (VMI_ALARM_IS_ONESHOT  | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
 #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 29d8f9a6b3fc..cabcc6cf3923 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -3,10 +3,25 @@
 
 extern unsigned int cached_irq_mask;
 
-#define __byte(x,y) 		(((unsigned char *) &(y))[x])
+#define __byte(x,y)		(((unsigned char *) &(y))[x])
 #define cached_master_mask	(__byte(0, cached_irq_mask))
 #define cached_slave_mask	(__byte(1, cached_irq_mask))
 
+/* i8259A PIC registers */
+#define PIC_MASTER_CMD		0x20
+#define PIC_MASTER_IMR		0x21
+#define PIC_MASTER_ISR		PIC_MASTER_CMD
+#define PIC_MASTER_POLL		PIC_MASTER_ISR
+#define PIC_MASTER_OCW3		PIC_MASTER_ISR
+#define PIC_SLAVE_CMD		0xa0
+#define PIC_SLAVE_IMR		0xa1
+
+/* i8259A PIC related value */
+#define PIC_CASCADE_IR		2
+#define MASTER_ICW4_DEFAULT	0x01
+#define SLAVE_ICW4_DEFAULT	0x01
+#define PIC_ICW4_AEOI		2
+
 extern spinlock_t i8259A_lock;
 
 extern void init_8259A(int auto_eoi);
diff --git a/include/asm-x86/mach-default/io_ports.h b/include/asm-x86/mach-default/io_ports.h
deleted file mode 100644
index 48540ba97166..000000000000
--- a/include/asm-x86/mach-default/io_ports.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *  arch/i386/mach-generic/io_ports.h
- *
- *  Machine specific IO port address definition for generic.
- *  Written by Osamu Tomita <tomita@cinet.co.jp>
- */
-#ifndef _MACH_IO_PORTS_H
-#define _MACH_IO_PORTS_H
-
-/* i8259A PIC registers */
-#define PIC_MASTER_CMD		0x20
-#define PIC_MASTER_IMR		0x21
-#define PIC_MASTER_ISR		PIC_MASTER_CMD
-#define PIC_MASTER_POLL		PIC_MASTER_ISR
-#define PIC_MASTER_OCW3		PIC_MASTER_ISR
-#define PIC_SLAVE_CMD		0xa0
-#define PIC_SLAVE_IMR		0xa1
-
-/* i8259A PIC related value */
-#define PIC_CASCADE_IR		2
-#define MASTER_ICW4_DEFAULT	0x01
-#define SLAVE_ICW4_DEFAULT	0x01
-#define PIC_ICW4_AEOI		2
-
-#endif /* !_MACH_IO_PORTS_H */

From 2b8e05b5677d2b4f3cd218ee90a7332715cb262f Mon Sep 17 00:00:00 2001
From: Paul Jimenez <pj@place.org>
Date: Wed, 30 Jan 2008 13:30:29 +0100
Subject: [PATCH 097/890] x86: make i8259_64 more _32-like

Howdy! Here's a simple janitorish patch for you:

This patch mainly hinges around two includes and their ramifications:

#include <i8259.h>	which provides cached_{slave,master}_mask
#include <io_ports.h>	which provides PIC_{MASTER,SLAVE}_{IMR,CMD}

Adding these two includes and using those half dozen or so definitions
removed 140+ lines of diffs between i8259_32.c and i8259_64.c, thus
making it easier for the real substantitive differences between them to
show up, and hopefully therefore making it easier to eventually merge
the two.  All the warnings that checkpatch.pl throws (missing spaces
after commas and >80 character lines) exist intentionally to match
i8259_32.c.

Signed-off-by: Paul Jimenez <pj@place.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i8259_64.c | 154 +++++++++++++++++++------------------
 1 file changed, 81 insertions(+), 73 deletions(-)

diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index ba6d57286f56..be82b1217691 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -21,6 +21,7 @@
 #include <asm/delay.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 /*
  * Common place to define all x86 IRQ vectors
@@ -48,7 +49,7 @@
  */
 
 /*
- * The IO-APIC gives us many more interrupt sources. Most of these 
+ * The IO-APIC gives us many more interrupt sources. Most of these
  * are unused but an SMP system is supposed to have enough memory ...
  * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
  * across the spectrum, so we really want to be prepared to get all
@@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = {
 /*
  * This contains the irq mask for both 8259A irq controllers,
  */
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) 	(((unsigned char *)&(y))[x])
-#define cached_21	(__byte(0,cached_irq_mask))
-#define cached_A1	(__byte(1,cached_irq_mask))
+unsigned int cached_irq_mask = 0xffff;
 
 /*
  * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
@@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq)
 	spin_lock_irqsave(&i8259A_lock, flags);
 	cached_irq_mask |= mask;
 	if (irq & 8)
-		outb(cached_A1,0xA1);
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
 	else
-		outb(cached_21,0x21);
+		outb(cached_master_mask, PIC_MASTER_IMR);
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq)
 	spin_lock_irqsave(&i8259A_lock, flags);
 	cached_irq_mask &= mask;
 	if (irq & 8)
-		outb(cached_A1,0xA1);
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
 	else
-		outb(cached_21,0x21);
+		outb(cached_master_mask, PIC_MASTER_IMR);
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq)
 
 	spin_lock_irqsave(&i8259A_lock, flags);
 	if (irq < 8)
-		ret = inb(0x20) & mask;
+		ret = inb(PIC_MASTER_CMD) & mask;
 	else
-		ret = inb(0xA0) & (mask >> 8);
+		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
 	return ret;
@@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsigned int irq)
 	int irqmask = 1<<irq;
 
 	if (irq < 8) {
-		outb(0x0B,0x20);		/* ISR register */
-		value = inb(0x20) & irqmask;
-		outb(0x0A,0x20);		/* back to the IRR register */
+		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
+		value = inb(PIC_MASTER_CMD) & irqmask;
+		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
 		return value;
 	}
-	outb(0x0B,0xA0);		/* ISR register */
-	value = inb(0xA0) & (irqmask >> 8);
-	outb(0x0A,0xA0);		/* back to the IRR register */
+	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
+	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
+	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
 	return value;
 }
 
@@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned int irq)
 
 handle_real_irq:
 	if (irq & 8) {
-		inb(0xA1);		/* DUMMY - (do we need this?) */
-		outb(cached_A1,0xA1);
-		outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
-		outb(0x62,0x20);	/* 'Specific EOI' to master-IRQ2 */
+		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+		/* 'Specific EOI' to slave */
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);
+		 /* 'Specific EOI' to master-IRQ2 */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
 	} else {
-		inb(0x21);		/* DUMMY - (do we need this?) */
-		outb(cached_21,0x21);
-		outb(0x60+irq,0x20);	/* 'Specific EOI' to master */
+		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_master_mask, PIC_MASTER_IMR);
+		/* 'Specific EOI' to master */
+		outb(0x60+irq,PIC_MASTER_CMD);
 	}
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 	return;
@@ -270,7 +270,8 @@ spurious_8259A_irq:
 		 * lets ACK and report it. [once per IRQ]
 		 */
 		if (!(spurious_irq_mask & irqmask)) {
-			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
+			printk(KERN_DEBUG
+			       "spurious 8259A interrupt: IRQ%d.\n", irq);
 			spurious_irq_mask |= irqmask;
 		}
 		atomic_inc(&irq_err_count);
@@ -283,51 +284,6 @@ spurious_8259A_irq:
 	}
 }
 
-void init_8259A(int auto_eoi)
-{
-	unsigned long flags;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	outb(0xff, 0x21);	/* mask all of 8259A-1 */
-	outb(0xff, 0xA1);	/* mask all of 8259A-2 */
-
-	/*
-	 * outb_p - this has to work on a wide range of PC hardware.
-	 */
-	outb_p(0x11, 0x20);	/* ICW1: select 8259A-1 init */
-	outb_p(IRQ0_VECTOR, 0x21);	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
-	outb_p(0x04, 0x21);	/* 8259A-1 (the master) has a slave on IR2 */
-	if (auto_eoi)
-		outb_p(0x03, 0x21);	/* master does Auto EOI */
-	else
-		outb_p(0x01, 0x21);	/* master expects normal EOI */
-
-	outb_p(0x11, 0xA0);	/* ICW1: select 8259A-2 init */
-	outb_p(IRQ8_VECTOR, 0xA1);	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
-	outb_p(0x02, 0xA1);	/* 8259A-2 is a slave on master's IR2 */
-	outb_p(0x01, 0xA1);	/* (slave's support for AEOI in flat mode
-				    is to be investigated) */
-
-	if (auto_eoi)
-		/*
-		 * in AEOI mode we just have to mask the interrupt
-		 * when acking.
-		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
-	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
-
-	udelay(100);		/* wait for 8259A to initialize */
-
-	outb(cached_21, 0x21);	/* restore master IRQ mask */
-	outb(cached_A1, 0xA1);	/* restore slave IRQ mask */
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
 static char irq_trigger[2];
 /**
  * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
@@ -364,8 +320,8 @@ static int i8259A_shutdown(struct sys_device *dev)
 	 * the kernel initialization code can get it
 	 * out of.
 	 */
-	outb(0xff, 0x21);	/* mask all of 8259A-1 */
-	outb(0xff, 0xA1);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
 	return 0;
 }
 
@@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void)
 
 device_initcall(i8259A_init_sysfs);
 
+void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	i8259A_auto_eoi = auto_eoi;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	/*
+	 * outb_p - this has to work on a wide range of PC hardware.
+	 */
+	outb_p(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+	outb_p(IRQ0_VECTOR, PIC_MASTER_IMR);
+	/* 8259A-1 (the master) has a slave on IR2 */
+	outb_p(0x04, PIC_MASTER_IMR);
+	if (auto_eoi)	/* master does Auto EOI */
+		outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+	else		/* master expects normal EOI */
+		outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+
+	outb_p(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+	outb_p(IRQ8_VECTOR, PIC_SLAVE_IMR);
+	/* 8259A-2 is a slave on master's IR2 */
+	outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR);
+	/* (slave's support for AEOI in flat mode is to be investigated) */
+	outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
+
+	if (auto_eoi)
+		/*
+		 * In AEOI mode we just have to mask the interrupt
+		 * when acking.
+		 */
+		i8259A_chip.mask_ack = disable_8259A_irq;
+	else
+		i8259A_chip.mask_ack = mask_and_ack_8259A;
+
+	udelay(100);		/* wait for 8259A to initialize */
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+
+
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
  */

From 1d8a1f6b51f6b195dfdcf05821be97edede5664a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:30 +0100
Subject: [PATCH 098/890] x86: prepare merger of <asm/alternative_{32,64}.h>

Prepare for merging <asm/alternative_{32,64}.h> by making the 32- and
64-bit versions textually identical.  This involves:

- removing arbitrary header inclusion differences
- reorganizing the 32-bit version slightly to match the 64-bit version
- using <asm/asm.h> to unify the assembly code
- renaming struct paravirt_patch to struct paravirt_patch_site in the
  64-bit version to match the 32-bit version; there are no references
  to struct paravirt_patch elsewhere in the tree.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/alternative_32.h | 131 ++++++++++++++++---------------
 include/asm-x86/alternative_64.h |  84 ++++++++++----------
 2 files changed, 112 insertions(+), 103 deletions(-)

diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h
index bda6c810c0f4..4919b5ee8b9f 100644
--- a/include/asm-x86/alternative_32.h
+++ b/include/asm-x86/alternative_32.h
@@ -1,23 +1,63 @@
 #ifndef _I386_ALTERNATIVE_H
 #define _I386_ALTERNATIVE_H
 
-#include <asm/types.h>
-#include <linux/stddef.h>
 #include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/asm.h>
+
+/*
+ * Alternative inline assembly for SMP.
+ *
+ * The LOCK_PREFIX macro defined here replaces the LOCK and
+ * LOCK_PREFIX macros used everywhere in the source tree.
+ *
+ * SMP alternatives use the same data structures as the other
+ * alternatives and the X86_FEATURE_UP flag to indicate the case of a
+ * UP system running a SMP kernel.  The existing apply_alternatives()
+ * works fine for patching a SMP kernel for UP.
+ *
+ * The SMP alternative tables can be kept after boot and contain both
+ * UP and SMP versions of the instructions to allow switching back to
+ * SMP at runtime, when hotplugging in a new CPU, which is especially
+ * useful in virtualized environments.
+ *
+ * The very common lock prefix is handled as special case in a
+ * separate table which is a pure address list without replacement ptr
+ * and size information.  That keeps the table sizes small.
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX \
+		".section .smp_locks,\"a\"\n"	\
+		_ASM_ALIGN "\n"			\
+		_ASM_PTR "661f\n" /* address */	\
+		".previous\n"			\
+		"661:\n\tlock; "
+
+#else /* ! CONFIG_SMP */
+#define LOCK_PREFIX ""
+#endif
+
+/* This must be included *after* the definition of LOCK_PREFIX */
+#include <asm/cpufeature.h>
 
 struct alt_instr {
-	u8 *instr; 		/* original instruction */
+	u8 *instr;		/* original instruction */
 	u8 *replacement;
 	u8  cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen; 	/* length of new instruction, <= instrlen */
-	u8  pad;
+	u8  replacementlen;	/* length of new instruction, <= instrlen */
+	u8  pad1;
+#ifdef CONFIG_X86_64
+	u32 pad2;
+#endif
 };
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 
 struct module;
+
 #ifdef CONFIG_SMP
 extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *locks, void *locks_end,
@@ -45,17 +85,17 @@ static inline void alternatives_smp_switch(int smp) {}
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n" 			\
+	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
 		      ".section .altinstructions,\"a\"\n"		\
-		      "  .align 4\n"					\
-		      "  .long 661b\n"            /* label */		\
-		      "  .long 663f\n"		  /* new instruction */	\
-		      "  .byte %c0\n"             /* feature bit */	\
-		      "  .byte 662b-661b\n"       /* sourcelen */	\
-		      "  .byte 664f-663f\n"       /* replacementlen */	\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */\
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
 		      ".previous" :: "i" (feature) : "memory")
 
 /*
@@ -66,35 +106,35 @@ static inline void alternatives_smp_switch(int smp) {}
  * Argument numbers start with 1.
  * Best is to use constraints that are fixed size (like (%1) ... "r")
  * If you use variable sized constraints like "m" or "g" in the
- * replacement maake sure to pad to the worst case length.
+ * replacement make sure to pad to the worst case length.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
 	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
 		      ".section .altinstructions,\"a\"\n"		\
-		      "  .align 4\n"					\
-		      "  .long 661b\n"            /* label */		\
-		      "  .long 663f\n"		  /* new instruction */ \
-		      "  .byte %c0\n"             /* feature bit */	\
-		      "  .byte 662b-661b\n"       /* sourcelen */	\
-		      "  .byte 664f-663f\n"       /* replacementlen */ 	\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */\
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
 		      ".previous" :: "i" (feature), ##input)
 
 /* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+#define alternative_io(oldinstr, newinstr, feature, output, input...)	\
 	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
 		      ".section .altinstructions,\"a\"\n"		\
-		      "  .align 4\n"					\
-		      "  .long 661b\n"            /* label */		\
-		      "  .long 663f\n"		  /* new instruction */	\
-		      "  .byte %c[feat]\n"        /* feature bit */	\
-		      "  .byte 662b-661b\n"       /* sourcelen */	\
-		      "  .byte 664f-663f\n"       /* replacementlen */	\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c[feat]\n"	/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */ \
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
 		      ".previous" : output : [feat] "i" (feature), ##input)
 
 /*
@@ -103,39 +143,6 @@ static inline void alternatives_smp_switch(int smp) {}
  */
 #define ASM_OUTPUT2(a, b) a, b
 
-/*
- * Alternative inline assembly for SMP.
- *
- * The LOCK_PREFIX macro defined here replaces the LOCK and
- * LOCK_PREFIX macros used everywhere in the source tree.
- *
- * SMP alternatives use the same data structures as the other
- * alternatives and the X86_FEATURE_UP flag to indicate the case of a
- * UP system running a SMP kernel.  The existing apply_alternatives()
- * works fine for patching a SMP kernel for UP.
- *
- * The SMP alternative tables can be kept after boot and contain both
- * UP and SMP versions of the instructions to allow switching back to
- * SMP at runtime, when hotplugging in a new CPU, which is especially
- * useful in virtualized environments.
- *
- * The very common lock prefix is handled as special case in a
- * separate table which is a pure address list without replacement ptr
- * and size information.  That keeps the table sizes small.
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX \
-		".section .smp_locks,\"a\"\n"	\
-		"  .align 4\n"			\
-		"  .long 661f\n" /* address */	\
-		".previous\n"			\
-	       	"661:\n\tlock; "
-
-#else /* ! CONFIG_SMP */
-#define LOCK_PREFIX ""
-#endif
-
 struct paravirt_patch_site;
 #ifdef CONFIG_PARAVIRT
 void apply_paravirt(struct paravirt_patch_site *start,
diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h
index ab161e810151..50efcebae33f 100644
--- a/include/asm-x86/alternative_64.h
+++ b/include/asm-x86/alternative_64.h
@@ -1,10 +1,9 @@
 #ifndef _X86_64_ALTERNATIVE_H
 #define _X86_64_ALTERNATIVE_H
 
-#ifdef __KERNEL__
-
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <asm/asm.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -30,10 +29,10 @@
 #ifdef CONFIG_SMP
 #define LOCK_PREFIX \
 		".section .smp_locks,\"a\"\n"	\
-		"  .align 8\n"			\
-		"  .quad 661f\n" /* address */	\
+		_ASM_ALIGN "\n"			\
+		_ASM_PTR "661f\n" /* address */	\
 		".previous\n"			\
-	       	"661:\n\tlock; "
+		"661:\n\tlock; "
 
 #else /* ! CONFIG_SMP */
 #define LOCK_PREFIX ""
@@ -43,12 +42,15 @@
 #include <asm/cpufeature.h>
 
 struct alt_instr {
-	u8 *instr; 		/* original instruction */
+	u8 *instr;		/* original instruction */
 	u8 *replacement;
 	u8  cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen; 	/* length of new instruction, <= instrlen */
-	u8  pad[5];
+	u8  replacementlen;	/* length of new instruction, <= instrlen */
+	u8  pad1;
+#ifdef CONFIG_X86_64
+	u32 pad2;
+#endif
 };
 
 extern void alternative_instructions(void);
@@ -68,9 +70,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
-#endif
-
-#endif
+#endif	/* CONFIG_SMP */
 
 /*
  * Alternative instructions for different CPU types or capabilities.
@@ -84,18 +84,18 @@ static inline void alternatives_smp_switch(int smp) {}
  * For non barrier like inlines please define new variants
  * without volatile and memory clobber.
  */
-#define alternative(oldinstr, newinstr, feature) 	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n" 		     \
-		      ".section .altinstructions,\"a\"\n"     	     \
-		      "  .align 8\n"				       \
-		      "  .quad 661b\n"            /* label */          \
-		      "  .quad 663f\n"		  /* new instruction */ \
-		      "  .byte %c0\n"             /* feature bit */    \
-		      "  .byte 662b-661b\n"       /* sourcelen */      \
-		      "  .byte 664f-663f\n"       /* replacementlen */ \
+#define alternative(oldinstr, newinstr, feature)			\
+	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
+		      ".section .altinstructions,\"a\"\n"		\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */ \
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
 		      ".previous" :: "i" (feature) : "memory")
 
 /*
@@ -111,30 +111,30 @@ static inline void alternatives_smp_switch(int smp) {}
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
 	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
 		      ".section .altinstructions,\"a\"\n"		\
-		      "  .align 8\n"					\
-		      "  .quad 661b\n"            /* label */		\
-		      "  .quad 663f\n"		  /* new instruction */	\
-		      "  .byte %c0\n"             /* feature bit */	\
-		      "  .byte 662b-661b\n"       /* sourcelen */	\
-		      "  .byte 664f-663f\n"       /* replacementlen */	\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */ \
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
 		      ".previous" :: "i" (feature), ##input)
 
 /* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+#define alternative_io(oldinstr, newinstr, feature, output, input...)	\
 	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
 		      ".section .altinstructions,\"a\"\n"		\
-		      "  .align 8\n"					\
-		      "  .quad 661b\n"            /* label */		\
-		      "  .quad 663f\n"		  /* new instruction */	\
-		      "  .byte %c[feat]\n"        /* feature bit */	\
-		      "  .byte 662b-661b\n"       /* sourcelen */	\
-		      "  .byte 664f-663f\n"       /* replacementlen */	\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c[feat]\n"	/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
 		      ".previous\n"					\
 		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"   /* replacement */ \
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
 		      ".previous" : output : [feat] "i" (feature), ##input)
 
 /*
@@ -143,15 +143,17 @@ static inline void alternatives_smp_switch(int smp) {}
  */
 #define ASM_OUTPUT2(a, b) a, b
 
-struct paravirt_patch;
+struct paravirt_patch_site;
 #ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+void apply_paravirt(struct paravirt_patch_site *start,
+		    struct paravirt_patch_site *end);
 #else
 static inline void
-apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+apply_paravirt(struct paravirt_patch_site *start,
+	       struct paravirt_patch_site *end)
 {}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
+#define __parainstructions	NULL
+#define __parainstructions_end	NULL
 #endif
 
 extern void text_poke(void *addr, unsigned char *opcode, int len);

From 6b59257082e136fbbce1aa017575a451c0df0592 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:30 +0100
Subject: [PATCH 099/890] x86: actually merge <asm/alternative.h>

This actually merges <asm-x86/alternative_{32,64}.h> into
<asm-x86/alternative.h>.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/alternative.h    | 164 ++++++++++++++++++++++++++++++-
 include/asm-x86/alternative_32.h | 161 ------------------------------
 include/asm-x86/alternative_64.h | 161 ------------------------------
 3 files changed, 160 insertions(+), 326 deletions(-)
 delete mode 100644 include/asm-x86/alternative_32.h
 delete mode 100644 include/asm-x86/alternative_64.h

diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 9eef6a32a130..d8bacf3c4b08 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -1,5 +1,161 @@
-#ifdef CONFIG_X86_32
-# include "alternative_32.h"
-#else
-# include "alternative_64.h"
+#ifndef _ASM_X86_ALTERNATIVE_H
+#define _ASM_X86_ALTERNATIVE_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/asm.h>
+
+/*
+ * Alternative inline assembly for SMP.
+ *
+ * The LOCK_PREFIX macro defined here replaces the LOCK and
+ * LOCK_PREFIX macros used everywhere in the source tree.
+ *
+ * SMP alternatives use the same data structures as the other
+ * alternatives and the X86_FEATURE_UP flag to indicate the case of a
+ * UP system running a SMP kernel.  The existing apply_alternatives()
+ * works fine for patching a SMP kernel for UP.
+ *
+ * The SMP alternative tables can be kept after boot and contain both
+ * UP and SMP versions of the instructions to allow switching back to
+ * SMP at runtime, when hotplugging in a new CPU, which is especially
+ * useful in virtualized environments.
+ *
+ * The very common lock prefix is handled as special case in a
+ * separate table which is a pure address list without replacement ptr
+ * and size information.  That keeps the table sizes small.
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX \
+		".section .smp_locks,\"a\"\n"	\
+		_ASM_ALIGN "\n"			\
+		_ASM_PTR "661f\n" /* address */	\
+		".previous\n"			\
+		"661:\n\tlock; "
+
+#else /* ! CONFIG_SMP */
+#define LOCK_PREFIX ""
 #endif
+
+/* This must be included *after* the definition of LOCK_PREFIX */
+#include <asm/cpufeature.h>
+
+struct alt_instr {
+	u8 *instr;		/* original instruction */
+	u8 *replacement;
+	u8  cpuid;		/* cpuid bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+	u8  replacementlen;	/* length of new instruction, <= instrlen */
+	u8  pad1;
+#ifdef CONFIG_X86_64
+	u32 pad2;
+#endif
+};
+
+extern void alternative_instructions(void);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+struct module;
+
+#ifdef CONFIG_SMP
+extern void alternatives_smp_module_add(struct module *mod, char *name,
+					void *locks, void *locks_end,
+					void *text, void *text_end);
+extern void alternatives_smp_module_del(struct module *mod);
+extern void alternatives_smp_switch(int smp);
+#else
+static inline void alternatives_smp_module_add(struct module *mod, char *name,
+					void *locks, void *locks_end,
+					void *text, void *text_end) {}
+static inline void alternatives_smp_module_del(struct module *mod) {}
+static inline void alternatives_smp_switch(int smp) {}
+#endif	/* CONFIG_SMP */
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature)			\
+	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
+		      ".section .altinstructions,\"a\"\n"		\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
+		      ".previous\n"					\
+		      ".section .altinstr_replacement,\"ax\"\n"		\
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
+		      ".previous" :: "i" (feature) : "memory")
+
+/*
+ * Alternative inline assembly with input.
+ *
+ * Pecularities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+ * Best is to use constraints that are fixed size (like (%1) ... "r")
+ * If you use variable sized constraints like "m" or "g" in the
+ * replacement make sure to pad to the worst case length.
+ */
+#define alternative_input(oldinstr, newinstr, feature, input...)	\
+	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
+		      ".section .altinstructions,\"a\"\n"		\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c0\n"		/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
+		      ".previous\n"					\
+		      ".section .altinstr_replacement,\"ax\"\n"		\
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
+		      ".previous" :: "i" (feature), ##input)
+
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, newinstr, feature, output, input...)	\
+	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
+		      ".section .altinstructions,\"a\"\n"		\
+		      _ASM_ALIGN "\n"					\
+		      _ASM_PTR "661b\n"		/* label */		\
+		      _ASM_PTR "663f\n"		/* new instruction */	\
+		      "	 .byte %c[feat]\n"	/* feature bit */	\
+		      "	 .byte 662b-661b\n"	/* sourcelen */		\
+		      "	 .byte 664f-663f\n"	/* replacementlen */	\
+		      ".previous\n"					\
+		      ".section .altinstr_replacement,\"ax\"\n"		\
+		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
+		      ".previous" : output : [feat] "i" (feature), ##input)
+
+/*
+ * use this macro(s) if you need more than one output parameter
+ * in alternative_io
+ */
+#define ASM_OUTPUT2(a, b) a, b
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+		    struct paravirt_patch_site *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch_site *start,
+	       struct paravirt_patch_site *end)
+{}
+#define __parainstructions	NULL
+#define __parainstructions_end	NULL
+#endif
+
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
+#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h
deleted file mode 100644
index 4919b5ee8b9f..000000000000
--- a/include/asm-x86/alternative_32.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _I386_ALTERNATIVE_H
-#define _I386_ALTERNATIVE_H
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <asm/asm.h>
-
-/*
- * Alternative inline assembly for SMP.
- *
- * The LOCK_PREFIX macro defined here replaces the LOCK and
- * LOCK_PREFIX macros used everywhere in the source tree.
- *
- * SMP alternatives use the same data structures as the other
- * alternatives and the X86_FEATURE_UP flag to indicate the case of a
- * UP system running a SMP kernel.  The existing apply_alternatives()
- * works fine for patching a SMP kernel for UP.
- *
- * The SMP alternative tables can be kept after boot and contain both
- * UP and SMP versions of the instructions to allow switching back to
- * SMP at runtime, when hotplugging in a new CPU, which is especially
- * useful in virtualized environments.
- *
- * The very common lock prefix is handled as special case in a
- * separate table which is a pure address list without replacement ptr
- * and size information.  That keeps the table sizes small.
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX \
-		".section .smp_locks,\"a\"\n"	\
-		_ASM_ALIGN "\n"			\
-		_ASM_PTR "661f\n" /* address */	\
-		".previous\n"			\
-		"661:\n\tlock; "
-
-#else /* ! CONFIG_SMP */
-#define LOCK_PREFIX ""
-#endif
-
-/* This must be included *after* the definition of LOCK_PREFIX */
-#include <asm/cpufeature.h>
-
-struct alt_instr {
-	u8 *instr;		/* original instruction */
-	u8 *replacement;
-	u8  cpuid;		/* cpuid bit set for replacement */
-	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen;	/* length of new instruction, <= instrlen */
-	u8  pad1;
-#ifdef CONFIG_X86_64
-	u32 pad2;
-#endif
-};
-
-extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-
-struct module;
-
-#ifdef CONFIG_SMP
-extern void alternatives_smp_module_add(struct module *mod, char *name,
-					void *locks, void *locks_end,
-					void *text, void *text_end);
-extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
-#else
-static inline void alternatives_smp_module_add(struct module *mod, char *name,
-					void *locks, void *locks_end,
-					void *text, void *text_end) {}
-static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
-#endif	/* CONFIG_SMP */
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Pecularities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c[feat]\n"	/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-		      ".previous" : output : [feat] "i" (feature), ##input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a, b) a, b
-
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-		    struct paravirt_patch_site *end);
-#else
-static inline void
-apply_paravirt(struct paravirt_patch_site *start,
-	       struct paravirt_patch_site *end)
-{}
-#define __parainstructions	NULL
-#define __parainstructions_end	NULL
-#endif
-
-extern void text_poke(void *addr, unsigned char *opcode, int len);
-
-#endif /* _I386_ALTERNATIVE_H */
diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h
deleted file mode 100644
index 50efcebae33f..000000000000
--- a/include/asm-x86/alternative_64.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _X86_64_ALTERNATIVE_H
-#define _X86_64_ALTERNATIVE_H
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <asm/asm.h>
-
-/*
- * Alternative inline assembly for SMP.
- *
- * The LOCK_PREFIX macro defined here replaces the LOCK and
- * LOCK_PREFIX macros used everywhere in the source tree.
- *
- * SMP alternatives use the same data structures as the other
- * alternatives and the X86_FEATURE_UP flag to indicate the case of a
- * UP system running a SMP kernel.  The existing apply_alternatives()
- * works fine for patching a SMP kernel for UP.
- *
- * The SMP alternative tables can be kept after boot and contain both
- * UP and SMP versions of the instructions to allow switching back to
- * SMP at runtime, when hotplugging in a new CPU, which is especially
- * useful in virtualized environments.
- *
- * The very common lock prefix is handled as special case in a
- * separate table which is a pure address list without replacement ptr
- * and size information.  That keeps the table sizes small.
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX \
-		".section .smp_locks,\"a\"\n"	\
-		_ASM_ALIGN "\n"			\
-		_ASM_PTR "661f\n" /* address */	\
-		".previous\n"			\
-		"661:\n\tlock; "
-
-#else /* ! CONFIG_SMP */
-#define LOCK_PREFIX ""
-#endif
-
-/* This must be included *after* the definition of LOCK_PREFIX */
-#include <asm/cpufeature.h>
-
-struct alt_instr {
-	u8 *instr;		/* original instruction */
-	u8 *replacement;
-	u8  cpuid;		/* cpuid bit set for replacement */
-	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen;	/* length of new instruction, <= instrlen */
-	u8  pad1;
-#ifdef CONFIG_X86_64
-	u32 pad2;
-#endif
-};
-
-extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-
-struct module;
-
-#ifdef CONFIG_SMP
-extern void alternatives_smp_module_add(struct module *mod, char *name,
-					void *locks, void *locks_end,
-					void *text, void *text_end);
-extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
-#else
-static inline void alternatives_smp_module_add(struct module *mod, char *name,
-					void *locks, void *locks_end,
-					void *text, void *text_end) {}
-static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
-#endif	/* CONFIG_SMP */
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Pecularities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c[feat]\n"	/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-		      ".previous" : output : [feat] "i" (feature), ##input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a, b) a, b
-
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-		    struct paravirt_patch_site *end);
-#else
-static inline void
-apply_paravirt(struct paravirt_patch_site *start,
-	       struct paravirt_patch_site *end)
-{}
-#define __parainstructions	NULL
-#define __parainstructions_end	NULL
-#endif
-
-extern void text_poke(void *addr, unsigned char *opcode, int len);
-
-#endif /* _X86_64_ALTERNATIVE_H */

From 013d23e1567c4cebee0a2db5c8fa97b91b34ac2a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:30 +0100
Subject: [PATCH 100/890] x86 e820_64.c: make 2 functions static

This patch makes the following needlessly global functions static:
- e820_print_map()
- early_panic()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_64.c | 4 ++--
 include/asm-x86/e820_64.h | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 0128b0b0e5b5..11a3d65db0c1 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -373,7 +373,7 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
 	return end - start - (ram << PAGE_SHIFT);
 }
 
-void __init e820_print_map(char *who)
+static void __init e820_print_map(char *who)
 {
 	int i;
 
@@ -633,7 +633,7 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 	return 0;
 }
 
-void early_panic(char *msg)
+static void early_panic(char *msg)
 {
 	early_printk(msg);
 	panic(msg);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 0bd4787a5d57..e535e6044e21 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -21,7 +21,6 @@ extern void contig_e820_setup(void);
 extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern void e820_mark_nosave_regions(void);
-extern void e820_print_map(char *who);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);

From 52e3d90def24008c9b3435a1032d9a7f05766bce Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:30:30 +0100
Subject: [PATCH 101/890] x86: io_apic_64.c: remove unused config check

CONFIG_IRQBALANCE doesn't exist on x86_64.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index c6de7854ac63..3e471d0fb150 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1439,7 +1439,7 @@ static void ack_apic_level(unsigned int irq)
 	int do_unmask_irq = 0;
 
 	irq_complete_move(irq);
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
 	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;

From 231fd906c5e71219bbc32618a8ed8b439d5dde98 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:30 +0100
Subject: [PATCH 102/890] x86 mce_64.c: make struct mcelog static

This patch makes the needlessly global struct mcelog static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 242e8668dbeb..8ef6a6bfd112 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
  * separate MCEs from kernel messages to avoid bogus bug reports.
  */
 
-struct mce_log mcelog = {
+static struct mce_log mcelog = {
 	MCE_LOG_SIGNATURE,
 	MCE_LOG_LEN,
 };

From 867ab545668385b903f9379019000383675c49b3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:31 +0100
Subject: [PATCH 103/890] x86: nmi_64.c: make code static

This patch makes the following needlessly global code static:
- panic_on_timeout
- setup_nmi_watchdog()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi_64.c | 4 ++--
 include/asm-x86/nmi_64.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index c3d1476b6a11..53faef632fc6 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE;
  *  0: the lapic NMI watchdog is disabled, but can be enabled
  */
 atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
-int panic_on_timeout;
+static int panic_on_timeout;
 
 unsigned int nmi_watchdog = NMI_DEFAULT;
 static unsigned int nmi_hz = HZ;
@@ -136,7 +136,7 @@ int __init check_nmi_watchdog (void)
 	return 0;
 }
 
-int __init setup_nmi_watchdog(char *str)
+static int __init setup_nmi_watchdog(char *str)
 {
 	int nmi;
 
diff --git a/include/asm-x86/nmi_64.h b/include/asm-x86/nmi_64.h
index 92f36fb1dc62..2eeb74e5f3ff 100644
--- a/include/asm-x86/nmi_64.h
+++ b/include/asm-x86/nmi_64.h
@@ -38,7 +38,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 
 #define get_nmi_reason() inb(0x61)
 
-extern int panic_on_timeout;
 extern int unknown_nmi_panic;
 extern int nmi_watchdog_enabled;
 
@@ -57,7 +56,6 @@ extern void enable_timer_nmi_watchdog(void);
 extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
 
 extern void nmi_watchdog_default(void);
-extern int setup_nmi_watchdog(char *);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;

From ed65260bb814a5c600a4e883f6bda73e287f5dec Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:31 +0100
Subject: [PATCH 104/890] x86: pci-calgary_64.c: make a variable static

"debugging" is a horrible name for a global variable - thankfully it can
become static.

Also put it out of __read_mostly so that gcc no longer has to emit it
at all.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-calgary_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6bf1f716909d..21d7e0f8610c 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -183,7 +183,7 @@ static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
 /* enable this to stress test the chip's TCE cache */
 #ifdef CONFIG_IOMMU_DEBUG
-int debugging __read_mostly = 1;
+static int debugging = 1;
 
 static inline unsigned long verify_bit_range(unsigned long* bitmap,
 	int expected, unsigned long start, unsigned long end)
@@ -202,7 +202,7 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap,
 	return ~0UL;
 }
 #else /* debugging is disabled */
-int debugging __read_mostly = 0;
+static int debugging;
 
 static inline unsigned long verify_bit_range(unsigned long* bitmap,
 	int expected, unsigned long start, unsigned long end)

From 3e7593966be6f6d29a15138c0c96b961d437f2f5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:30:31 +0100
Subject: [PATCH 105/890] x86: pci-dma_64.c: cleanups

This patch contains the following cleanups:
- make the needlessly global iommu_setup() static
- remove the unused EXPORT_SYMBOL(iommu_merge)

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-dma_64.c | 3 +--
 include/asm-x86/pci_64.h     | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index 5552d23d23c2..a82473d192a3 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -13,7 +13,6 @@
 #include <asm/calgary.h>
 
 int iommu_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_merge);
 
 dma_addr_t bad_dma_address __read_mostly;
 EXPORT_SYMBOL(bad_dma_address);
@@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask);
  * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
  * documentation.
  */
-__init int iommu_setup(char *p)
+static __init int iommu_setup(char *p)
 {
 	iommu_merge = 1;
 
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index ef54226a9325..374690314539 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -26,7 +26,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int l
 
 
 extern void pci_iommu_alloc(void);
-extern int iommu_setup(char *opt);
 
 /* The PCI address space does equal the physical memory
  * address space.  The networking and block device layers use

From 2d2ee8de5f6d26ef2942e0b449aa68d9236d5777 Mon Sep 17 00:00:00 2001
From: Paul Jimenez <pj@place.org>
Date: Wed, 30 Jan 2008 13:30:31 +0100
Subject: [PATCH 106/890] x86: mtrr use type bool [RESEND AGAIN]

This is a janitorish patch to 1) remove private TRUE/FALSE #def's in
favor of using the standard enum from linux/stddef.h and 2) switch the
variables holding those values to type 'bool' (from linux/types.h)
since it both seems more appropriate and allows for potentially better
optimization.

As a truly minor aside, I removed a couple of comments documenting
a 'do_safe' parameter that seems to no longer exist.

Signed-off-by: Paul Jimenez <pj@place.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mtrr/amd.c     |  2 --
 arch/x86/kernel/cpu/mtrr/generic.c | 16 +++++++---------
 arch/x86/kernel/cpu/mtrr/if.c      | 15 ++++++++-------
 arch/x86/kernel/cpu/mtrr/main.c    |  8 +++++---
 arch/x86/kernel/cpu/mtrr/mtrr.h    |  6 ++----
 include/asm-x86/mtrr.h             |  8 ++++----
 6 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 0949cdbf848a..ee2331b0e58f 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int reg, unsigned long base,
     <base> The base address of the region.
     <size> The size of the region. If this is 0 the region is disabled.
     <type> The type of the region.
-    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
-    be done externally.
     [RETURNS] Nothing.
 */
 {
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 992f08dfbb6c..1c331c373a43 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -188,7 +188,7 @@ static inline void k8_enable_fixed_iorrs(void)
  * \param changed pointer which indicates whether the MTRR needed to be changed
  * \param msrwords pointer to the MSR values which the MSR should have
  */
-static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
+static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 {
 	unsigned lo, hi;
 
@@ -200,7 +200,7 @@ static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
 		    ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
 			k8_enable_fixed_iorrs();
 		mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
-		*changed = TRUE;
+		*changed = true;
 	}
 }
 
@@ -260,7 +260,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 static int set_fixed_ranges(mtrr_type * frs)
 {
 	unsigned long long *saved = (unsigned long long *) frs;
-	int changed = FALSE;
+	bool changed = false;
 	int block=-1, range;
 
 	while (fixed_range_blocks[++block].ranges)
@@ -273,17 +273,17 @@ static int set_fixed_ranges(mtrr_type * frs)
 
 /*  Set the MSR pair relating to a var range. Returns TRUE if
     changes are made  */
-static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 {
 	unsigned int lo, hi;
-	int changed = FALSE;
+	bool changed = false;
 
 	rdmsr(MTRRphysBase_MSR(index), lo, hi);
 	if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
 	    || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
 		(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
 		mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
-		changed = TRUE;
+		changed = true;
 	}
 
 	rdmsr(MTRRphysMask_MSR(index), lo, hi);
@@ -292,7 +292,7 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 	    || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
 		(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
 		mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
-		changed = TRUE;
+		changed = true;
 	}
 	return changed;
 }
@@ -417,8 +417,6 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
     <base> The base address of the region.
     <size> The size of the region. If this is 0 the region is disabled.
     <type> The type of the region.
-    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
-    be done externally.
     [RETURNS] Nothing.
 */
 {
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index c7d8f1756745..14535686c099 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -37,7 +37,7 @@ const char *mtrr_attrib_to_str(int x)
 
 static int
 mtrr_file_add(unsigned long base, unsigned long size,
-	      unsigned int type, char increment, struct file *file, int page)
+	      unsigned int type, bool increment, struct file *file, int page)
 {
 	int reg, max;
 	unsigned int *fcount = FILE_FCOUNT(file); 
@@ -55,7 +55,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
 		base >>= PAGE_SHIFT;
 		size >>= PAGE_SHIFT;
 	}
-	reg = mtrr_add_page(base, size, type, 1);
+	reg = mtrr_add_page(base, size, type, true);
 	if (reg >= 0)
 		++fcount[reg];
 	return reg;
@@ -141,7 +141,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 		size >>= PAGE_SHIFT;
 		err =
 		    mtrr_add_page((unsigned long) base, (unsigned long) size, i,
-				  1);
+				  true);
 		if (err < 0)
 			return err;
 		return len;
@@ -217,7 +217,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		err =
-		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
 				  file, 0);
 		break;
 	case MTRRIOC_SET_ENTRY:
@@ -226,7 +226,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #endif
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
+		err = mtrr_add(sentry.base, sentry.size, sentry.type, false);
 		break;
 	case MTRRIOC_DEL_ENTRY:
 #ifdef CONFIG_COMPAT
@@ -270,7 +270,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		err =
-		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
 				  file, 1);
 		break;
 	case MTRRIOC_SET_PAGE_ENTRY:
@@ -279,7 +279,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 #endif
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
+		err =
+		    mtrr_add_page(sentry.base, sentry.size, sentry.type, false);
 		break;
 	case MTRRIOC_DEL_PAGE_ENTRY:
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index beb45c9c0835..60af5ed2b5c0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -311,7 +311,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
  */
 
 int mtrr_add_page(unsigned long base, unsigned long size, 
-		  unsigned int type, char increment)
+		  unsigned int type, bool increment)
 {
 	int i, replace, error;
 	mtrr_type ltype;
@@ -394,7 +394,9 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 		if (likely(replace < 0))
 			usage_table[i] = 1;
 		else {
-			usage_table[i] = usage_table[replace] + !!increment;
+			usage_table[i] = usage_table[replace];
+			if (increment)
+				usage_table[i]++;
 			if (unlikely(replace != i)) {
 				set_mtrr(replace, 0, 0, 0);
 				usage_table[replace] = 0;
@@ -460,7 +462,7 @@ static int mtrr_check(unsigned long base, unsigned long size)
 
 int
 mtrr_add(unsigned long base, unsigned long size, unsigned int type,
-	 char increment)
+	 bool increment)
 {
 	if (mtrr_check(base, size))
 		return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 289dfe6030e3..54347e9a95c0 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -2,10 +2,8 @@
  * local mtrr defines.
  */
 
-#ifndef TRUE
-#define TRUE  1
-#define FALSE 0
-#endif
+#include <linux/types.h>
+#include <linux/stddef.h>
 
 #define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index e8320e4e6ca2..262670e42078 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h
@@ -89,9 +89,9 @@ struct mtrr_gentry
 extern void mtrr_save_fixed_ranges(void *);
 extern void mtrr_save_state(void);
 extern int mtrr_add (unsigned long base, unsigned long size,
-		     unsigned int type, char increment);
+		     unsigned int type, bool increment);
 extern int mtrr_add_page (unsigned long base, unsigned long size,
-		     unsigned int type, char increment);
+		     unsigned int type, bool increment);
 extern int mtrr_del (int reg, unsigned long base, unsigned long size);
 extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
 extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
@@ -101,12 +101,12 @@ extern void mtrr_bp_init(void);
 #define mtrr_save_fixed_ranges(arg) do {} while (0)
 #define mtrr_save_state() do {} while (0)
 static __inline__ int mtrr_add (unsigned long base, unsigned long size,
-				unsigned int type, char increment)
+				unsigned int type, bool increment)
 {
     return -ENODEV;
 }
 static __inline__ int mtrr_add_page (unsigned long base, unsigned long size,
-				unsigned int type, char increment)
+				unsigned int type, bool increment)
 {
     return -ENODEV;
 }

From 201c19948b879ed95ac986bc994af29d7cf4859f Mon Sep 17 00:00:00 2001
From: Lucas Woods <woodzy@gmail.com>
Date: Wed, 30 Jan 2008 13:30:32 +0100
Subject: [PATCH 107/890] x86: remove duplicate includes

Signed-off-by: Lucas Woods <woodzy@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c           | 1 -
 arch/x86/kernel/pci-calgary_64.c | 1 -
 arch/x86/vdso/vclock_gettime.c   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a3c56c9b8a02..5c1702789be4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -6,7 +6,6 @@
 #include <linux/init.h>
 #include <linux/sysdev.h>
 #include <linux/pm.h>
-#include <linux/delay.h>
 
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 21d7e0f8610c..21f34db2c03c 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -30,7 +30,6 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
-#include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 5b54cdfb2b07..23476c2ebfc4 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -19,7 +19,6 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
-#include <asm/vgtod.h>
 #include "vextern.h"
 
 #define gtod vdso_vsyscall_gtod_data

From 9773db2a301b089bb95907eec5ad1a2ef7fb4099 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:30:32 +0100
Subject: [PATCH 108/890] x86: remove dead code in ia32-emu

Remove useless second time checking of fsave argument in save_i387_ia32()
routine.  It's possible the compiler is doing the same but that is much
better to remove the dead code explicitly.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/fpu32.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/ia32/fpu32.c b/arch/x86/ia32/fpu32.c
index 459bf743128f..ae80745f668f 100644
--- a/arch/x86/ia32/fpu32.c
+++ b/arch/x86/ia32/fpu32.c
@@ -174,8 +174,6 @@ int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf,
 	if (fsave)
 		return 0;
 	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
-	if (fsave)
-		return err ? -1 : 1;
 	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
 	err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
 			      sizeof(struct i387_fxsave_struct));

From c9cce83dd1d59f52e2c8f8c7d265ba4854c40785 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Wed, 30 Jan 2008 13:30:32 +0100
Subject: [PATCH 109/890] x86: remove extern declarations for code, data, bss
 resources

This patch removes the extern struct resource declarations for
data_resource, code_resource and bss_resource on x86 and declares that
three structures as static as done on other architectures like IA64.

On i386, these structures are moved to setup_32.c (from e820_32.c) because
that's code that is not specific to e820 and also required on EFI systems.
That makes the "extern" reference superfluous.

On x86_64, data_resource, code_resource and bss_resource are passed to
e820_reserve_resources() as arguments just as done on i386 and IA64.  That
also avoids the "extern" reference and it's possible to make it static.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_32.c  | 110 +------------------------------------
 arch/x86/kernel/e820_64.c  |  11 ++--
 arch/x86/kernel/setup_32.c | 106 ++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/setup_64.c |   8 +--
 include/asm-x86/e820_32.h  |   6 ++
 include/asm-x86/e820_64.h  |   5 +-
 include/linux/ioport.h     |   2 +
 7 files changed, 127 insertions(+), 121 deletions(-)

diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 18f500d185a2..87cadc86d5ee 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -37,26 +37,6 @@ unsigned long pci_mem_start = 0x10000000;
 EXPORT_SYMBOL(pci_mem_start);
 #endif
 extern int user_defined_memmap;
-struct resource data_resource = {
-	.name	= "Kernel data",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-struct resource code_resource = {
-	.name	= "Kernel code",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-struct resource bss_resource = {
-	.name	= "Kernel bss",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
 
 static struct resource system_rom_resource = {
 	.name	= "System ROM",
@@ -111,60 +91,6 @@ static struct resource video_rom_resource = {
 	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
 };
 
-static struct resource video_ram_resource = {
-	.name	= "Video RAM area",
-	.start	= 0xa0000,
-	.end	= 0xbffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource standard_io_resources[] = { {
-	.name	= "dma1",
-	.start	= 0x0000,
-	.end	= 0x001f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic1",
-	.start	= 0x0020,
-	.end	= 0x0021,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer0",
-	.start	= 0x0040,
-	.end    = 0x0043,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer1",
-	.start  = 0x0050,
-	.end    = 0x0053,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "keyboard",
-	.start	= 0x0060,
-	.end	= 0x006f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma page reg",
-	.start	= 0x0080,
-	.end	= 0x008f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic2",
-	.start	= 0x00a0,
-	.end	= 0x00a1,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma2",
-	.start	= 0x00c0,
-	.end	= 0x00df,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "fpu",
-	.start	= 0x00f0,
-	.end	= 0x00ff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-} };
-
 #define ROMSIGNATURE 0xaa55
 
 static int __init romsignature(const unsigned char *rom)
@@ -260,10 +186,9 @@ static void __init probe_roms(void)
  * Request address space for all standard RAM and ROM resources
  * and also for regions reported as reserved by the e820.
  */
-static void __init
-legacy_init_iomem_resources(struct resource *code_resource,
-			    struct resource *data_resource,
-			    struct resource *bss_resource)
+void __init legacy_init_iomem_resources(struct resource *code_resource,
+		struct resource *data_resource,
+		struct resource *bss_resource)
 {
 	int i;
 
@@ -305,35 +230,6 @@ legacy_init_iomem_resources(struct resource *code_resource,
 	}
 }
 
-/*
- * Request address space for all standard resources
- *
- * This is called just before pcibios_init(), which is also a
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
- */
-static int __init request_standard_resources(void)
-{
-	int i;
-
-	printk("Setting up standard PCI resources\n");
-	if (efi_enabled)
-		efi_initialize_iomem_resources(&code_resource,
-				&data_resource, &bss_resource);
-	else
-		legacy_init_iomem_resources(&code_resource,
-				&data_resource, &bss_resource);
-
-	/* EFI systems may still have VGA */
-	request_resource(&iomem_resource, &video_ram_resource);
-
-	/* request I/O space for devices used on all i[345]86 PCs */
-	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
-		request_resource(&ioport_resource, &standard_io_resources[i]);
-	return 0;
-}
-
-subsys_initcall(request_standard_resources);
-
 #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
 /**
  * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 11a3d65db0c1..151236896243 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -47,8 +47,6 @@ unsigned long end_pfn_map;
  */
 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
 
-extern struct resource code_resource, data_resource, bss_resource;
-
 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
 {
@@ -213,7 +211,8 @@ unsigned long __init e820_end_of_ram(void)
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
-void __init e820_reserve_resources(void)
+void __init e820_reserve_resources(struct resource *code_resource,
+		struct resource *data_resource, struct resource *bss_resource)
 {
 	int i;
 	for (i = 0; i < e820.nr_map; i++) {
@@ -235,9 +234,9 @@ void __init e820_reserve_resources(void)
 			 * so we try it repeatedly and let the resource manager
 			 * test it.
 			 */
-			request_resource(res, &code_resource);
-			request_resource(res, &data_resource);
-			request_resource(res, &bss_resource);
+			request_resource(res, code_resource);
+			request_resource(res, data_resource);
+			request_resource(res, bss_resource);
 #ifdef CONFIG_KEXEC
 			if (crashk_res.start != crashk_res.end)
 				request_resource(res, &crashk_res);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 236d30b264d8..32edf70d6b0d 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -73,9 +73,80 @@ int disable_pse __cpuinitdata = 0;
 /*
  * Machine setup..
  */
-extern struct resource code_resource;
-extern struct resource data_resource;
-extern struct resource bss_resource;
+static struct resource data_resource = {
+	.name	= "Kernel data",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+	.name	= "Kernel code",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource bss_resource = {
+	.name	= "Kernel bss",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+	.name	= "dma1",
+	.start	= 0x0000,
+	.end	= 0x001f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic1",
+	.start	= 0x0020,
+	.end	= 0x0021,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer0",
+	.start	= 0x0040,
+	.end    = 0x0043,
+	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer1",
+	.start  = 0x0050,
+	.end    = 0x0053,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "keyboard",
+	.start	= 0x0060,
+	.end	= 0x006f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma page reg",
+	.start	= 0x0080,
+	.end	= 0x008f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic2",
+	.start	= 0x00a0,
+	.end	= 0x00a1,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma2",
+	.start	= 0x00c0,
+	.end	= 0x00df,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "fpu",
+	.start	= 0x00f0,
+	.end	= 0x00ff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+} };
 
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -693,3 +764,32 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 }
+
+/*
+ * Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
+ */
+static int __init request_standard_resources(void)
+{
+	int i;
+
+	printk(KERN_INFO "Setting up standard PCI resources\n");
+	if (efi_enabled)
+		efi_initialize_iomem_resources(&code_resource,
+				&data_resource, &bss_resource);
+	else
+		legacy_init_iomem_resources(&code_resource,
+				&data_resource, &bss_resource);
+
+	/* EFI systems may still have VGA */
+	request_resource(&iomem_resource, &video_ram_resource);
+
+	/* request I/O space for devices used on all i[345]86 PCs */
+	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+		request_resource(&ioport_resource, &standard_io_resources[i]);
+	return 0;
+}
+
+subsys_initcall(request_standard_resources);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index bcb5f3aaa097..1acb435a0585 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -123,19 +123,19 @@ struct resource standard_io_resources[] = {
 
 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
 
-struct resource data_resource = {
+static struct resource data_resource = {
 	.name = "Kernel data",
 	.start = 0,
 	.end = 0,
 	.flags = IORESOURCE_RAM,
 };
-struct resource code_resource = {
+static struct resource code_resource = {
 	.name = "Kernel code",
 	.start = 0,
 	.end = 0,
 	.flags = IORESOURCE_RAM,
 };
-struct resource bss_resource = {
+static struct resource bss_resource = {
 	.name = "Kernel bss",
 	.start = 0,
 	.end = 0,
@@ -438,7 +438,7 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
  	 */
-	e820_reserve_resources(); 
+	e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
 	e820_mark_nosave_regions();
 
 	{
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 03f60c690c8a..ae5ea19623fa 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -12,6 +12,8 @@
 #ifndef __E820_HEADER
 #define __E820_HEADER
 
+#include <linux/ioport.h>
+
 #define HIGH_MEMORY	(1024*1024)
 
 #ifndef __ASSEMBLY__
@@ -26,6 +28,9 @@ extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
 extern void print_memory_map(char *who);
+extern void legacy_init_iomem_resources(struct resource *code_resource,
+			    struct resource *data_resource,
+			    struct resource *bss_resource);
 
 #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
 extern void e820_mark_nosave_regions(void);
@@ -35,5 +40,6 @@ static inline void e820_mark_nosave_regions(void)
 }
 #endif
 
+
 #endif/*!__ASSEMBLY__*/
 #endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index e535e6044e21..1c7ba8804176 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -11,6 +11,8 @@
 #ifndef __E820_HEADER
 #define __E820_HEADER
 
+#include <linux/ioport.h>
+
 #ifndef __ASSEMBLY__
 extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
 				    unsigned size);
@@ -19,7 +21,8 @@ extern void add_memory_region(unsigned long start, unsigned long size,
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void); 
 extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(void);
+extern void e820_reserve_resources(struct resource *code_resource,
+		struct resource *data_resource, struct resource *bss_resource);
 extern void e820_mark_nosave_regions(void);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6187a8567bc7..605d237364d2 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -8,6 +8,7 @@
 #ifndef _LINUX_IOPORT_H
 #define _LINUX_IOPORT_H
 
+#ifndef __ASSEMBLY__
 #include <linux/compiler.h>
 #include <linux/types.h>
 /*
@@ -153,4 +154,5 @@ extern struct resource * __devm_request_region(struct device *dev,
 extern void __devm_release_region(struct device *dev, struct resource *parent,
 				  resource_size_t start, resource_size_t n);
 
+#endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */

From ec12fa5c80126b252ebcb474d12c832ec4647daa Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 30 Jan 2008 13:30:32 +0100
Subject: [PATCH 110/890] x86: bitops_32.h style cleanups

Coding style cleanups in x86/bitops_32.h:

- drop space in "* addr"
- whitespace & indentation fixes
- spello fixes

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/bitops_32.h | 48 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 0b40f6d20bea..5a29cce6a91d 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -37,7 +37,7 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void set_bit(int nr, volatile unsigned long * addr)
+static inline void set_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__( LOCK_PREFIX
 		"btsl %1,%0"
@@ -54,7 +54,7 @@ static inline void set_bit(int nr, volatile unsigned long * addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile unsigned long * addr)
+static inline void __set_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__(
 		"btsl %1,%0"
@@ -72,7 +72,7 @@ static inline void __set_bit(int nr, volatile unsigned long * addr)
  * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  * in order to ensure changes are visible on other processors.
  */
-static inline void clear_bit(int nr, volatile unsigned long * addr)
+static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__( LOCK_PREFIX
 		"btrl %1,%0"
@@ -94,7 +94,7 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile unsigned long * addr)
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__(
 		"btrl %1,%0"
@@ -132,7 +132,7 @@ static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(int nr, volatile unsigned long * addr)
+static inline void __change_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__(
 		"btcl %1,%0"
@@ -150,7 +150,7 @@ static inline void __change_bit(int nr, volatile unsigned long * addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(int nr, volatile unsigned long * addr)
+static inline void change_bit(int nr, volatile unsigned long *addr)
 {
 	__asm__ __volatile__( LOCK_PREFIX
 		"btcl %1,%0"
@@ -167,7 +167,7 @@ static inline void change_bit(int nr, volatile unsigned long * addr)
  * It may be reordered on other architectures than x86.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(int nr, volatile unsigned long * addr)
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -199,7 +199,7 @@ static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -219,7 +219,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
  * It can be reorderdered on other architectures other than x86.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -270,7 +270,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.  
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(int nr, volatile unsigned long* addr)
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -287,7 +287,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long* addr)
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static int test_bit(int nr, const volatile void * addr);
+static int test_bit(int nr, const volatile void *addr);
 #endif
 
 static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr)
@@ -295,7 +295,7 @@ static __always_inline int constant_test_bit(int nr, const volatile unsigned lon
 	return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
 }
 
-static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
+static inline int variable_test_bit(int nr, const volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -306,10 +306,10 @@ static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
 	return oldbit;
 }
 
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
+#define test_bit(nr, addr) \
+	(__builtin_constant_p(nr) ? \
+		constant_test_bit((nr), (addr)) : \
+		variable_test_bit((nr), (addr)))
 
 #undef ADDR
 
@@ -318,7 +318,7 @@ static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
  * @addr: The address to start the search at
  * @size: The maximum size to search
  *
- * Returns the bit-number of the first zero bit, not the number of the byte
+ * Returns the bit number of the first zero bit, not the number of the byte
  * containing a bit.
  */
 static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
@@ -348,7 +348,7 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
 /**
  * find_next_zero_bit - find the first zero bit in a memory region
  * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
+ * @offset: The bit number to start searching at
  * @size: The maximum size to search
  */
 int find_next_zero_bit(const unsigned long *addr, int size, int offset);
@@ -372,7 +372,7 @@ static inline unsigned long __ffs(unsigned long word)
  * @addr: The address to start the search at
  * @size: The maximum size to search
  *
- * Returns the bit-number of the first set bit, not the number of the byte
+ * Returns the bit number of the first set bit, not the number of the byte
  * containing a bit.
  */
 static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
@@ -391,7 +391,7 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
 /**
  * find_next_bit - find the first set bit in a memory region
  * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
+ * @offset: The bit number to start searching at
  * @size: The maximum size to search
  */
 int find_next_bit(const unsigned long *addr, int size, int offset);
@@ -460,10 +460,10 @@ static inline int fls(int x)
 
 #include <asm-generic/bitops/ext2-non-atomic.h>
 
-#define ext2_set_bit_atomic(lock,nr,addr) \
-        test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit_atomic(lock,nr, addr) \
-	        test_and_clear_bit((nr),(unsigned long*)addr)
+#define ext2_set_bit_atomic(lock, nr, addr) \
+		test_and_set_bit((nr), (unsigned long *)addr)
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+		test_and_clear_bit((nr), (unsigned long *)addr)
 
 #include <asm-generic/bitops/minix.h>
 

From 6ea8bad1c06c8ccde381f1c848afe6a245bb33b5 Mon Sep 17 00:00:00 2001
From: "clameter@sgi.com" <clameter@sgi.com>
Date: Wed, 30 Jan 2008 13:30:32 +0100
Subject: [PATCH 111/890] x86: clean up stack allocation and free

Clean up the allocation and freeing of stacks a bit by using a __GFP_ZERO flag
instead of memset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Mike Travis <travis@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info_64.h | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index 7f6ee68f0002..1ac23c157231 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -74,20 +74,14 @@ static inline struct thread_info *stack_thread_info(void)
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)					\
-    ({								\
-	struct thread_info *ret;				\
-								\
-	ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)); \
-	if (ret)						\
-		memset(ret, 0, THREAD_SIZE);			\
-	ret;							\
-    })
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
 #else
-#define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
+#define THREAD_FLAGS GFP_KERNEL
 #endif
 
+#define alloc_thread_info(tsk) \
+	((struct thread_info *) __get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
 #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
 
 #else /* !__ASSEMBLY__ */

From 416b72182ac3f3f4931ed17d0256b1d805d1b553 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:30:33 +0100
Subject: [PATCH 112/890] x86: clean up nmi_32/64.c

clean up and make nmi_32/64.c more similar.
- white space and coding style clean up.
- nmi_cpu_busy is available on CONFIG_SMP.
- move functions __acpi_nmi_enable, acpi_nmi_enable,
  __acpi_nmi_disable and acpi_nmi_disable.
- make variables name more similar.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi_32.c | 14 +++---
 arch/x86/kernel/nmi_64.c | 95 ++++++++++++++++++++--------------------
 2 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 4f4bfd3a88b6..edd413650b3b 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
 static int endflag __initdata = 0;
 
+#ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
  * CPUs during the test make them busy.
  */
 static __init void nmi_cpu_busy(void *data)
 {
-#ifdef CONFIG_SMP
 	local_irq_enable_in_hardirq();
 	/* Intentionally don't use cpu_relax here. This is
 	   to make sure that the performance counter really ticks,
@@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data)
 	   care if they get somewhat less cycles. */
 	while (endflag == 0)
 		mb();
-#endif
 }
+#endif
 
 static int __init check_nmi_watchdog(void)
 {
@@ -87,11 +87,13 @@ static int __init check_nmi_watchdog(void)
 
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
+#ifdef CONFIG_SMP
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+#endif
 
 	for_each_possible_cpu(cpu)
-		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
+		prev_nmi_count[cpu] = nmi_count(cpu);
 	local_irq_enable();
 	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
@@ -237,10 +239,10 @@ void acpi_nmi_disable(void)
 		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
 }
 
-void setup_apic_nmi_watchdog (void *unused)
+void setup_apic_nmi_watchdog(void *unused)
 {
 	if (__get_cpu_var(wd_enabled))
- 		return;
+		return;
 
 	/* cheap hack to support suspend/resume */
 	/* if cpu0 is not active neither should the other cpus */
@@ -329,7 +331,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 	unsigned int sum;
 	int touched = 0;
 	int cpu = smp_processor_id();
-	int rc=0;
+	int rc = 0;
 
 	/* check for other users first */
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 53faef632fc6..fb99484d21cf 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *data)
 }
 #endif
 
-int __init check_nmi_watchdog (void)
+int __init check_nmi_watchdog(void)
 {
-	int *counts;
+	int *prev_nmi_count;
 	int cpu;
 
-	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) 
+	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
 		return 0;
 
 	if (!atomic_read(&nmi_active))
 		return 0;
 
-	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
-	if (!counts)
+	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	if (!prev_nmi_count)
 		return -1;
 
-	printk(KERN_INFO "testing NMI watchdog ... ");
+	printk(KERN_INFO "Testing NMI watchdog ... ");
 
 #ifdef CONFIG_SMP
 	if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -101,30 +101,29 @@ int __init check_nmi_watchdog (void)
 #endif
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		counts[cpu] = cpu_pda(cpu)->__nmi_count;
+		prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
 	local_irq_enable();
 	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
 	for_each_online_cpu(cpu) {
 		if (!per_cpu(wd_enabled, cpu))
 			continue;
-		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
+		if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
 			printk(KERN_WARNING "WARNING: CPU#%d: NMI "
 			       "appears to be stuck (%d->%d)!\n",
-			       cpu,
-			       counts[cpu],
-			       cpu_pda(cpu)->__nmi_count);
+				cpu,
+				prev_nmi_count[cpu],
+				cpu_pda(cpu)->__nmi_count);
 			per_cpu(wd_enabled, cpu) = 0;
 			atomic_dec(&nmi_active);
 		}
 	}
+	endflag = 1;
 	if (!atomic_read(&nmi_active)) {
-		kfree(counts);
+		kfree(prev_nmi_count);
 		atomic_set(&nmi_active, -1);
-		endflag = 1;
 		return -1;
 	}
-	endflag = 1;
 	printk("OK.\n");
 
 	/* now that we know it works we can reduce NMI frequency to
@@ -132,7 +131,7 @@ int __init check_nmi_watchdog (void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		nmi_hz = lapic_adjust_nmi_hz(1);
 
-	kfree(counts);
+	kfree(prev_nmi_count);
 	return 0;
 }
 
@@ -159,34 +158,6 @@ static int __init setup_nmi_watchdog(char *str)
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-
-static void __acpi_nmi_disable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
-}
-
-static void __acpi_nmi_enable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
-}
 #ifdef CONFIG_PM
 
 static int nmi_pm_active; /* nmi_active before suspend */
@@ -217,7 +188,7 @@ static struct sysdev_class nmi_sysclass = {
 };
 
 static struct sys_device device_lapic_nmi = {
-	.id		= 0,
+	.id	= 0,
 	.cls	= &nmi_sysclass,
 };
 
@@ -231,7 +202,7 @@ static int __init init_lapic_nmi_sysfs(void)
 	if (nmi_watchdog != NMI_LOCAL_APIC)
 		return 0;
 
-	if ( atomic_read(&nmi_active) < 0 )
+	if (atomic_read(&nmi_active) < 0)
 		return 0;
 
 	error = sysdev_class_register(&nmi_sysclass);
@@ -244,9 +215,37 @@ late_initcall(init_lapic_nmi_sysfs);
 
 #endif	/* CONFIG_PM */
 
+static void __acpi_nmi_enable(void *__unused)
+{
+	apic_write(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+}
+
+static void __acpi_nmi_disable(void *__unused)
+{
+	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+}
+
 void setup_apic_nmi_watchdog(void *unused)
 {
-	if (__get_cpu_var(wd_enabled) == 1)
+	if (__get_cpu_var(wd_enabled))
 		return;
 
 	/* cheap hack to support suspend/resume */
@@ -311,8 +310,9 @@ void touch_nmi_watchdog(void)
 		}
 	}
 
- 	touch_softlockup_watchdog();
+	touch_softlockup_watchdog();
 }
+EXPORT_SYMBOL(touch_nmi_watchdog);
 
 int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
@@ -479,4 +479,3 @@ void __trigger_all_cpu_backtrace(void)
 
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(touch_nmi_watchdog);

From 6abcd98ffafbff81f0bfd7ee1d129e634af13245 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:30:33 +0100
Subject: [PATCH 113/890] x86: irqflags consolidation

This patch consolidates the irqflags include files containing common
paravirt definitions. The native definition for interrupt handling, halt,
and such, are the same for 32 and 64 bit, and they are kept in irqflags.h.
the differences are split in the arch-specific files.

The syscall function, irq_enable_sysexit, has a very specific i386 naming,
and its name is then changed to a more general one.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_32.c |   2 +-
 arch/x86/kernel/entry_32.S       |   8 +-
 arch/x86/kernel/paravirt_32.c    |  10 +-
 arch/x86/kernel/vmi_32.c         |   4 +-
 arch/x86/xen/enlighten.c         |   2 +-
 include/asm-x86/irqflags.h       | 248 ++++++++++++++++++++++++++++++-
 include/asm-x86/irqflags_32.h    | 195 ------------------------
 include/asm-x86/irqflags_64.h    | 174 ----------------------
 include/asm-x86/paravirt.h       |   9 +-
 9 files changed, 262 insertions(+), 390 deletions(-)
 delete mode 100644 include/asm-x86/irqflags_32.h
 delete mode 100644 include/asm-x86/irqflags_64.h

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0e45981b2dd7..c1ccfabb4a9e 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -123,7 +123,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dc7f938e5015..d63609dd64b9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
  * for paravirtualization.  The following will never clobber any registers:
  *   INTERRUPT_RETURN (aka. "iret")
  *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -351,7 +351,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSEXIT
+	ENABLE_INTERRUPTS_SYSCALL_RET
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -882,10 +882,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_sysexit)
+ENTRY(native_irq_enable_syscall_ret)
 	sti
 	sysexit
-END(native_irq_enable_sysexit)
+END(native_irq_enable_syscall_ret)
 #endif
 
 KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index f5000799f8ef..706b0562ea40 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 	SITE(pv_irq_ops, restore_fl);
 	SITE(pv_irq_ops, save_fl);
 	SITE(pv_cpu_ops, iret);
-	SITE(pv_cpu_ops, irq_enable_sysexit);
+	SITE(pv_cpu_ops, irq_enable_syscall_ret);
 	SITE(pv_mmu_ops, read_cr2);
 	SITE(pv_mmu_ops, read_cr3);
 	SITE(pv_mmu_ops, write_cr3);
@@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
+extern void native_irq_enable_syscall_ret(void);
 
 static int __init print_banner(void)
 {
@@ -384,7 +384,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = write_dt_entry,
 	.load_esp0 = native_load_esp0,
 
-	.irq_enable_sysexit = native_irq_enable_sysexit,
+	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
 	.iret = native_iret,
 
 	.set_iopl_mask = native_set_iopl_mask,
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index f02bad68abaa..aacce426cbd0 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -148,7 +148,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 					      insns, eip);
 		case PARAVIRT_PATCH(pv_cpu_ops.iret):
 			return patch_internal(VMI_CALL_IRET, len, insns, eip);
-		case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
+		case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
 			return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
 		default:
 			break;
@@ -870,7 +870,7 @@ static inline int __init activate_vmi(void)
 	 * the backend.  They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 00829401389e..d3574485cb15 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -953,7 +953,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
-	.irq_enable_sysexit = NULL,  /* never called */
+	.irq_enable_syscall_ret = NULL,  /* never called */
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 1b695ff52687..92021c1ffa3a 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -1,5 +1,245 @@
-#ifdef CONFIG_X86_32
-# include "irqflags_32.h"
-#else
-# include "irqflags_64.h"
+#ifndef _X86_IRQFLAGS_H_
+#define _X86_IRQFLAGS_H_
+
+#include <asm/processor-flags.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * Interrupt control:
+ */
+
+static inline unsigned long native_save_fl(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__(
+		"# __raw_save_flags\n\t"
+		"pushf ; pop %0"
+		: "=g" (flags)
+		: /* no input */
+		: "memory"
+	);
+
+	return flags;
+}
+
+static inline void native_restore_fl(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"push %0 ; popf"
+		: /* no output */
+		:"g" (flags)
+		:"memory", "cc"
+	);
+}
+
+static inline void native_irq_disable(void)
+{
+	asm volatile("cli": : :"memory");
+}
+
+static inline void native_irq_enable(void)
+{
+	asm volatile("sti": : :"memory");
+}
+
+static inline void native_safe_halt(void)
+{
+	asm volatile("sti; hlt": : :"memory");
+}
+
+static inline void native_halt(void)
+{
+	asm volatile("hlt": : :"memory");
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#ifndef __ASSEMBLY__
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	return native_save_fl();
+}
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	native_restore_fl(flags);
+}
+
+static inline void raw_local_irq_disable(void)
+{
+	native_irq_disable();
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	native_irq_enable();
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+	native_safe_halt();
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+	native_halt();
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
+#else
+
+#define ENABLE_INTERRUPTS(x)	sti
+#define DISABLE_INTERRUPTS(x)	cli
+
+#ifdef CONFIG_X86_64
+#define INTERRUPT_RETURN	iretq
+#define ENABLE_INTERRUPTS_SYSCALL_RET			\
+			movq	%gs:pda_oldrsp, %rsp;	\
+			swapgs;				\
+			sysretq;
+#else
+#define INTERRUPT_RETURN		iret
+#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define GET_CR0_INTO_EAX		movl %cr0, %eax
+#endif
+
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifndef __ASSEMBLY__
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
+#define raw_local_irq_save(flags) \
+		do { (flags) = __raw_local_irq_save(); } while (0)
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & X86_EFLAGS_IF);
+}
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * makes the traced hardirq state match with the machine state
+ *
+ * should be a rarely used function, only in places where its
+ * otherwise impossible to know the irq state, like in traps.
+ */
+static inline void trace_hardirqs_fixup_flags(unsigned long flags)
+{
+	if (raw_irqs_disabled_flags(flags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+}
+
+static inline void trace_hardirqs_fixup(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	trace_hardirqs_fixup_flags(flags);
+}
+
+#else
+
+#ifdef CONFIG_X86_64
+/*
+ * Currently paravirt can't handle swapgs nicely when we
+ * don't have a stack we can rely on (such as a user space
+ * stack).  So we either find a way around these or just fault
+ * and emulate if a guest tries to call swapgs directly.
+ *
+ * Either way, this is a good way to document that we don't
+ * have a reliable stack. x86_64 only.
+ */
+#define SWAPGS_UNSAFE_STACK	swapgs
+#define ARCH_TRACE_IRQS_ON		call trace_hardirqs_on_thunk
+#define ARCH_TRACE_IRQS_OFF		call trace_hardirqs_off_thunk
+#define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
+#define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
+	TRACE_IRQS_ON; \
+	sti; \
+	SAVE_REST; \
+	LOCKDEP_SYS_EXIT; \
+	RESTORE_REST; \
+	cli; \
+	TRACE_IRQS_OFF;
+
+#else
+#define ARCH_TRACE_IRQS_ON			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_LOCKDEP_SYS_EXIT			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call lockdep_sys_exit;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_LOCKDEP_SYS_EXIT_IRQ
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON		ARCH_TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF	ARCH_TRACE_IRQS_OFF
+#else
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  define LOCKDEP_SYS_EXIT	ARCH_LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ	ARCH_LOCKDEP_SYS_EXIT_IRQ
+# else
+#  define LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ
+# endif
+
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
deleted file mode 100644
index 98b21b9bdce8..000000000000
--- a/include/asm-x86/irqflags_32.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * IRQ flags handling
- *
- * This file gets included from lowlevel asm headers too, to provide
- * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() functions from the lowlevel headers.
- */
-#ifndef _ASM_IRQFLAGS_H
-#define _ASM_IRQFLAGS_H
-#include <asm/processor-flags.h>
-
-#ifndef __ASSEMBLY__
-static inline unsigned long native_save_fl(void)
-{
-	unsigned long f;
-	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
-	return f;
-}
-
-static inline void native_restore_fl(unsigned long f)
-{
-	asm volatile("pushl %0 ; popfl": /* no output */
-			     :"g" (f)
-			     :"memory", "cc");
-}
-
-static inline void native_irq_disable(void)
-{
-	asm volatile("cli": : :"memory");
-}
-
-static inline void native_irq_enable(void)
-{
-	asm volatile("sti": : :"memory");
-}
-
-static inline void native_safe_halt(void)
-{
-	asm volatile("sti; hlt": : :"memory");
-}
-
-static inline void native_halt(void)
-{
-	asm volatile("hlt": : :"memory");
-}
-#endif	/* __ASSEMBLY__ */
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#ifndef __ASSEMBLY__
-
-static inline unsigned long __raw_local_save_flags(void)
-{
-	return native_save_fl();
-}
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
-	native_restore_fl(flags);
-}
-
-static inline void raw_local_irq_disable(void)
-{
-	native_irq_disable();
-}
-
-static inline void raw_local_irq_enable(void)
-{
-	native_irq_enable();
-}
-
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
-	native_safe_halt();
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
-	native_halt();
-}
-
-/*
- * For spinlocks, etc:
- */
-static inline unsigned long __raw_local_irq_save(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	raw_local_irq_disable();
-
-	return flags;
-}
-
-#else
-#define DISABLE_INTERRUPTS(clobbers)	cli
-#define ENABLE_INTERRUPTS(clobbers)	sti
-#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
-#define INTERRUPT_RETURN		iret
-#define GET_CR0_INTO_EAX		movl %cr0, %eax
-#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
-
-#ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags) \
-		do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags) \
-		do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
-	return !(flags & X86_EFLAGS_IF);
-}
-
-static inline int raw_irqs_disabled(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	return raw_irqs_disabled_flags(flags);
-}
-
-/*
- * makes the traced hardirq state match with the machine state
- *
- * should be a rarely used function, only in places where its
- * otherwise impossible to know the irq state, like in traps.
- */
-static inline void trace_hardirqs_fixup_flags(unsigned long flags)
-{
-	if (raw_irqs_disabled_flags(flags))
-		trace_hardirqs_off();
-	else
-		trace_hardirqs_on();
-}
-
-static inline void trace_hardirqs_fixup(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	trace_hardirqs_fixup_flags(flags);
-}
-#endif /* __ASSEMBLY__ */
-
-/*
- * Do the CPU's IRQ-state tracing from assembly code. We call a
- * C function, so save all the C-clobbered registers:
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-# define TRACE_IRQS_ON				\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call trace_hardirqs_on;			\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-
-# define TRACE_IRQS_OFF				\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call trace_hardirqs_off;		\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-
-#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LOCKDEP_SYS_EXIT			\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call lockdep_sys_exit;			\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-#else
-# define LOCKDEP_SYS_EXIT
-#endif
-
-#endif
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
deleted file mode 100644
index 38c07db733cf..000000000000
--- a/include/asm-x86/irqflags_64.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * IRQ flags handling
- *
- * This file gets included from lowlevel asm headers too, to provide
- * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() functions from the lowlevel headers.
- */
-#ifndef _ASM_IRQFLAGS_H
-#define _ASM_IRQFLAGS_H
-#include <asm/processor-flags.h>
-
-#ifndef __ASSEMBLY__
-/*
- * Interrupt control:
- */
-
-static inline unsigned long __raw_local_save_flags(void)
-{
-	unsigned long flags;
-
-	__asm__ __volatile__(
-		"# __raw_save_flags\n\t"
-		"pushfq ; popq %q0"
-		: "=g" (flags)
-		: /* no input */
-		: "memory"
-	);
-
-	return flags;
-}
-
-#define raw_local_save_flags(flags) \
-		do { (flags) = __raw_local_save_flags(); } while (0)
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
-	__asm__ __volatile__(
-		"pushq %0 ; popfq"
-		: /* no output */
-		:"g" (flags)
-		:"memory", "cc"
-	);
-}
-
-#ifdef CONFIG_X86_VSMP
-
-/*
- * Interrupt control for the VSMP architecture:
- */
-
-static inline void raw_local_irq_disable(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-
-static inline void raw_local_irq_enable(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
-	return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
-}
-
-#else /* CONFIG_X86_VSMP */
-
-static inline void raw_local_irq_disable(void)
-{
-	__asm__ __volatile__("cli" : : : "memory");
-}
-
-static inline void raw_local_irq_enable(void)
-{
-	__asm__ __volatile__("sti" : : : "memory");
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
-	return !(flags & X86_EFLAGS_IF);
-}
-
-#endif
-
-/*
- * For spinlocks, etc.:
- */
-
-static inline unsigned long __raw_local_irq_save(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	raw_local_irq_disable();
-
-	return flags;
-}
-
-#define raw_local_irq_save(flags) \
-		do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	return raw_irqs_disabled_flags(flags);
-}
-
-/*
- * makes the traced hardirq state match with the machine state
- *
- * should be a rarely used function, only in places where its
- * otherwise impossible to know the irq state, like in traps.
- */
-static inline void trace_hardirqs_fixup_flags(unsigned long flags)
-{
-	if (raw_irqs_disabled_flags(flags))
-		trace_hardirqs_off();
-	else
-		trace_hardirqs_on();
-}
-
-static inline void trace_hardirqs_fixup(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	trace_hardirqs_fixup_flags(flags);
-}
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
-	__asm__ __volatile__("sti; hlt" : : : "memory");
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
-	__asm__ __volatile__("hlt": : :"memory");
-}
-
-#else /* __ASSEMBLY__: */
-# ifdef CONFIG_TRACE_IRQFLAGS
-#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk
-#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk
-# else
-#  define TRACE_IRQS_ON
-#  define TRACE_IRQS_OFF
-# endif
-# ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
-#  define LOCKDEP_SYS_EXIT_IRQ	\
-	TRACE_IRQS_ON; \
-	sti; \
-	SAVE_REST; \
-	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
-	cli; \
-	TRACE_IRQS_OFF;
-# else
-#  define LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ
-# endif
-#endif
-
-#endif
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 19fd3e67b08c..be7b934f6c54 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -121,7 +121,7 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(void);
 
 	/* These two are jmp to, not actually called. */
-	void (*irq_enable_sysexit)(void);
+	void (*irq_enable_syscall_ret)(void);
 	void (*iret)(void);
 
 	struct pv_lazy_ops lazy_mode;
@@ -1138,9 +1138,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call *%cs:pv_irq_ops+PV_IRQ_irq_enable;		\
 		  popl %edx; popl %ecx; popl %eax)
 
-#define ENABLE_INTERRUPTS_SYSEXIT					       \
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
-		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
+#define ENABLE_INTERRUPTS_SYSCALL_RET					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+		  CLBR_NONE,						\
+		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
 
 #define GET_CR0_INTO_EAX			\
 	push %ecx; push %edx;			\

From 2fed0c507cf0101d511366f36e8573f403dbfea5 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:30:33 +0100
Subject: [PATCH 114/890] x86: consolidate spinlock.h

The cli and sti instructions need to be replaced by paravirt hooks.
For the i386 architecture, this is already done. The code requirements
aren't much different from x86_64 POV, so this part is consolidated in
the common header

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/spinlock.h    | 14 +++++++
 include/asm-x86/spinlock_32.h | 71 +++++++++++++----------------------
 include/asm-x86/spinlock_64.h | 37 +++++++++++-------
 3 files changed, 64 insertions(+), 58 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index d74d85e71dcb..e1d555a3dfe5 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,5 +1,19 @@
+#ifndef _X86_SPINLOCK_H_
+#define _X86_SPINLOCK_H_
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define CLI_STRING	"cli"
+#define STI_STRING	"sti"
+#define CLI_STI_CLOBBERS
+#define CLI_STI_INPUT_ARGS
+#endif /* CONFIG_PARAVIRT */
+
 #ifdef CONFIG_X86_32
 # include "spinlock_32.h"
 #else
 # include "spinlock_64.h"
 #endif
+
+#endif
diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index d3bcebed60ca..c42c3f12d7ce 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -5,16 +5,6 @@
 #include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
-#include <linux/compiler.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define CLI_STRING	"cli"
-#define STI_STRING	"sti"
-#define CLI_STI_CLOBBERS
-#define CLI_STI_INPUT_ARGS
-#endif /* CONFIG_PARAVIRT */
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -27,23 +17,24 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile signed char *)(&(x)->slock) <= 0;
+	return *(volatile signed char *)(&(lock)->slock) <= 0;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	asm volatile("\n1:\t"
-		     LOCK_PREFIX " ; decb %0\n\t"
-		     "jns 3f\n"
-		     "2:\t"
-		     "rep;nop\n\t"
-		     "cmpb $0,%0\n\t"
-		     "jle 2b\n\t"
-		     "jmp 1b\n"
-		     "3:\n\t"
-		     : "+m" (lock->slock) : : "memory");
+	asm volatile(
+		"\n1:\t"
+		LOCK_PREFIX " ; decb %0\n\t"
+		"jns 3f\n"
+		"2:\t"
+		"rep;nop\n\t"
+		"cmpb $0,%0\n\t"
+		"jle 2b\n\t"
+		"jmp 1b\n"
+		"3:\n\t"
+		: "+m" (lock->slock) : : "memory");
 }
 
 /*
@@ -55,7 +46,8 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
  * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
  */
 #ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+					 unsigned long flags)
 {
 	asm volatile(
 		"\n1:\t"
@@ -79,18 +71,20 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 		"5:\n\t"
 		: [slock] "+m" (lock->slock)
 		: [flags] "r" (flags)
-	 	  CLI_STI_INPUT_ARGS
+		  CLI_STI_INPUT_ARGS
 		: "memory" CLI_STI_CLOBBERS);
 }
 #endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	char oldval;
+	signed char oldval;
+
 	asm volatile(
 		"xchgb %b0,%1"
 		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
+
 	return oldval > 0;
 }
 
@@ -112,7 +106,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	char oldval = 1;
+	unsigned char oldval = 1;
 
 	asm volatile("xchgb %b0, %1"
 		     : "=q" (oldval), "+m" (lock->slock)
@@ -139,31 +133,16 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
  *
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores.  See
- * semaphore.h for details.  -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
  */
 
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static inline int __raw_read_can_lock(raw_rwlock_t *x)
+static inline int __raw_read_can_lock(raw_rwlock_t *lock)
 {
-	return (int)(x)->lock > 0;
+	return (int)(lock)->lock > 0;
 }
 
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static inline int __raw_write_can_lock(raw_rwlock_t *x)
+static inline int __raw_write_can_lock(raw_rwlock_t *lock)
 {
-	return (x)->lock == RW_LOCK_BIAS;
+	return (lock)->lock == RW_LOCK_BIAS;
 }
 
 static inline void __raw_read_lock(raw_rwlock_t *rw)
@@ -187,6 +166,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 static inline int __raw_read_trylock(raw_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
+
 	atomic_dec(count);
 	if (atomic_read(count) >= 0)
 		return 1;
@@ -197,6 +177,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
 static inline int __raw_write_trylock(raw_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
+
 	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
 		return 1;
 	atomic_add(RW_LOCK_BIAS, count);
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index 88bf981e73cf..3b5adf92ad08 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -33,14 +33,21 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 		"cmpl $0,%0\n\t"
 		"jle 3b\n\t"
 		"jmp 1b\n"
-		"2:\t" : "=m" (lock->slock) : : "memory");
+		"2:\t"
+		: "=m" (lock->slock) : : "memory");
 }
 
 /*
- * Same as __raw_spin_lock, but reenable interrupts during spinning.
+ * It is easier for the lock validator if interrupts are not re-enabled
+ * in the middle of a lock-acquire. This is a performance feature anyway
+ * so we turn it off:
+ *
+ * NOTE: there's an irqs-on section here, which normally would have to be
+ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
  */
 #ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+					 unsigned long flags)
 {
 	asm volatile(
 		"\n1:\t"
@@ -48,12 +55,12 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 		"jns 5f\n"
 		"testl $0x200, %1\n\t"	/* interrupts were disabled? */
 		"jz 4f\n\t"
-	        "sti\n"
+		STI_STRING "\n"
 		"3:\t"
 		"rep;nop\n\t"
 		"cmpl $0, %0\n\t"
 		"jle 3b\n\t"
-		"cli\n\t"
+		CLI_STRING "\n\t"
 		"jmp 1b\n"
 		"4:\t"
 		"rep;nop\n\t"
@@ -61,7 +68,9 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 		"jg 1b\n\t"
 		"jmp 4b\n"
 		"5:\n\t"
-		: "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
+		: "+m" (lock->slock)
+		: "r" ((unsigned)flags) CLI_STI_INPUT_ARGS
+		: "memory" CLI_STI_CLOBBERS);
 }
 #endif
 
@@ -79,7 +88,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory");
+	asm volatile("movl $1,%0" : "=m" (lock->slock) :: "memory");
 }
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
@@ -114,18 +123,18 @@ static inline int __raw_write_can_lock(raw_rwlock_t *lock)
 
 static inline void __raw_read_lock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
 		     "jns 1f\n"
-		     "call __read_lock_failed\n"
+		     "call __read_lock_failed\n\t"
 		     "1:\n"
 		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
 		     "jz 1f\n"
-		     "\tcall __write_lock_failed\n\t"
+		     "call __write_lock_failed\n\t"
 		     "1:\n"
 		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
@@ -133,6 +142,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 static inline int __raw_read_trylock(raw_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
+
 	atomic_dec(count);
 	if (atomic_read(count) >= 0)
 		return 1;
@@ -143,6 +153,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
 static inline int __raw_write_trylock(raw_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
+
 	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
 		return 1;
 	atomic_add(RW_LOCK_BIAS, count);
@@ -151,12 +162,12 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
 
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " ; incl %0" :"=m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " ; addl $" RW_LOCK_BIAS_STR ",%0"
+	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",%0"
 				: "=m" (rw->lock) : : "memory");
 }
 

From a33fff3a033f2e8a930067ad608c21e1f86bffab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:34 +0100
Subject: [PATCH 115/890] x86: fix asm constraints in spinlock_32/64.h

Use the correct constraints for the spinlock assembler functions.

read (modify) write functions need "+m" instead of "=m"

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock_32.h |  2 +-
 include/asm-x86/spinlock_64.h | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index c42c3f12d7ce..fca124a1103d 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -99,7 +99,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
+	asm volatile("movb $1,%0" : "=m" (lock->slock) :: "memory");
 }
 
 #else
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index 3b5adf92ad08..e81f6c18d877 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -34,7 +34,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 		"jle 3b\n\t"
 		"jmp 1b\n"
 		"2:\t"
-		: "=m" (lock->slock) : : "memory");
+		: "+m" (lock->slock) : : "memory");
 }
 
 /*
@@ -80,7 +80,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 	asm volatile(
 		"xchgl %0,%1"
-		:"=q" (oldval), "=m" (lock->slock)
+		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
 
 	return oldval > 0;
@@ -162,13 +162,13 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
 
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",%0"
-				: "=m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
+				: "+m" (rw->lock) : : "memory");
 }
 
 #define _raw_spin_relax(lock)	cpu_relax()

From 6514f93a2ce643ef5914eae7ce49b978e1d356aa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:34 +0100
Subject: [PATCH 116/890] x86: use immediates instead of RW_LOCK_BIAS_STR

Use immediate instead of the RW_LOCK_BIAS_STR.
Makes the code more readable and gets rid of the string constant.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/rwlock.h      | 1 -
 include/asm-x86/spinlock_32.h | 8 ++++----
 include/asm-x86/spinlock_64.h | 6 +++---
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h
index f2b64a429e6b..6a8c0d645108 100644
--- a/include/asm-x86/rwlock.h
+++ b/include/asm-x86/rwlock.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_RWLOCK_H
 
 #define RW_LOCK_BIAS		 0x01000000
-#define RW_LOCK_BIAS_STR	"0x01000000"
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index fca124a1103d..e7a14ab906e9 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -156,11 +156,11 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t"
+	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::"a" (rw) : "memory");
+		     ::"a" (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
 static inline int __raw_read_trylock(raw_rwlock_t *lock)
@@ -191,8 +191,8 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
-				 : "+m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "addl %1, %0"
+		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
 #define _raw_spin_relax(lock)	cpu_relax()
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index e81f6c18d877..ab50e7f51058 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -127,7 +127,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
-		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::"D" (rw) : "memory");
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
@@ -167,8 +167,8 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
-				: "+m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX "addl %1, %0"
+		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
 #define _raw_spin_relax(lock)	cpu_relax()

From 3b23ed84ec34f04f54f7d5b1e35f258d64a7a5fb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:34 +0100
Subject: [PATCH 117/890] x86: spinlock_32/64 match the jump labels and symbols

Match the jump labels in  the 32/64 variants and switch the
64bit version to symbols, so the functions are almost identical
except for the operand size now.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock_32.h |  1 -
 include/asm-x86/spinlock_64.h | 21 +++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index e7a14ab906e9..4ef626d53682 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -53,7 +53,6 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 		"\n1:\t"
 		LOCK_PREFIX " ; decb %[slock]\n\t"
 		"jns 5f\n"
-		"2:\t"
 		"testl $0x200, %[flags]\n\t"
 		"jz 4f\n\t"
 		STI_STRING "\n"
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index ab50e7f51058..19d483c80185 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -27,13 +27,13 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	asm volatile(
 		"\n1:\t"
 		LOCK_PREFIX " ; decl %0\n\t"
-		"jns 2f\n"
-		"3:\n"
+		"jns 3f\n"
+		"2:\t"
 		"rep;nop\n\t"
 		"cmpl $0,%0\n\t"
-		"jle 3b\n\t"
+		"jle 2b\n\t"
 		"jmp 1b\n"
-		"2:\t"
+		"3:\n\t"
 		: "+m" (lock->slock) : : "memory");
 }
 
@@ -51,25 +51,26 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
+		LOCK_PREFIX " ; decl %[slock]\n\t"
 		"jns 5f\n"
-		"testl $0x200, %1\n\t"	/* interrupts were disabled? */
+		"testl $0x200, %[flags]\n\t"
 		"jz 4f\n\t"
 		STI_STRING "\n"
 		"3:\t"
 		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
+		"cmpl $0, %[slock]\n\t"
 		"jle 3b\n\t"
 		CLI_STRING "\n\t"
 		"jmp 1b\n"
 		"4:\t"
 		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
+		"cmpl $0, %[slock]\n\t"
 		"jg 1b\n\t"
 		"jmp 4b\n"
 		"5:\n\t"
-		: "+m" (lock->slock)
-		: "r" ((unsigned)flags) CLI_STI_INPUT_ARGS
+		: [slock] "+m" (lock->slock)
+		: [flags] "r" ((unsigned)flags)
+		  CLI_STI_INPUT_ARGS
 		: "memory" CLI_STI_CLOBBERS);
 }
 #endif

From cf244e30f5b50763cbe85f7de30923d12999e38d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:34 +0100
Subject: [PATCH 118/890] x86: spinlock_32/64 substitute types and instructions

Use _slock_t for the spinlock data types and replace the instructions
by string defines, which makes the code of 32/64 bit versions more
or less identical.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock_32.h | 31 +++++++++++++++++++------------
 include/asm-x86/spinlock_64.h | 31 +++++++++++++++++++------------
 2 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index 4ef626d53682..2de9b8b89903 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -17,20 +17,27 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
+typedef char _slock_t;
+#define LOCK_INS_DEC "decb"
+#define LOCK_INS_XCH "xchgb"
+#define LOCK_INS_MOV "movb"
+#define LOCK_INS_CMP "cmpb"
+#define LOCK_PTR_REG "a"
+
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile signed char *)(&(lock)->slock) <= 0;
+	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decb %0\n\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
 		"jns 3f\n"
 		"2:\t"
 		"rep;nop\n\t"
-		"cmpb $0,%0\n\t"
+		LOCK_INS_CMP " $0,%0\n\t"
 		"jle 2b\n\t"
 		"jmp 1b\n"
 		"3:\n\t"
@@ -51,25 +58,25 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decb %[slock]\n\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
 		"jns 5f\n"
 		"testl $0x200, %[flags]\n\t"
 		"jz 4f\n\t"
 		STI_STRING "\n"
 		"3:\t"
 		"rep;nop\n\t"
-		"cmpb $0, %[slock]\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
 		"jle 3b\n\t"
 		CLI_STRING "\n\t"
 		"jmp 1b\n"
 		"4:\t"
 		"rep;nop\n\t"
-		"cmpb $0, %[slock]\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
 		"jg 1b\n\t"
 		"jmp 4b\n"
 		"5:\n\t"
 		: [slock] "+m" (lock->slock)
-		: [flags] "r" (flags)
+		: [flags] "r" ((u32)flags)
 		  CLI_STI_INPUT_ARGS
 		: "memory" CLI_STI_CLOBBERS);
 }
@@ -77,10 +84,10 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	signed char oldval;
+	_slock_t oldval;
 
 	asm volatile(
-		"xchgb %b0,%1"
+		LOCK_INS_XCH " %0,%1"
 		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
 
@@ -98,7 +105,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movb $1,%0" : "=m" (lock->slock) :: "memory");
+	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
 }
 
 #else
@@ -150,7 +157,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
-		     ::"a" (rw) : "memory");
+		     ::LOCK_PTR_REG (rw) : "memory");
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
@@ -159,7 +166,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::"a" (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
 static inline int __raw_read_trylock(raw_rwlock_t *lock)
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index 19d483c80185..f5ba90b7335c 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -17,20 +17,27 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
+typedef int _slock_t;
+#define LOCK_INS_DEC "decl"
+#define LOCK_INS_XCH "xchgl"
+#define LOCK_INS_MOV "movl"
+#define LOCK_INS_CMP "cmpl"
+#define LOCK_PTR_REG "D"
+
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile signed int *)(&(lock)->slock) <= 0;
+	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
 		"jns 3f\n"
 		"2:\t"
 		"rep;nop\n\t"
-		"cmpl $0,%0\n\t"
+		LOCK_INS_CMP " $0,%0\n\t"
 		"jle 2b\n\t"
 		"jmp 1b\n"
 		"3:\n\t"
@@ -51,25 +58,25 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decl %[slock]\n\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
 		"jns 5f\n"
 		"testl $0x200, %[flags]\n\t"
 		"jz 4f\n\t"
 		STI_STRING "\n"
 		"3:\t"
 		"rep;nop\n\t"
-		"cmpl $0, %[slock]\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
 		"jle 3b\n\t"
 		CLI_STRING "\n\t"
 		"jmp 1b\n"
 		"4:\t"
 		"rep;nop\n\t"
-		"cmpl $0, %[slock]\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
 		"jg 1b\n\t"
 		"jmp 4b\n"
 		"5:\n\t"
 		: [slock] "+m" (lock->slock)
-		: [flags] "r" ((unsigned)flags)
+		: [flags] "r" ((u32)flags)
 		  CLI_STI_INPUT_ARGS
 		: "memory" CLI_STI_CLOBBERS);
 }
@@ -77,10 +84,10 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	int oldval;
+	_slock_t oldval;
 
 	asm volatile(
-		"xchgl %0,%1"
+		LOCK_INS_XCH " %0,%1"
 		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
 
@@ -89,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movl $1,%0" : "=m" (lock->slock) :: "memory");
+	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
 }
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
@@ -128,7 +135,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
-		     ::"D" (rw) : "memory");
+		     ::LOCK_PTR_REG (rw) : "memory");
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
@@ -137,7 +144,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
 static inline int __raw_read_trylock(raw_rwlock_t *lock)

From 1075cf7a959f72833e54dd2d4f885617e58e3e0a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:34 +0100
Subject: [PATCH 119/890] x86: merge spinlock.h variants

Merge them finally together

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock.h    | 211 +++++++++++++++++++++++++++++++++-
 include/asm-x86/spinlock_32.h | 208 ---------------------------------
 include/asm-x86/spinlock_64.h | 186 ------------------------------
 3 files changed, 209 insertions(+), 396 deletions(-)
 delete mode 100644 include/asm-x86/spinlock_32.h
 delete mode 100644 include/asm-x86/spinlock_64.h

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e1d555a3dfe5..afd4b80ff0ad 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,6 +1,22 @@
 #ifndef _X86_SPINLOCK_H_
 #define _X86_SPINLOCK_H_
 
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ *
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -11,9 +27,200 @@
 #endif /* CONFIG_PARAVIRT */
 
 #ifdef CONFIG_X86_32
-# include "spinlock_32.h"
+typedef char _slock_t;
+# define LOCK_INS_DEC "decb"
+# define LOCK_INS_XCH "xchgb"
+# define LOCK_INS_MOV "movb"
+# define LOCK_INS_CMP "cmpb"
+# define LOCK_PTR_REG "a"
 #else
-# include "spinlock_64.h"
+typedef int _slock_t;
+# define LOCK_INS_DEC "decl"
+# define LOCK_INS_XCH "xchgl"
+# define LOCK_INS_MOV "movl"
+# define LOCK_INS_CMP "cmpl"
+# define LOCK_PTR_REG "D"
 #endif
 
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	asm volatile(
+		"\n1:\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
+		"jns 3f\n"
+		"2:\t"
+		"rep;nop\n\t"
+		LOCK_INS_CMP " $0,%0\n\t"
+		"jle 2b\n\t"
+		"jmp 1b\n"
+		"3:\n\t"
+		: "+m" (lock->slock) : : "memory");
+}
+
+/*
+ * It is easier for the lock validator if interrupts are not re-enabled
+ * in the middle of a lock-acquire. This is a performance feature anyway
+ * so we turn it off:
+ *
+ * NOTE: there's an irqs-on section here, which normally would have to be
+ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
+ */
+#ifndef CONFIG_PROVE_LOCKING
+static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+					 unsigned long flags)
+{
+	asm volatile(
+		"\n1:\t"
+		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
+		"jns 5f\n"
+		"testl $0x200, %[flags]\n\t"
+		"jz 4f\n\t"
+		STI_STRING "\n"
+		"3:\t"
+		"rep;nop\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
+		"jle 3b\n\t"
+		CLI_STRING "\n\t"
+		"jmp 1b\n"
+		"4:\t"
+		"rep;nop\n\t"
+		LOCK_INS_CMP " $0, %[slock]\n\t"
+		"jg 1b\n\t"
+		"jmp 4b\n"
+		"5:\n\t"
+		: [slock] "+m" (lock->slock)
+		: [flags] "r" ((u32)flags)
+		  CLI_STI_INPUT_ARGS
+		: "memory" CLI_STI_CLOBBERS);
+}
+#endif
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	_slock_t oldval;
+
+	asm volatile(
+		LOCK_INS_XCH " %0,%1"
+		:"=q" (oldval), "+m" (lock->slock)
+		:"0" (0) : "memory");
+
+	return oldval > 0;
+}
+
+/*
+ * __raw_spin_unlock based on writing $1 to the low byte.
+ * This method works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
+ * (PPro errata 66, 92)
+ */
+#if defined(X86_64) || \
+	(!defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE))
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
+}
+
+#else
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	unsigned char oldval = 1;
+
+	asm volatile("xchgb %b0, %1"
+		     : "=q" (oldval), "+m" (lock->slock)
+		     : "0" (oldval) : "memory");
+}
+
+#endif
+
+static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	while (__raw_spin_is_locked(lock))
+		cpu_relax();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ */
+
+static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+{
+	return (int)(lock)->lock > 0;
+}
+
+static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+{
+	return (lock)->lock == RW_LOCK_BIAS;
+}
+
+static inline void __raw_read_lock(raw_rwlock_t *rw)
+{
+	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+		     "jns 1f\n"
+		     "call __read_lock_failed\n\t"
+		     "1:\n"
+		     ::LOCK_PTR_REG (rw) : "memory");
+}
+
+static inline void __raw_write_lock(raw_rwlock_t *rw)
+{
+	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+		     "jz 1f\n"
+		     "call __write_lock_failed\n\t"
+		     "1:\n"
+		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+}
+
+static inline int __raw_read_trylock(raw_rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+
+	atomic_dec(count);
+	if (atomic_read(count) >= 0)
+		return 1;
+	atomic_inc(count);
+	return 0;
+}
+
+static inline int __raw_write_trylock(raw_rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+
+	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+		return 1;
+	atomic_add(RW_LOCK_BIAS, count);
+	return 0;
+}
+
+static inline void __raw_read_unlock(raw_rwlock_t *rw)
+{
+	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+}
+
+static inline void __raw_write_unlock(raw_rwlock_t *rw)
+{
+	asm volatile(LOCK_PREFIX "addl %1, %0"
+		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+}
+
+#define _raw_spin_relax(lock)	cpu_relax()
+#define _raw_read_relax(lock)	cpu_relax()
+#define _raw_write_relax(lock)	cpu_relax()
+
 #endif
diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
deleted file mode 100644
index 2de9b8b89903..000000000000
--- a/include/asm-x86/spinlock_32.h
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-typedef char _slock_t;
-#define LOCK_INS_DEC "decb"
-#define LOCK_INS_XCH "xchgb"
-#define LOCK_INS_MOV "movb"
-#define LOCK_INS_CMP "cmpb"
-#define LOCK_PTR_REG "a"
-
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
-{
-	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
-}
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
-		"jns 3f\n"
-		"2:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0,%0\n\t"
-		"jle 2b\n\t"
-		"jmp 1b\n"
-		"3:\n\t"
-		: "+m" (lock->slock) : : "memory");
-}
-
-/*
- * It is easier for the lock validator if interrupts are not re-enabled
- * in the middle of a lock-acquire. This is a performance feature anyway
- * so we turn it off:
- *
- * NOTE: there's an irqs-on section here, which normally would have to be
- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
-					 unsigned long flags)
-{
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
-		"jns 5f\n"
-		"testl $0x200, %[flags]\n\t"
-		"jz 4f\n\t"
-		STI_STRING "\n"
-		"3:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jle 3b\n\t"
-		CLI_STRING "\n\t"
-		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: [slock] "+m" (lock->slock)
-		: [flags] "r" ((u32)flags)
-		  CLI_STI_INPUT_ARGS
-		: "memory" CLI_STI_CLOBBERS);
-}
-#endif
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
-	_slock_t oldval;
-
-	asm volatile(
-		LOCK_INS_XCH " %0,%1"
-		:"=q" (oldval), "+m" (lock->slock)
-		:"0" (0) : "memory");
-
-	return oldval > 0;
-}
-
-/*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
-}
-
-#else
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	unsigned char oldval = 1;
-
-	asm volatile("xchgb %b0, %1"
-		     : "=q" (oldval), "+m" (lock->slock)
-		     : "0" (oldval) : "memory");
-}
-
-#endif
-
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
-{
-	while (__raw_spin_is_locked(lock))
-		cpu_relax();
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- */
-
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
-{
-	return (int)(lock)->lock > 0;
-}
-
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
-{
-	return (lock)->lock == RW_LOCK_BIAS;
-}
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
-		     "jns 1f\n"
-		     "call __read_lock_failed\n\t"
-		     "1:\n"
-		     ::LOCK_PTR_REG (rw) : "memory");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
-		     "jz 1f\n"
-		     "call __write_lock_failed\n\t"
-		     "1:\n"
-		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-
-	atomic_dec(count);
-	if (atomic_read(count) >= 0)
-		return 1;
-	atomic_inc(count);
-	return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
-	return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX "addl %1, %0"
-		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
-}
-
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
deleted file mode 100644
index f5ba90b7335c..000000000000
--- a/include/asm-x86/spinlock_64.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-typedef int _slock_t;
-#define LOCK_INS_DEC "decl"
-#define LOCK_INS_XCH "xchgl"
-#define LOCK_INS_MOV "movl"
-#define LOCK_INS_CMP "cmpl"
-#define LOCK_PTR_REG "D"
-
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
-{
-	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
-}
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
-		"jns 3f\n"
-		"2:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0,%0\n\t"
-		"jle 2b\n\t"
-		"jmp 1b\n"
-		"3:\n\t"
-		: "+m" (lock->slock) : : "memory");
-}
-
-/*
- * It is easier for the lock validator if interrupts are not re-enabled
- * in the middle of a lock-acquire. This is a performance feature anyway
- * so we turn it off:
- *
- * NOTE: there's an irqs-on section here, which normally would have to be
- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
-					 unsigned long flags)
-{
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
-		"jns 5f\n"
-		"testl $0x200, %[flags]\n\t"
-		"jz 4f\n\t"
-		STI_STRING "\n"
-		"3:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jle 3b\n\t"
-		CLI_STRING "\n\t"
-		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: [slock] "+m" (lock->slock)
-		: [flags] "r" ((u32)flags)
-		  CLI_STI_INPUT_ARGS
-		: "memory" CLI_STI_CLOBBERS);
-}
-#endif
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
-	_slock_t oldval;
-
-	asm volatile(
-		LOCK_INS_XCH " %0,%1"
-		:"=q" (oldval), "+m" (lock->slock)
-		:"0" (0) : "memory");
-
-	return oldval > 0;
-}
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
-}
-
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
-{
-	while (__raw_spin_is_locked(lock))
-		cpu_relax();
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- */
-
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
-{
-	return (int)(lock)->lock > 0;
-}
-
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
-{
-	return (lock)->lock == RW_LOCK_BIAS;
-}
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
-		     "jns 1f\n"
-		     "call __read_lock_failed\n\t"
-		     "1:\n"
-		     ::LOCK_PTR_REG (rw) : "memory");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
-		     "jz 1f\n"
-		     "call __write_lock_failed\n\t"
-		     "1:\n"
-		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-
-	atomic_dec(count);
-	if (atomic_read(count) >= 0)
-		return 1;
-	atomic_inc(count);
-	return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
-	return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
-	asm volatile(LOCK_PREFIX "addl %1, %0"
-		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
-}
-
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
-
-#endif /* __ASM_SPINLOCK_H */

From 0b9c99b6f21c2e9e00938e9c57942ed71bfe4d21 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:35 +0100
Subject: [PATCH 120/890] x86: cleanup tlbflush.h variants

Bring the tlbflush.h variants into sync to prepare merging and
paravirt support.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smp_64.c            |  12 +--
 arch/x86/mach-voyager/voyager_smp.c |   7 +-
 arch/x86/mm/boot_ioremap_32.c       |   2 +-
 include/asm-x86/cpufeature.h        |   9 +++
 include/asm-x86/tlbflush_32.h       | 112 ++++++++++++----------------
 include/asm-x86/tlbflush_64.h       |  78 ++++++++++++++-----
 6 files changed, 124 insertions(+), 96 deletions(-)

diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 62b0f2a1b1e8..7142447b5666 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -55,7 +55,6 @@ union smp_flush_state {
 		cpumask_t flush_cpumask;
 		struct mm_struct *flush_mm;
 		unsigned long flush_va;
-#define FLUSH_ALL	-1ULL
 		spinlock_t tlbstate_lock;
 	};
 	char pad[SMP_CACHE_BYTES];
@@ -153,7 +152,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 
 	if (f->flush_mm == read_pda(active_mm)) {
 		if (read_pda(mmu_state) == TLBSTATE_OK) {
-			if (f->flush_va == FLUSH_ALL)
+			if (f->flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
 				__flush_tlb_one(f->flush_va);
@@ -166,11 +165,12 @@ out:
 	add_pda(irq_tlb_count, 1);
 }
 
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-						unsigned long va)
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+			     unsigned long va)
 {
 	int sender;
 	union smp_flush_state *f;
+	cpumask_t cpumask = *cpumaskp;
 
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
@@ -223,7 +223,7 @@ void flush_tlb_current_task(void)
 
 	local_flush_tlb();
 	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
@@ -242,7 +242,7 @@ void flush_tlb_mm (struct mm_struct * mm)
 			leave_mm(smp_processor_id());
 	}
 	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 981def2b4e9b..b472a2df0b7f 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -800,7 +800,6 @@ static void smp_reschedule_interrupt(void)
 static struct mm_struct *flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL	0xffffffff
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -834,7 +833,7 @@ static void smp_invalidate_interrupt(void)
 
 	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-			if (flush_va == FLUSH_ALL)
+			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
 				__flush_tlb_one(flush_va);
@@ -903,7 +902,7 @@ void flush_tlb_current_task(void)
 	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
 	local_flush_tlb();
 	if (cpu_mask)
-		voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -923,7 +922,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 			leave_mm(smp_processor_id());
 	}
 	if (cpu_mask)
-		voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c
index f14da2a53ece..b20f74a2770f 100644
--- a/arch/x86/mm/boot_ioremap_32.c
+++ b/arch/x86/mm/boot_ioremap_32.c
@@ -57,7 +57,7 @@ static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
 	pte = boot_vaddr_to_pte(virtual_source);
 	for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
 		set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
-		__flush_tlb_one(&vaddr[i*PAGE_SIZE]);
+		__flush_tlb_one((unsigned long) &vaddr[i*PAGE_SIZE]);
 	}
 }
 
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 4c7875554d01..acbf6681740d 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -163,6 +163,12 @@
 #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
 #define cpu_has_bts		boot_cpu_has(X86_FEATURE_BTS)
 
+#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
+# define cpu_has_invlpg		1
+#else
+# define cpu_has_invlpg		(boot_cpu_data.x86 > 3)
+#endif
+
 #ifdef CONFIG_X86_64
 
 #undef  cpu_has_vme
@@ -183,6 +189,9 @@
 #undef  cpu_has_centaur_mcr
 #define cpu_has_centaur_mcr	0
 
+#undef  cpu_has_pge
+#define cpu_has_pge		1
+
 #endif /* CONFIG_X86_64 */
 
 #endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
index 2bd5b95e2048..9e07cc8f2d94 100644
--- a/include/asm-x86/tlbflush_32.h
+++ b/include/asm-x86/tlbflush_32.h
@@ -1,8 +1,11 @@
-#ifndef _I386_TLBFLUSH_H
-#define _I386_TLBFLUSH_H
+#ifndef _X86_TLBFLUSH_H
+#define _X86_TLBFLUSH_H
 
 #include <linux/mm.h>
+#include <linux/sched.h>
+
 #include <asm/processor.h>
+#include <asm/system.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
@@ -12,62 +15,41 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
-#define __native_flush_tlb()						\
-	do {								\
-		unsigned int tmpreg;					\
-									\
-		__asm__ __volatile__(					\
-			"movl %%cr3, %0;              \n"		\
-			"movl %0, %%cr3;  # flush TLB \n"		\
-			: "=r" (tmpreg)					\
-			:: "memory");					\
-	} while (0)
+static inline void __native_flush_tlb(void)
+{
+	write_cr3(read_cr3());
+}
 
-/*
- * Global pages have to be flushed a bit differently. Not a real
- * performance problem because this does not happen often.
- */
-#define __native_flush_tlb_global()					\
-	do {								\
-		unsigned int tmpreg, cr4, cr4_orig;			\
-									\
-		__asm__ __volatile__(					\
-			"movl %%cr4, %2;  # turn off PGE     \n"	\
-			"movl %2, %1;                        \n"	\
-			"andl %3, %1;                        \n"	\
-			"movl %1, %%cr4;                     \n"	\
-			"movl %%cr3, %0;                     \n"	\
-			"movl %0, %%cr3;  # flush TLB        \n"	\
-			"movl %2, %%cr4;  # turn PGE back on \n"	\
-			: "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig)	\
-			: "i" (~X86_CR4_PGE)				\
-			: "memory");					\
-	} while (0)
+static inline void __native_flush_tlb_global(void)
+{
+	unsigned long cr4 = read_cr4();
 
-#define __native_flush_tlb_single(addr) 				\
-	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
+	/* clear PGE */
+	write_cr4(cr4 & ~X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	write_cr4(cr4);
+}
 
-# define __flush_tlb_all()						\
-	do {								\
-		if (cpu_has_pge)					\
-			__flush_tlb_global();				\
-		else							\
-			__flush_tlb();					\
-	} while (0)
+static inline void __native_flush_tlb_single(unsigned long addr)
+{
+	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
+}
 
-#define cpu_has_invlpg	(boot_cpu_data.x86 > 3)
+static inline void __flush_tlb_all(void)
+{
+	if (cpu_has_pge)
+		__flush_tlb_global();
+	else
+		__flush_tlb();
+}
 
-#ifdef CONFIG_X86_INVLPG
-# define __flush_tlb_one(addr) __flush_tlb_single(addr)
-#else
-# define __flush_tlb_one(addr)						\
-	do {								\
-		if (cpu_has_invlpg)					\
-			__flush_tlb_single(addr);			\
-		else							\
-			__flush_tlb();					\
-	} while (0)
-#endif
+static inline void __flush_tlb_one(unsigned long addr)
+{
+	if (cpu_has_invlpg)
+		__flush_tlb_single(addr);
+	else
+		__flush_tlb();
+}
 
 /*
  * TLB flushing:
@@ -86,11 +68,8 @@
 
 #define TLB_FLUSH_ALL	0xffffffff
 
-
 #ifndef CONFIG_SMP
 
-#include <linux/sched.h>
-
 #define flush_tlb() __flush_tlb()
 #define flush_tlb_all() __flush_tlb_all()
 #define local_flush_tlb() __flush_tlb()
@@ -102,21 +81,22 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
-	unsigned long addr)
+				  unsigned long addr)
 {
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb_one(addr);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
-	unsigned long start, unsigned long end)
+				   unsigned long start, unsigned long end)
 {
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb();
 }
 
 static inline void native_flush_tlb_others(const cpumask_t *cpumask,
-					   struct mm_struct *mm, unsigned long va)
+					   struct mm_struct *mm,
+					   unsigned long va)
 {
 }
 
@@ -124,8 +104,7 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask,
 
 #include <asm/smp.h>
 
-#define local_flush_tlb() \
-	__flush_tlb()
+#define local_flush_tlb() __flush_tlb()
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
@@ -134,7 +113,8 @@ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 
 #define flush_tlb()	flush_tlb_current_task()
 
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
 {
 	flush_tlb_mm(vma->vm_mm);
 }
@@ -152,17 +132,17 @@ struct tlb_state
 	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)		\
-	native_flush_tlb_others(&mask, mm, va)
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
-					unsigned long end)
+					  unsigned long end)
 {
 	flush_tlb_all();
 }
 
-#endif /* _I386_TLBFLUSH_H */
+#endif /* _X86_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
index 7731fd23d572..0bed440ba9fe 100644
--- a/include/asm-x86/tlbflush_64.h
+++ b/include/asm-x86/tlbflush_64.h
@@ -1,26 +1,55 @@
-#ifndef _X8664_TLBFLUSH_H
-#define _X8664_TLBFLUSH_H
+#ifndef _X86_TLBFLUSH_H
+#define _X86_TLBFLUSH_H
 
 #include <linux/mm.h>
 #include <linux/sched.h>
+
 #include <asm/processor.h>
 #include <asm/system.h>
 
-static inline void __flush_tlb(void)
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __flush_tlb() __native_flush_tlb()
+#define __flush_tlb_global() __native_flush_tlb_global()
+#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#endif
+
+static inline void __native_flush_tlb(void)
 {
 	write_cr3(read_cr3());
 }
 
-static inline void __flush_tlb_all(void)
+static inline void __native_flush_tlb_global(void)
 {
 	unsigned long cr4 = read_cr4();
-	write_cr4(cr4 & ~X86_CR4_PGE);	/* clear PGE */
-	write_cr4(cr4);			/* write old PGE again and flush TLBs */
+
+	/* clear PGE */
+	write_cr4(cr4 & ~X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	write_cr4(cr4);
 }
 
-#define __flush_tlb_one(addr) \
-	__asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
+static inline void __native_flush_tlb_single(unsigned long addr)
+{
+	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
+}
 
+static inline void __flush_tlb_all(void)
+{
+	if (cpu_has_pge)
+		__flush_tlb_global();
+	else
+		__flush_tlb();
+}
+
+static inline void __flush_tlb_one(unsigned long addr)
+{
+	if (cpu_has_invlpg)
+		__flush_tlb_single(addr);
+	else
+		__flush_tlb();
+}
 
 /*
  * TLB flushing:
@@ -37,6 +66,8 @@ static inline void __flush_tlb_all(void)
  * range a few INVLPGs in a row are a win.
  */
 
+#define TLB_FLUSH_ALL	-1ULL
+
 #ifndef CONFIG_SMP
 
 #define flush_tlb() __flush_tlb()
@@ -50,25 +81,30 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
-	unsigned long addr)
+				  unsigned long addr)
 {
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb_one(addr);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
-	unsigned long start, unsigned long end)
+				   unsigned long start, unsigned long end)
 {
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb();
 }
 
-#else
+static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+					   struct mm_struct *mm,
+					   unsigned long va)
+{
+}
+
+#else  /* SMP */
 
 #include <asm/smp.h>
 
-#define local_flush_tlb() \
-	__flush_tlb()
+#define local_flush_tlb() __flush_tlb()
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
@@ -77,24 +113,28 @@ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 
 #define flush_tlb()	flush_tlb_current_task()
 
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
 {
 	flush_tlb_mm(vma->vm_mm);
 }
 
+void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
+			     unsigned long va);
+
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-/* Roughly an IPI every 20MB with 4k pages for freeing page table
-   ranges. Cost is about 42k of memory for each CPU. */
-#define ARCH_FREE_PTE_NR 5350	
+#endif	/* SMP */
 
+#ifndef CONFIG_PARAVIRT
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
-					unsigned long end)
+					  unsigned long end)
 {
 	flush_tlb_all();
 }
 
-#endif /* _X8664_TLBFLUSH_H */
+#endif /* _X86_TLBFLUSH_H */

From d291cf83639a0e0b67ff783b6ed29c0a747d4901 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:35 +0100
Subject: [PATCH 121/890] x86: merge tlbflush.h variants

The delta is now minimal. Merge them

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/tlbflush.h    | 159 +++++++++++++++++++++++++++++++++-
 include/asm-x86/tlbflush_32.h | 148 -------------------------------
 include/asm-x86/tlbflush_64.h | 140 ------------------------------
 3 files changed, 156 insertions(+), 291 deletions(-)
 delete mode 100644 include/asm-x86/tlbflush_32.h
 delete mode 100644 include/asm-x86/tlbflush_64.h

diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 9af4cc83a1af..3998709ed637 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -1,5 +1,158 @@
-#ifdef CONFIG_X86_32
-# include "tlbflush_32.h"
+#ifndef _ASM_X86_TLBFLUSH_H
+#define _ASM_X86_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
 #else
-# include "tlbflush_64.h"
+#define __flush_tlb() __native_flush_tlb()
+#define __flush_tlb_global() __native_flush_tlb_global()
+#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
+
+static inline void __native_flush_tlb(void)
+{
+	write_cr3(read_cr3());
+}
+
+static inline void __native_flush_tlb_global(void)
+{
+	unsigned long cr4 = read_cr4();
+
+	/* clear PGE */
+	write_cr4(cr4 & ~X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	write_cr4(cr4);
+}
+
+static inline void __native_flush_tlb_single(unsigned long addr)
+{
+	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
+}
+
+static inline void __flush_tlb_all(void)
+{
+	if (cpu_has_pge)
+		__flush_tlb_global();
+	else
+		__flush_tlb();
+}
+
+static inline void __flush_tlb_one(unsigned long addr)
+{
+	if (cpu_has_invlpg)
+		__flush_tlb_single(addr);
+	else
+		__flush_tlb();
+}
+
+#ifdef CONFIG_X86_32
+# define TLB_FLUSH_ALL	0xffffffff
+#else
+# define TLB_FLUSH_ALL	-1ULL
+#endif
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ *
+ * x86-64 can only flush individual pages or full VMs. For a range flush
+ * we always do the full VM. Might be worth trying if for a small
+ * range a few INVLPGs in a row are a win.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long addr)
+{
+	if (vma->vm_mm == current->active_mm)
+		__flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	if (vma->vm_mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+					   struct mm_struct *mm,
+					   unsigned long va)
+{
+}
+
+#else  /* SMP */
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb()	flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(vma->vm_mm);
+}
+
+void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
+			     unsigned long va);
+
+#define TLBSTATE_OK	1
+#define TLBSTATE_LAZY	2
+
+#ifdef CONFIG_X86_32
+struct tlb_state
+{
+	struct mm_struct *active_mm;
+	int state;
+	char __cacheline_padding[L1_CACHE_BYTES-8];
+};
+DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+#endif
+
+#endif	/* SMP */
+
+#ifndef CONFIG_PARAVIRT
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
+#endif
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();
+}
+
+#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
deleted file mode 100644
index 9e07cc8f2d94..000000000000
--- a/include/asm-x86/tlbflush_32.h
+++ /dev/null
@@ -1,148 +0,0 @@
-#ifndef _X86_TLBFLUSH_H
-#define _X86_TLBFLUSH_H
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define __flush_tlb() __native_flush_tlb()
-#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
-#endif
-
-static inline void __native_flush_tlb(void)
-{
-	write_cr3(read_cr3());
-}
-
-static inline void __native_flush_tlb_global(void)
-{
-	unsigned long cr4 = read_cr4();
-
-	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
-}
-
-static inline void __native_flush_tlb_single(unsigned long addr)
-{
-	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
-}
-
-static inline void __flush_tlb_all(void)
-{
-	if (cpu_has_pge)
-		__flush_tlb_global();
-	else
-		__flush_tlb();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	if (cpu_has_invlpg)
-		__flush_tlb_single(addr);
-	else
-		__flush_tlb();
-}
-
-/*
- * TLB flushing:
- *
- *  - flush_tlb() flushes the current mm struct TLBs
- *  - flush_tlb_all() flushes all processes TLBs
- *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
- *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
- *
- * ..but the i386 has somewhat limited tlb flushing capabilities,
- * and page-granular flushes are available only on i486 and up.
- */
-
-#define TLB_FLUSH_ALL	0xffffffff
-
-#ifndef CONFIG_SMP
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long addr)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
-					   struct mm_struct *mm,
-					   unsigned long va)
-{
-}
-
-#else  /* SMP */
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb()	flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	flush_tlb_mm(vma->vm_mm);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
-			     unsigned long va);
-
-#define TLBSTATE_OK	1
-#define TLBSTATE_LAZY	2
-
-struct tlb_state
-{
-	struct mm_struct *active_mm;
-	int state;
-	char __cacheline_padding[L1_CACHE_BYTES-8];
-};
-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
-
-#endif	/* SMP */
-
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
-#endif
-
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
-{
-	flush_tlb_all();
-}
-
-#endif /* _X86_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
deleted file mode 100644
index 0bed440ba9fe..000000000000
--- a/include/asm-x86/tlbflush_64.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _X86_TLBFLUSH_H
-#define _X86_TLBFLUSH_H
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define __flush_tlb() __native_flush_tlb()
-#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
-#endif
-
-static inline void __native_flush_tlb(void)
-{
-	write_cr3(read_cr3());
-}
-
-static inline void __native_flush_tlb_global(void)
-{
-	unsigned long cr4 = read_cr4();
-
-	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
-}
-
-static inline void __native_flush_tlb_single(unsigned long addr)
-{
-	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
-}
-
-static inline void __flush_tlb_all(void)
-{
-	if (cpu_has_pge)
-		__flush_tlb_global();
-	else
-		__flush_tlb();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	if (cpu_has_invlpg)
-		__flush_tlb_single(addr);
-	else
-		__flush_tlb();
-}
-
-/*
- * TLB flushing:
- *
- *  - flush_tlb() flushes the current mm struct TLBs
- *  - flush_tlb_all() flushes all processes TLBs
- *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
- *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *
- * x86-64 can only flush individual pages or full VMs. For a range flush
- * we always do the full VM. Might be worth trying if for a small
- * range a few INVLPGs in a row are a win.
- */
-
-#define TLB_FLUSH_ALL	-1ULL
-
-#ifndef CONFIG_SMP
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long addr)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb();
-}
-
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
-					   struct mm_struct *mm,
-					   unsigned long va)
-{
-}
-
-#else  /* SMP */
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb()	flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	flush_tlb_mm(vma->vm_mm);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
-			     unsigned long va);
-
-#define TLBSTATE_OK	1
-#define TLBSTATE_LAZY	2
-
-#endif	/* SMP */
-
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
-#endif
-
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
-{
-	flush_tlb_all();
-}
-
-#endif /* _X86_TLBFLUSH_H */

From 64883ab0e3386d72112a9091d886352a7b4b8bf6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:35 +0100
Subject: [PATCH 122/890] x86: cleanup mpspec variants

Bring the mpspec variants into sync to prepare merging and
paravirt support.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/mpparse_32.c             |  18 +--
 arch/x86/mach-visws/mpparse.c            |  16 +-
 include/asm-x86/mach-bigsmp/mach_apic.h  |  12 +-
 include/asm-x86/mach-default/mach_apic.h |  16 +-
 include/asm-x86/mach-es7000/mach_apic.h  |  10 +-
 include/asm-x86/mach-numaq/mach_apic.h   |  10 +-
 include/asm-x86/mach-summit/mach_apic.h  |  16 +-
 include/asm-x86/mpspec_32.h              |  49 +++---
 include/asm-x86/mpspec_64.h              | 196 +++--------------------
 include/asm-x86/mpspec_def.h             |  87 +++++-----
 10 files changed, 146 insertions(+), 284 deletions(-)

diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 7a05a7f6099a..22fc8d7dec11 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
 	if (!(m->mpc_flags & MPC_APIC_USABLE))
 		return;
 
-	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
 		m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
 	if (nr_ioapics >= MAX_IO_APICS) {
 		printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
@@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
 
 	mps_oem_check(mpc, oem, str);
 
-	printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+	printk("APIC at: 0x%X\n", mpc->mpc_lapic);
 
-	/* 
+	/*
 	 * Save the local APIC address (it might be non-default) -- but only
 	 * if we're not using ACPI.
 	 */
@@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
 	 */
 	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
-	mp_ioapic_routing[idx].gsi_end = gsi_base + 
+	mp_ioapic_routing[idx].gsi_end = gsi_base +
 		io_apic_get_redir_entries(idx);
 
-	printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
-		"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
-		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
-		mp_ioapic_routing[idx].gsi_base,
-		mp_ioapic_routing[idx].gsi_end);
+	printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+	       mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+	       mp_ioapic_routing[idx].gsi_base,
+	       mp_ioapic_routing[idx].gsi_end);
 }
 
 void __init
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
index f3c74fab8b95..2a8456a1f44f 100644
--- a/arch/x86/mach-visws/mpparse.c
+++ b/arch/x86/mach-visws/mpparse.c
@@ -36,19 +36,19 @@ unsigned int __initdata maxcpus = NR_CPUS;
 
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
- 	int ver, logical_apicid;
+	int ver, logical_apicid;
 	physid_mask_t apic_cpus;
- 	
+
 	if (!(m->mpc_cpuflag & CPU_ENABLED))
 		return;
 
 	logical_apicid = m->mpc_apicid;
-	printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n",
-		m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
-		m->mpc_apicid,
-		(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-		(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-		m->mpc_apicver);
+	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+	       m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
 
 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
 		boot_cpu_physical_apicid = m->mpc_apicid;
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index ebd319f838ab..6df235e8ea91 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -110,13 +110,13 @@ static inline int cpu_to_logical_apicid(int cpu)
 }
 
 static inline int mpc_apic_id(struct mpc_config_processor *m,
-			struct mpc_config_translation *translation_record)
+			      struct mpc_config_translation *translation_record)
 {
-	printk("Processor #%d %ld:%ld APIC version %d\n",
-	        m->mpc_apicid,
-	        (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	        (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	        m->mpc_apicver);
+	printk("Processor #%d %u:%u APIC version %d\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
 	return m->mpc_apicid;
 }
 
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 6db1c3babe9a..e3c2c1012c1c 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -89,15 +89,15 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
 	return physid_mask_of_physid(phys_apicid);
 }
 
-static inline int mpc_apic_id(struct mpc_config_processor *m, 
-			struct mpc_config_translation *translation_record)
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+			      struct mpc_config_translation *translation_record)
 {
-	printk("Processor #%d %ld:%ld APIC version %d\n",
-			m->mpc_apicid,
-			(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-			(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-			m->mpc_apicver);
-	return (m->mpc_apicid);
+	printk("Processor #%d %u:%u APIC version %d\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
+	return m->mpc_apicid;
 }
 
 static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index caec64be516d..d23011fdf454 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -131,11 +131,11 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused)
 {
-	printk("Processor #%d %ld:%ld APIC version %d\n",
-	        m->mpc_apicid,
-	        (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	        (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	        m->mpc_apicver);
+	printk("Processor #%d %u:%u APIC version %d\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
 	return (m->mpc_apicid);
 }
 
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index 5e5e7dd2692e..17e183bd39c1 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -101,11 +101,11 @@ static inline int mpc_apic_id(struct mpc_config_processor *m,
 	int quad = translation_record->trans_quad;
 	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
 
-	printk("Processor #%d %ld:%ld APIC version %d (quad %d, apic %d)\n",
-			m->mpc_apicid,
-			(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-			(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-			m->mpc_apicver, quad, logical_apicid);
+	printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver, quad, logical_apicid);
 	return logical_apicid;
 }
 
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index 732f776aab8e..062c97f6100b 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h
@@ -126,15 +126,15 @@ static inline physid_mask_t apicid_to_cpu_present(int apicid)
 	return physid_mask_of_physid(0);
 }
 
-static inline int mpc_apic_id(struct mpc_config_processor *m, 
-			struct mpc_config_translation *translation_record)
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+			      struct mpc_config_translation *translation_record)
 {
-	printk("Processor #%d %ld:%ld APIC version %d\n",
-			m->mpc_apicid,
-			(m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-			(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-			m->mpc_apicver);
-	return (m->mpc_apicid);
+	printk("Processor #%d %u:%u APIC version %d\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
+	return m->mpc_apicid;
 }
 
 static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/mpspec_32.h b/include/asm-x86/mpspec_32.h
index f21349399d14..bb73185e6a7d 100644
--- a/include/asm-x86/mpspec_32.h
+++ b/include/asm-x86/mpspec_32.h
@@ -1,34 +1,37 @@
 #ifndef __ASM_MPSPEC_H
 #define __ASM_MPSPEC_H
 
-#include <linux/cpumask.h>
 #include <asm/mpspec_def.h>
 #include <mach_mpspec.h>
 
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
-extern int mp_bus_id_to_node [MAX_MP_BUSSES];
-extern int mp_bus_id_to_local [MAX_MP_BUSSES];
-extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+extern int mp_bus_id_to_type[MAX_MP_BUSSES];
+extern int mp_bus_id_to_node[MAX_MP_BUSSES];
+extern int mp_bus_id_to_local[MAX_MP_BUSSES];
+extern int quad_local_to_mp_bus_id[NR_CPUS/4][4];
 
 extern unsigned int def_to_bigsmp;
+extern int apic_version[MAX_APICS];
+extern int pic_mode;
+
+extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
+
 extern unsigned int boot_cpu_physical_apicid;
 extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
 extern int nr_ioapics;
-extern int apic_version [MAX_APICS];
 extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
-extern int pic_mode;
+
+extern void find_smp_config (void);
+extern void get_smp_config (void);
 
 #ifdef CONFIG_ACPI
 extern void mp_register_lapic (u8 id, u8 enabled);
 extern void mp_register_lapic_address (u64 address);
 extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
+extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger,
+				    u32 gsi);
 extern void mp_config_acpi_legacy_irqs (void);
 extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
@@ -50,7 +53,7 @@ typedef struct physid_mask physid_mask_t;
 #define physids_and(dst, src1, src2)		bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
 #define physids_or(dst, src1, src2)		bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
 #define physids_clear(map)			bitmap_zero((map).mask, MAX_APICS)
-#define physids_complement(dst, src)		bitmap_complement((dst).mask,(src).mask, MAX_APICS)
+#define physids_complement(dst, src)		bitmap_complement((dst).mask, (src).mask, MAX_APICS)
 #define physids_empty(map)			bitmap_empty((map).mask, MAX_APICS)
 #define physids_equal(map1, map2)		bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
 #define physids_weight(map)			bitmap_weight((map).mask, MAX_APICS)
@@ -58,18 +61,18 @@ typedef struct physid_mask physid_mask_t;
 #define physids_shift_left(d, s, n)		bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
 #define physids_coerce(map)			((map).mask[0])
 
-#define physids_promote(physids)						\
-	({									\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;			\
-		__physid_mask.mask[0] = physids;				\
-		__physid_mask;							\
+#define physids_promote(physids)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		__physid_mask.mask[0] = physids;			\
+		__physid_mask;						\
 	})
 
-#define physid_mask_of_physid(physid)						\
-	({									\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;			\
-		physid_set(physid, __physid_mask);				\
-		__physid_mask;							\
+#define physid_mask_of_physid(physid)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		physid_set(physid, __physid_mask);			\
+		__physid_mask;						\
 	})
 
 #define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
diff --git a/include/asm-x86/mpspec_64.h b/include/asm-x86/mpspec_64.h
index 017fddb61dc5..16eab20667cd 100644
--- a/include/asm-x86/mpspec_64.h
+++ b/include/asm-x86/mpspec_64.h
@@ -1,189 +1,35 @@
 #ifndef __ASM_MPSPEC_H
 #define __ASM_MPSPEC_H
 
-/*
- * Structure definitions for SMP machines following the
- * Intel Multiprocessing Specification 1.1 and 1.4.
- */
-
-/*
- * This tag identifies where the SMP configuration
- * information is. 
- */
- 
-#define SMP_MAGIC_IDENT	(('_'<<24)|('P'<<16)|('M'<<8)|'_')
-
-/*
- * A maximum of 255 APICs with the current APIC ID architecture.
- */
-#define MAX_APICS 255
-
-struct intel_mp_floating
-{
-	char mpf_signature[4];		/* "_MP_" 			*/
-	unsigned int mpf_physptr;	/* Configuration table address	*/
-	unsigned char mpf_length;	/* Our length (paragraphs)	*/
-	unsigned char mpf_specification;/* Specification version	*/
-	unsigned char mpf_checksum;	/* Checksum (makes sum 0)	*/
-	unsigned char mpf_feature1;	/* Standard or configuration ? 	*/
-	unsigned char mpf_feature2;	/* Bit7 set for IMCR|PIC	*/
-	unsigned char mpf_feature3;	/* Unused (0)			*/
-	unsigned char mpf_feature4;	/* Unused (0)			*/
-	unsigned char mpf_feature5;	/* Unused (0)			*/
-};
-
-struct mp_config_table
-{
-	char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
-	unsigned short mpc_length;	/* Size of table */
-	char  mpc_spec;			/* 0x01 */
-	char  mpc_checksum;
-	char  mpc_oem[8];
-	char  mpc_productid[12];
-	unsigned int mpc_oemptr;	/* 0 if not present */
-	unsigned short mpc_oemsize;	/* 0 if not present */
-	unsigned short mpc_oemcount;
-	unsigned int mpc_lapic;	/* APIC address */
-	unsigned int reserved;
-};
-
-/* Followed by entries */
-
-#define	MP_PROCESSOR	0
-#define	MP_BUS		1
-#define	MP_IOAPIC	2
-#define	MP_INTSRC	3
-#define	MP_LINTSRC	4
-
-struct mpc_config_processor
-{
-	unsigned char mpc_type;
-	unsigned char mpc_apicid;	/* Local APIC number */
-	unsigned char mpc_apicver;	/* Its versions */
-	unsigned char mpc_cpuflag;
-#define CPU_ENABLED		1	/* Processor is available */
-#define CPU_BOOTPROCESSOR	2	/* Processor is the BP */
-	unsigned int mpc_cpufeature;		
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK	0xF0
-#define CPU_FAMILY_MASK	0xF00
-	unsigned int mpc_featureflag;	/* CPUID feature value */
-	unsigned int mpc_reserved[2];
-};
-
-struct mpc_config_bus
-{
-	unsigned char mpc_type;
-	unsigned char mpc_busid;
-	unsigned char mpc_bustype[6];
-};
-
-/* List of Bus Type string values, Intel MP Spec. */
-#define BUSTYPE_EISA	"EISA"
-#define BUSTYPE_ISA	"ISA"
-#define BUSTYPE_INTERN	"INTERN"	/* Internal BUS */
-#define BUSTYPE_MCA	"MCA"
-#define BUSTYPE_VL	"VL"		/* Local bus */
-#define BUSTYPE_PCI	"PCI"
-#define BUSTYPE_PCMCIA	"PCMCIA"
-#define BUSTYPE_CBUS	"CBUS"
-#define BUSTYPE_CBUSII	"CBUSII"
-#define BUSTYPE_FUTURE	"FUTURE"
-#define BUSTYPE_MBI	"MBI"
-#define BUSTYPE_MBII	"MBII"
-#define BUSTYPE_MPI	"MPI"
-#define BUSTYPE_MPSA	"MPSA"
-#define BUSTYPE_NUBUS	"NUBUS"
-#define BUSTYPE_TC	"TC"
-#define BUSTYPE_VME	"VME"
-#define BUSTYPE_XPRESS	"XPRESS"
-
-struct mpc_config_ioapic
-{
-	unsigned char mpc_type;
-	unsigned char mpc_apicid;
-	unsigned char mpc_apicver;
-	unsigned char mpc_flags;
-#define MPC_APIC_USABLE		0x01
-	unsigned int mpc_apicaddr;
-};
-
-struct mpc_config_intsrc
-{
-	unsigned char mpc_type;
-	unsigned char mpc_irqtype;
-	unsigned short mpc_irqflag;
-	unsigned char mpc_srcbus;
-	unsigned char mpc_srcbusirq;
-	unsigned char mpc_dstapic;
-	unsigned char mpc_dstirq;
-};
-
-enum mp_irq_source_types {
-	mp_INT = 0,
-	mp_NMI = 1,
-	mp_SMI = 2,
-	mp_ExtINT = 3
-};
-
-#define MP_IRQDIR_DEFAULT	0
-#define MP_IRQDIR_HIGH		1
-#define MP_IRQDIR_LOW		3
-
-
-struct mpc_config_lintsrc
-{
-	unsigned char mpc_type;
-	unsigned char mpc_irqtype;
-	unsigned short mpc_irqflag;
-	unsigned char mpc_srcbusid;
-	unsigned char mpc_srcbusirq;
-	unsigned char mpc_destapic;	
-#define MP_APIC_ALL	0xFF
-	unsigned char mpc_destapiclint;
-};
-
-/*
- *	Default configurations
- *
- *	1	2 CPU ISA 82489DX
- *	2	2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
- *	3	2 CPU EISA 82489DX
- *	4	2 CPU MCA 82489DX
- *	5	2 CPU ISA+PCI
- *	6	2 CPU EISA+PCI
- *	7	2 CPU MCA+PCI
- */
+#include <asm/mpspec_def.h>
 
 #define MAX_MP_BUSSES 256
 /* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
 #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+
+extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
 
 extern unsigned int boot_cpu_physical_apicid;
 extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
 extern int nr_ioapics;
-extern unsigned char apic_version [MAX_APICS];
 extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
 
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+
 #ifdef CONFIG_ACPI
 extern void mp_register_lapic (u8 id, u8 enabled);
 extern void mp_register_lapic_address (u64 address);
-
 extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
+extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger,
+				    u32 gsi);
 extern void mp_config_acpi_legacy_irqs (void);
-extern int mp_register_gsi (u32 gsi, int triggering, int polarity);
-#endif
-
-extern int using_apic_timer;
+extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
+#endif /* CONFIG_ACPI */
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
 
@@ -210,18 +56,18 @@ typedef struct physid_mask physid_mask_t;
 #define physids_shift_left(d, s, n)		bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
 #define physids_coerce(map)			((map).mask[0])
 
-#define physids_promote(physids)						\
-	({									\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;			\
-		__physid_mask.mask[0] = physids;				\
-		__physid_mask;							\
+#define physids_promote(physids)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		__physid_mask.mask[0] = physids;			\
+		__physid_mask;						\
 	})
 
-#define physid_mask_of_physid(physid)						\
-	({									\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;			\
-		physid_set(physid, __physid_mask);				\
-		__physid_mask;							\
+#define physid_mask_of_physid(physid)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		physid_set(physid, __physid_mask);			\
+		__physid_mask;						\
 	})
 
 #define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index 13bafb16e7af..3504617fe648 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -8,52 +8,68 @@
 
 /*
  * This tag identifies where the SMP configuration
- * information is. 
+ * information is.
  */
- 
+
 #define SMP_MAGIC_IDENT	(('_'<<24)|('P'<<16)|('M'<<8)|'_')
 
-#define MAX_MPC_ENTRY 1024
-#define MAX_APICS      256
+#ifdef CONFIG_X86_32
+# define MAX_MPC_ENTRY 1024
+# define MAX_APICS      256
+#else
+/*
+ * A maximum of 255 APICs with the current APIC ID architecture.
+ */
+# define MAX_APICS 255
+#endif
 
 struct intel_mp_floating
 {
-	char mpf_signature[4];		/* "_MP_" 			*/
-	unsigned long mpf_physptr;	/* Configuration table address	*/
+	char mpf_signature[4];		/* "_MP_"			*/
+	unsigned int mpf_physptr;	/* Configuration table address	*/
 	unsigned char mpf_length;	/* Our length (paragraphs)	*/
 	unsigned char mpf_specification;/* Specification version	*/
 	unsigned char mpf_checksum;	/* Checksum (makes sum 0)	*/
-	unsigned char mpf_feature1;	/* Standard or configuration ? 	*/
+	unsigned char mpf_feature1;	/* Standard or configuration ?	*/
 	unsigned char mpf_feature2;	/* Bit7 set for IMCR|PIC	*/
 	unsigned char mpf_feature3;	/* Unused (0)			*/
 	unsigned char mpf_feature4;	/* Unused (0)			*/
 	unsigned char mpf_feature5;	/* Unused (0)			*/
 };
 
+#define MPC_SIGNATURE "PCMP"
+
 struct mp_config_table
 {
 	char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
 	unsigned short mpc_length;	/* Size of table */
 	char  mpc_spec;			/* 0x01 */
 	char  mpc_checksum;
 	char  mpc_oem[8];
 	char  mpc_productid[12];
-	unsigned long mpc_oemptr;	/* 0 if not present */
+	unsigned int mpc_oemptr;	/* 0 if not present */
 	unsigned short mpc_oemsize;	/* 0 if not present */
 	unsigned short mpc_oemcount;
-	unsigned long mpc_lapic;	/* APIC address */
-	unsigned long reserved;
+	unsigned int mpc_lapic;	/* APIC address */
+	unsigned int reserved;
 };
 
 /* Followed by entries */
 
-#define	MP_PROCESSOR	0
-#define	MP_BUS		1
-#define	MP_IOAPIC	2
-#define	MP_INTSRC	3
-#define	MP_LINTSRC	4
-#define	MP_TRANSLATION  192  /* Used by IBM NUMA-Q to describe node locality */
+#define	MP_PROCESSOR		0
+#define	MP_BUS			1
+#define	MP_IOAPIC		2
+#define	MP_INTSRC		3
+#define	MP_LINTSRC		4
+/* Used by IBM NUMA-Q to describe node locality */
+#define	MP_TRANSLATION		192
+
+#define CPU_ENABLED		1	/* Processor is available */
+#define CPU_BOOTPROCESSOR	2	/* Processor is the BP */
+
+#define CPU_STEPPING_MASK	0x000F
+#define CPU_MODEL_MASK		0x00F0
+#define CPU_FAMILY_MASK		0x0F00
 
 struct mpc_config_processor
 {
@@ -61,14 +77,9 @@ struct mpc_config_processor
 	unsigned char mpc_apicid;	/* Local APIC number */
 	unsigned char mpc_apicver;	/* Its versions */
 	unsigned char mpc_cpuflag;
-#define CPU_ENABLED		1	/* Processor is available */
-#define CPU_BOOTPROCESSOR	2	/* Processor is the BP */
-	unsigned long mpc_cpufeature;		
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK	0xF0
-#define CPU_FAMILY_MASK	0xF00
-	unsigned long mpc_featureflag;	/* CPUID feature value */
-	unsigned long mpc_reserved[2];
+	unsigned int mpc_cpufeature;
+	unsigned int mpc_featureflag;	/* CPUID feature value */
+	unsigned int mpc_reserved[2];
 };
 
 struct mpc_config_bus
@@ -98,14 +109,15 @@ struct mpc_config_bus
 #define BUSTYPE_VME	"VME"
 #define BUSTYPE_XPRESS	"XPRESS"
 
+#define MPC_APIC_USABLE		0x01
+
 struct mpc_config_ioapic
 {
 	unsigned char mpc_type;
 	unsigned char mpc_apicid;
 	unsigned char mpc_apicver;
 	unsigned char mpc_flags;
-#define MPC_APIC_USABLE		0x01
-	unsigned long mpc_apicaddr;
+	unsigned int mpc_apicaddr;
 };
 
 struct mpc_config_intsrc
@@ -130,6 +142,7 @@ enum mp_irq_source_types {
 #define MP_IRQDIR_HIGH		1
 #define MP_IRQDIR_LOW		3
 
+#define MP_APIC_ALL	0xFF
 
 struct mpc_config_lintsrc
 {
@@ -138,15 +151,15 @@ struct mpc_config_lintsrc
 	unsigned short mpc_irqflag;
 	unsigned char mpc_srcbusid;
 	unsigned char mpc_srcbusirq;
-	unsigned char mpc_destapic;	
-#define MP_APIC_ALL	0xFF
+	unsigned char mpc_destapic;
 	unsigned char mpc_destapiclint;
 };
 
+#define MPC_OEM_SIGNATURE "_OEM"
+
 struct mp_config_oemtable
 {
 	char oem_signature[4];
-#define MPC_OEM_SIGNATURE "_OEM"
 	unsigned short oem_length;	/* Size of table */
 	char  oem_rev;			/* 0x01 */
 	char  oem_checksum;
@@ -155,13 +168,13 @@ struct mp_config_oemtable
 
 struct mpc_config_translation
 {
-        unsigned char mpc_type;
-        unsigned char trans_len;
-        unsigned char trans_type;
-        unsigned char trans_quad;
-        unsigned char trans_global;
-        unsigned char trans_local;
-        unsigned short trans_reserved;
+	unsigned char mpc_type;
+	unsigned char trans_len;
+	unsigned char trans_type;
+	unsigned char trans_quad;
+	unsigned char trans_global;
+	unsigned char trans_local;
+	unsigned short trans_reserved;
 };
 
 /*

From c2805aa1d8ae51c7582d2ccbd736afa545cf5cc4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:35 +0100
Subject: [PATCH 123/890] x86: merge mpspec variants

The delta is now minimal. Merge them

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/mpspec.h    | 113 +++++++++++++++++++++++++++++++++++-
 include/asm-x86/mpspec_32.h |  84 ---------------------------
 include/asm-x86/mpspec_64.h |  79 -------------------------
 3 files changed, 111 insertions(+), 165 deletions(-)
 delete mode 100644 include/asm-x86/mpspec_32.h
 delete mode 100644 include/asm-x86/mpspec_64.h

diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 8f268e8fd2e9..a2a6b2ea4259 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -1,5 +1,114 @@
+#ifndef _AM_X86_MPSPEC_H
+#define _AM_X86_MPSPEC_H
+
+#include <asm/mpspec_def.h>
+
 #ifdef CONFIG_X86_32
-# include "mpspec_32.h"
+#include <mach_mpspec.h>
+
+extern int mp_bus_id_to_type[MAX_MP_BUSSES];
+extern int mp_bus_id_to_node[MAX_MP_BUSSES];
+extern int mp_bus_id_to_local[MAX_MP_BUSSES];
+extern int quad_local_to_mp_bus_id[NR_CPUS/4][4];
+
+extern unsigned int def_to_bigsmp;
+extern int apic_version[MAX_APICS];
+extern int pic_mode;
+
 #else
-# include "mpspec_64.h"
+
+#define MAX_MP_BUSSES 256
+/* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
+
+extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+#endif
+
+extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern int smp_found_config;
+extern int nr_ioapics;
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern unsigned long mp_lapic_addr;
+
+extern void find_smp_config(void);
+extern void get_smp_config(void);
+
+#ifdef CONFIG_ACPI
+extern void mp_register_lapic(u8 id, u8 enabled);
+extern void mp_register_lapic_address(u64 address);
+extern void mp_register_ioapic(u8 id, u32 address, u32 gsi_base);
+extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+				   u32 gsi);
+extern void mp_config_acpi_legacy_irqs(void);
+extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+#endif /* CONFIG_ACPI */
+
+#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
+
+struct physid_mask
+{
+	unsigned long mask[PHYSID_ARRAY_SIZE];
+};
+
+typedef struct physid_mask physid_mask_t;
+
+#define physid_set(physid, map)			set_bit(physid, (map).mask)
+#define physid_clear(physid, map)		clear_bit(physid, (map).mask)
+#define physid_isset(physid, map)		test_bit(physid, (map).mask)
+#define physid_test_and_set(physid, map) \
+	test_and_set_bit(physid, (map).mask)
+
+#define physids_and(dst, src1, src2) \
+	bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+
+#define physids_or(dst, src1, src2) \
+	bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+
+#define physids_clear(map) \
+	bitmap_zero((map).mask, MAX_APICS)
+
+#define physids_complement(dst, src) \
+	bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+
+#define physids_empty(map) \
+	bitmap_empty((map).mask, MAX_APICS)
+
+#define physids_equal(map1, map2) \
+	bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+
+#define physids_weight(map) \
+	bitmap_weight((map).mask, MAX_APICS)
+
+#define physids_shift_right(d, s, n) \
+	bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+
+#define physids_shift_left(d, s, n) \
+	bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+
+#define physids_coerce(map)			((map).mask[0])
+
+#define physids_promote(physids)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		__physid_mask.mask[0] = physids;			\
+		__physid_mask;						\
+	})
+
+#define physid_mask_of_physid(physid)					\
+	({								\
+		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
+		physid_set(physid, __physid_mask);			\
+		__physid_mask;						\
+	})
+
+#define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
+#define PHYSID_MASK_NONE	{ {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
+
+extern physid_mask_t phys_cpu_present_map;
+
 #endif
diff --git a/include/asm-x86/mpspec_32.h b/include/asm-x86/mpspec_32.h
deleted file mode 100644
index bb73185e6a7d..000000000000
--- a/include/asm-x86/mpspec_32.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-#include <asm/mpspec_def.h>
-#include <mach_mpspec.h>
-
-extern int mp_bus_id_to_type[MAX_MP_BUSSES];
-extern int mp_bus_id_to_node[MAX_MP_BUSSES];
-extern int mp_bus_id_to_local[MAX_MP_BUSSES];
-extern int quad_local_to_mp_bus_id[NR_CPUS/4][4];
-
-extern unsigned int def_to_bigsmp;
-extern int apic_version[MAX_APICS];
-extern int pic_mode;
-
-extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
-
-extern unsigned int boot_cpu_physical_apicid;
-extern int smp_found_config;
-extern int nr_ioapics;
-extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-extern int mpc_default_type;
-extern unsigned long mp_lapic_addr;
-
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-
-#ifdef CONFIG_ACPI
-extern void mp_register_lapic (u8 id, u8 enabled);
-extern void mp_register_lapic_address (u64 address);
-extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger,
-				    u32 gsi);
-extern void mp_config_acpi_legacy_irqs (void);
-extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
-#endif /* CONFIG_ACPI */
-
-#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
-
-struct physid_mask
-{
-	unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map)			set_bit(physid, (map).mask)
-#define physid_clear(physid, map)		clear_bit(physid, (map).mask)
-#define physid_isset(physid, map)		test_bit(physid, (map).mask)
-#define physid_test_and_set(physid, map)	test_and_set_bit(physid, (map).mask)
-
-#define physids_and(dst, src1, src2)		bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_or(dst, src1, src2)		bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_clear(map)			bitmap_zero((map).mask, MAX_APICS)
-#define physids_complement(dst, src)		bitmap_complement((dst).mask, (src).mask, MAX_APICS)
-#define physids_empty(map)			bitmap_empty((map).mask, MAX_APICS)
-#define physids_equal(map1, map2)		bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
-#define physids_weight(map)			bitmap_weight((map).mask, MAX_APICS)
-#define physids_shift_right(d, s, n)		bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
-#define physids_shift_left(d, s, n)		bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
-#define physids_coerce(map)			((map).mask[0])
-
-#define physids_promote(physids)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		__physid_mask.mask[0] = physids;			\
-		__physid_mask;						\
-	})
-
-#define physid_mask_of_physid(physid)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		physid_set(physid, __physid_mask);			\
-		__physid_mask;						\
-	})
-
-#define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE	{ {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
-#endif
-
diff --git a/include/asm-x86/mpspec_64.h b/include/asm-x86/mpspec_64.h
deleted file mode 100644
index 16eab20667cd..000000000000
--- a/include/asm-x86/mpspec_64.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-#include <asm/mpspec_def.h>
-
-#define MAX_MP_BUSSES 256
-/* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
-#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
-extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
-
-extern unsigned int boot_cpu_physical_apicid;
-extern int smp_found_config;
-extern int nr_ioapics;
-extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-extern int mpc_default_type;
-extern unsigned long mp_lapic_addr;
-
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-
-#ifdef CONFIG_ACPI
-extern void mp_register_lapic (u8 id, u8 enabled);
-extern void mp_register_lapic_address (u64 address);
-extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger,
-				    u32 gsi);
-extern void mp_config_acpi_legacy_irqs (void);
-extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
-#endif /* CONFIG_ACPI */
-
-#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
-
-struct physid_mask
-{
-	unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map)			set_bit(physid, (map).mask)
-#define physid_clear(physid, map)		clear_bit(physid, (map).mask)
-#define physid_isset(physid, map)		test_bit(physid, (map).mask)
-#define physid_test_and_set(physid, map)	test_and_set_bit(physid, (map).mask)
-
-#define physids_and(dst, src1, src2)		bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_or(dst, src1, src2)		bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_clear(map)			bitmap_zero((map).mask, MAX_APICS)
-#define physids_complement(dst, src)		bitmap_complement((dst).mask, (src).mask, MAX_APICS)
-#define physids_empty(map)			bitmap_empty((map).mask, MAX_APICS)
-#define physids_equal(map1, map2)		bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
-#define physids_weight(map)			bitmap_weight((map).mask, MAX_APICS)
-#define physids_shift_right(d, s, n)		bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
-#define physids_shift_left(d, s, n)		bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
-#define physids_coerce(map)			((map).mask[0])
-
-#define physids_promote(physids)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		__physid_mask.mask[0] = physids;			\
-		__physid_mask;						\
-	})
-
-#define physid_mask_of_physid(physid)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		physid_set(physid, __physid_mask);			\
-		__physid_mask;						\
-	})
-
-#define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE	{ {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
-#endif
-

From ae9d983be1eefac4b5efad69a188e7ac89a75797 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:36 +0100
Subject: [PATCH 124/890] x86: cleanup smp.h variants

Bring the smp.h variants into sync to prepare merging and
paravirt support.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/apic.h        |   1 +
 include/asm-x86/mpspec.h      |   3 +
 include/asm-x86/smp_32.h      | 131 +++++++++++++-----------------
 include/asm-x86/smp_64.h      | 147 ++++++++++++++--------------------
 include/asm-x86/topology_64.h |   2 -
 5 files changed, 118 insertions(+), 166 deletions(-)

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d0a221fa1fc3..18d932dff476 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -42,6 +42,7 @@ extern int local_apic_timer_disabled;
 
 extern int apic_runs_main_timer;
 extern int ioapic_force;
+extern int disable_apic;
 extern int disable_apic_timer;
 extern unsigned boot_cpu_id;
 
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index a2a6b2ea4259..781ad74ab9e9 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -13,8 +13,11 @@ extern int quad_local_to_mp_bus_id[NR_CPUS/4][4];
 
 extern unsigned int def_to_bigsmp;
 extern int apic_version[MAX_APICS];
+extern u8 apicid_2_node[];
 extern int pic_mode;
 
+#define MAX_APICID 256
+
 #else
 
 #define MAX_MP_BUSSES 256
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index e10b7affdfe5..c69e960429cc 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -1,58 +1,51 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+#include <linux/init.h>
+
 /*
  * We need the APIC definitions automatically as part of 'smp.h'
  */
-#ifndef __ASSEMBLY__
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+# include <asm/mpspec.h>
+# include <asm/apic.h>
+# ifdef CONFIG_X86_IO_APIC
+#  include <asm/io_apic.h>
+# endif
 #endif
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include <linux/bitops.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#ifdef CONFIG_X86_IO_APIC
-#include <asm/io_apic.h>
-#endif
-#endif
+extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_callin_map;
 
-#define BAD_APICID 0xFFu
-#ifdef CONFIG_SMP
-#ifndef __ASSEMBLY__
-
-/*
- * Private routines/data
- */
- 
-extern void smp_alloc_memory(void);
-extern int pic_mode;
 extern int smp_num_siblings;
-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+extern unsigned int num_processors;
 
-extern void (*mtrr_hook) (void);
-extern void zap_low_mappings (void);
+extern void smp_alloc_memory(void);
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 
-#define MAX_APICID 256
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
 extern u8 __initdata x86_cpu_to_apicid_init[];
 extern void *x86_cpu_to_apicid_ptr;
+
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+DECLARE_PER_CPU(u8, cpu_llc_id);
 DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
 
-#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
-
-extern void set_cpu_sibling_map(int cpu);
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 extern void remove_siblinginfo(int cpu);
 #endif
 
+/* Globals due to paravirt */
+extern void set_cpu_sibling_map(int cpu);
+
 struct smp_ops
 {
 	void (*smp_prepare_boot_cpu)(void);
@@ -67,6 +60,7 @@ struct smp_ops
 				      int wait);
 };
 
+#ifdef CONFIG_SMP
 extern struct smp_ops smp_ops;
 
 static inline void smp_prepare_boot_cpu(void)
@@ -107,10 +101,12 @@ int native_cpu_up(unsigned int cpunum);
 void native_smp_cpus_done(unsigned int max_cpus);
 
 #ifndef CONFIG_PARAVIRT
-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) 		\
-do { } while (0)
+#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
 #endif
 
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+
 /*
  * This function is needed by all SMP systems. It must _always_ be valid
  * from the initial startup. We map APIC_BASE very early in page_setup(),
@@ -119,9 +115,11 @@ do { } while (0)
 DECLARE_PER_CPU(int, cpu_number);
 #define raw_smp_processor_id() (x86_read_percpu(cpu_number))
 
-extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_callin_map;
-extern cpumask_t cpu_possible_map;
+#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
+
+extern int safe_smp_processor_id(void);
+
+void __cpuinit smp_store_cpu_info(int id);
 
 /* We don't mark CPUs online until __cpu_up(), so we need another measure */
 static inline int num_booting_cpus(void)
@@ -129,56 +127,39 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-extern int safe_smp_processor_id(void);
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-extern unsigned int num_processors;
-
-void __cpuinit smp_store_cpu_info(int id);
-
-#endif /* !__ASSEMBLY__ */
-
 #else /* CONFIG_SMP */
 
 #define safe_smp_processor_id()		0
 #define cpu_physical_id(cpu)		boot_cpu_physical_apicid
 
-#define NO_PROC_ID		0xFF		/* No processor magic marker */
-
-#endif /* CONFIG_SMP */
-
-#ifndef __ASSEMBLY__
+#endif /* !CONFIG_SMP */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-#ifdef APIC_DEFINITION
-extern int hard_smp_processor_id(void);
-#else
-#include <mach_apicdef.h>
-static inline int hard_smp_processor_id(void)
-{
-	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
-}
-#endif /* APIC_DEFINITION */
-
-#else /* CONFIG_X86_LOCAL_APIC */
-
-#ifndef CONFIG_SMP
-#define hard_smp_processor_id()		0
-#endif
-
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-extern u8 apicid_2_node[];
-
-#ifdef CONFIG_X86_LOCAL_APIC
 static __inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
 }
-#endif
-#endif
 
+# ifdef APIC_DEFINITION
+extern int hard_smp_processor_id(void);
+# else
+#  include <mach_apicdef.h>
+static inline int hard_smp_processor_id(void)
+{
+	/* we don't want to mark this access volatile - bad code generation */
+	return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
+}
+# endif /* APIC_DEFINITION */
+
+#else /* CONFIG_X86_LOCAL_APIC */
+
+# ifndef CONFIG_SMP
+#  define hard_smp_processor_id()	0
+# endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* !ASSEMBLY */
 #endif
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index ab612b0ff270..2feddda91e12 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -1,97 +1,39 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+#include <linux/cpumask.h>
+#include <linux/init.h>
+
 /*
  * We need the APIC definitions automatically as part of 'smp.h'
  */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-extern int disable_apic;
-
-#include <asm/mpspec.h>
 #include <asm/apic.h>
 #include <asm/io_apic.h>
+#include <asm/mpspec.h>
+#include <asm/pda.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_SMP
-
-#include <asm/pda.h>
-
-struct pt_regs;
-
-extern cpumask_t cpu_present_mask;
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
 
-/*
- * Private routines/data
- */
- 
+extern int smp_num_siblings;
+extern unsigned int num_processors;
+
 extern void smp_alloc_memory(void);
-extern volatile unsigned long smp_invalidate_needed;
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
-extern int smp_num_siblings;
-extern void smp_send_reschedule(int cpu);
+
 extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 				  void *info, int wait);
 
-/*
- * cpu_sibling_map and cpu_core_map now live
- * in the per cpu area
- *
- * extern cpumask_t cpu_sibling_map[NR_CPUS];
- * extern cpumask_t cpu_core_map[NR_CPUS];
- */
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+extern u8 bios_cpu_apicid[];
+
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u8, cpu_llc_id);
-
-#define SMP_TRAMPOLINE_BASE 0x6000
-
-/*
- * On x86 all CPUs are mapped 1:1 to the APIC space.
- * This simplifies scheduling and IPI sending and
- * compresses data structures.
- */
-
-static inline int num_booting_cpus(void)
-{
-	return cpus_weight(cpu_callout_map);
-}
-
-#define raw_smp_processor_id() read_pda(cpunumber)
-
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-extern void prefill_possible_map(void);
-extern unsigned num_processors;
-extern unsigned __cpuinitdata disabled_cpus;
-
-#define NO_PROC_ID		0xFF		/* No processor magic marker */
-
-#endif /* CONFIG_SMP */
-
-#define safe_smp_processor_id()		smp_processor_id()
-
-static inline int hard_smp_processor_id(void)
-{
-	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
-}
-
-/*
- * Some lowlevel functions might want to know about
- * the real APIC ID <-> CPU # mapping.
- */
-extern u8 __initdata x86_cpu_to_apicid_init[];
-extern void *x86_cpu_to_apicid_ptr;
-DECLARE_PER_CPU(u8, x86_cpu_to_apicid);	/* physical ID */
-extern u8 bios_cpu_apicid[];
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
@@ -101,30 +43,57 @@ static inline int cpu_present_to_apicid(int mps_cpu)
 		return BAD_APICID;
 }
 
-#ifndef CONFIG_SMP
-#define stack_smp_processor_id() 0
-#define cpu_logical_map(x) (x)
-#else
-#include <asm/thread_info.h>
-#define stack_smp_processor_id() \
-({ 								\
-	struct thread_info *ti;					\
+#ifdef CONFIG_SMP
+
+#define SMP_TRAMPOLINE_BASE 0x6000
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+extern void prefill_possible_map(void);
+extern unsigned __cpuinitdata disabled_cpus;
+
+#define raw_smp_processor_id()	read_pda(cpunumber)
+#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
+
+#define stack_smp_processor_id()					\
+	({								\
+	struct thread_info *ti;						\
 	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->cpu;						\
+	ti->cpu;							\
 })
-#endif
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
+ * scheduling and IPI sending and compresses data structures.
+ */
+static inline int num_booting_cpus(void)
+{
+	return cpus_weight(cpu_callout_map);
+}
+
+extern void smp_send_reschedule(int cpu);
+
+#else /* CONFIG_SMP */
+
+extern unsigned int boot_cpu_id;
+#define cpu_physical_id(cpu)	boot_cpu_id
+#define stack_smp_processor_id() 0
+
+#endif /* !CONFIG_SMP */
+
+#define safe_smp_processor_id()		smp_processor_id()
 
 static __inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+}
+
+static inline int hard_smp_processor_id(void)
+{
+	/* we don't want to mark this access volatile - bad code generation */
+	return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
 }
 
-#ifdef CONFIG_SMP
-#define cpu_physical_id(cpu)		per_cpu(x86_cpu_to_apicid, cpu)
-#else
-extern unsigned int boot_cpu_id;
-#define cpu_physical_id(cpu)		boot_cpu_id
-#endif /* !CONFIG_SMP */
 #endif
 
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index a718dda037e0..407b22d4e3b0 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -7,8 +7,6 @@
 #include <asm/mpspec.h>
 #include <linux/bitops.h>
 
-extern cpumask_t cpu_online_map;
-
 extern unsigned char cpu_to_node[];
 extern cpumask_t     node_to_cpumask[];
 

From 8a999410135602d98bd007e835dc21c0c279ad72 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:36 +0100
Subject: [PATCH 125/890] x86: cleanup acpi_32/64.h

Fix coding style to get a readable diff

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/acpi_32.h | 35 +++++++++++++--------------------
 include/asm-x86/acpi_64.h | 41 ++++++++++++++++-----------------------
 2 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
index c8afeea0b80b..03612bcf6422 100644
--- a/include/asm-x86/acpi_32.h
+++ b/include/asm-x86/acpi_32.h
@@ -24,11 +24,8 @@
 #ifndef _ASM_ACPI_H
 #define _ASM_ACPI_H
 
-#ifdef __KERNEL__
-
 #include <acpi/pdc_intel.h>
-
-#include <asm/system.h>		/* defines cmpxchg */
+#include <asm/numa.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -67,17 +64,17 @@ int __acpi_release_global_lock(unsigned int *lock);
  * Math helper asm macros
  */
 #define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
-        asm("divl %2;"        \
-        :"=a"(q32), "=d"(r32) \
-        :"r"(d32),            \
-        "0"(n_lo), "1"(n_hi))
+	asm("divl %2;"				     \
+	    :"=a"(q32), "=d"(r32)		     \
+	    :"r"(d32),				     \
+	     "0"(n_lo), "1"(n_hi))
 
 
 #define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
-    asm("shrl   $1,%2;"             \
-        "rcrl   $1,%3;"             \
-        :"=r"(n_hi), "=r"(n_lo)     \
-        :"0"(n_hi), "1"(n_lo))
+	asm("shrl   $1,%2	;"	\
+	    "rcrl   $1,%3;"		\
+	    :"=r"(n_hi), "=r"(n_lo)	\
+	    :"0"(n_hi), "1"(n_lo))
 
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
@@ -87,6 +84,9 @@ extern int acpi_strict;
 extern int acpi_disabled;
 extern int acpi_ht;
 extern int acpi_pci_disabled;
+extern int acpi_skip_timer_override;
+extern int acpi_use_timer_override;
+
 static inline void disable_acpi(void)
 {
 	acpi_disabled = 1;
@@ -100,11 +100,6 @@ static inline void disable_acpi(void)
 
 extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
 
-#ifdef CONFIG_X86_IO_APIC
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-#endif
-
 static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
 static inline void acpi_disable_pci(void)
 {
@@ -122,7 +117,7 @@ extern unsigned long acpi_wakeup_address;
 /* early initialization routine */
 extern void acpi_reserve_bootmem(void);
 
-#else	/* !CONFIG_ACPI */
+#else /* !CONFIG_ACPI */
 
 #define acpi_lapic 0
 #define acpi_ioapic 0
@@ -130,10 +125,8 @@ static inline void acpi_noirq_set(void) { }
 static inline void acpi_disable_pci(void) { }
 static inline void disable_acpi(void) { }
 
-#endif	/* !CONFIG_ACPI */
+#endif /* !CONFIG_ACPI */
 
 #define ARCH_HAS_POWER_INIT	1
 
-#endif /*__KERNEL__*/
-
 #endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi_64.h b/include/asm-x86/acpi_64.h
index 58114ed08024..b2670c6db8ec 100644
--- a/include/asm-x86/acpi_64.h
+++ b/include/asm-x86/acpi_64.h
@@ -24,8 +24,6 @@
 #ifndef _ASM_ACPI_H
 #define _ASM_ACPI_H
 
-#ifdef __KERNEL__
-
 #include <acpi/pdc_intel.h>
 #include <asm/numa.h>
 
@@ -66,17 +64,17 @@ int __acpi_release_global_lock(unsigned int *lock);
  * Math helper asm macros
  */
 #define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
-        asm("divl %2;"        \
-        :"=a"(q32), "=d"(r32) \
-        :"r"(d32),            \
-        "0"(n_lo), "1"(n_hi))
+	asm("divl %2;"				     \
+	    :"=a"(q32), "=d"(r32)		     \
+	    :"r"(d32),				     \
+	     "0"(n_lo), "1"(n_hi))
 
 
 #define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
-    asm("shrl   $1,%2;"             \
-        "rcrl   $1,%3;"             \
-        :"=r"(n_hi), "=r"(n_lo)     \
-        :"0"(n_hi), "1"(n_lo))
+	asm("shrl   $1,%2	;"	\
+	    "rcrl   $1,%3;"		\
+	    :"=r"(n_hi), "=r"(n_lo)	\
+	    :"0"(n_hi), "1"(n_lo))
 
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
@@ -84,8 +82,11 @@ extern int acpi_ioapic;
 extern int acpi_noirq;
 extern int acpi_strict;
 extern int acpi_disabled;
-extern int acpi_pci_disabled;
 extern int acpi_ht;
+extern int acpi_pci_disabled;
+extern int acpi_skip_timer_override;
+extern int acpi_use_timer_override;
+
 static inline void disable_acpi(void)
 {
 	acpi_disabled = 1;
@@ -98,6 +99,7 @@ static inline void disable_acpi(void)
 #define FIX_ACPI_PAGES 4
 
 extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+
 static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
 static inline void acpi_disable_pci(void)
 {
@@ -115,7 +117,7 @@ extern unsigned long acpi_wakeup_address;
 /* early initialization routine */
 extern void acpi_reserve_bootmem(void);
 
-#else	/* !CONFIG_ACPI */
+#else /* !CONFIG_ACPI */
 
 #define acpi_lapic 0
 #define acpi_ioapic 0
@@ -124,19 +126,12 @@ static inline void acpi_disable_pci(void) { }
 
 #endif /* !CONFIG_ACPI */
 
+#define ARCH_HAS_POWER_INIT	1
+
+#ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int acpi_scan_nodes(unsigned long start, unsigned long end);
 #define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-extern int acpi_disabled;
-extern int acpi_pci_disabled;
-
-#define ARCH_HAS_POWER_INIT 1
-
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-
-#ifdef CONFIG_ACPI_NUMA
 extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
 				   int num_nodes);
 #else
@@ -146,6 +141,4 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
 }
 #endif
 
-#endif /*__KERNEL__*/
-
 #endif /*_ASM_ACPI_H*/

From 0b80fc721bed2635c7f0b198e1c862a4596dc2cd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:36 +0100
Subject: [PATCH 126/890] x86: merge acpi_32/64.h

Merge the files.

[ mingo@elte.hu: build fix ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/acpi.h    | 148 ++++++++++++++++++++++++++++++++++++--
 include/asm-x86/acpi_32.h | 132 ----------------------------------
 include/asm-x86/acpi_64.h | 144 -------------------------------------
 3 files changed, 143 insertions(+), 281 deletions(-)
 delete mode 100644 include/asm-x86/acpi_32.h
 delete mode 100644 include/asm-x86/acpi_64.h

diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index f8a89793ac8c..c477e5741751 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -1,14 +1,123 @@
 #ifndef _ASM_X86_ACPI_H
 #define _ASM_X86_ACPI_H
 
-#ifdef CONFIG_X86_32
-# include "acpi_32.h"
-#else
-# include "acpi_64.h"
-#endif
+/*
+ *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <acpi/pdc_intel.h>
 
+#include <asm/numa.h>
 #include <asm/processor.h>
 
+#define COMPILER_DEPENDENT_INT64   long long
+#define COMPILER_DEPENDENT_UINT64  unsigned long long
+
+/*
+ * Calling conventions:
+ *
+ * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
+ * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
+ * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
+ * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
+ */
+#define ACPI_SYSTEM_XFACE
+#define ACPI_EXTERNAL_XFACE
+#define ACPI_INTERNAL_XFACE
+#define ACPI_INTERNAL_VAR_XFACE
+
+/* Asm macros */
+
+#define ACPI_ASM_MACROS
+#define BREAKPOINT3
+#define ACPI_DISABLE_IRQS() local_irq_disable()
+#define ACPI_ENABLE_IRQS()  local_irq_enable()
+#define ACPI_FLUSH_CPU_CACHE()	wbinvd()
+
+int __acpi_acquire_global_lock(unsigned int *lock);
+int __acpi_release_global_lock(unsigned int *lock);
+
+#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
+	((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
+
+#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
+	((Acq) = __acpi_release_global_lock(&facs->global_lock))
+
+/*
+ * Math helper asm macros
+ */
+#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
+	asm("divl %2;"				     \
+	    :"=a"(q32), "=d"(r32)		     \
+	    :"r"(d32),				     \
+	     "0"(n_lo), "1"(n_hi))
+
+
+#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
+	asm("shrl   $1,%2	;"	\
+	    "rcrl   $1,%3;"		\
+	    :"=r"(n_hi), "=r"(n_lo)	\
+	    :"0"(n_hi), "1"(n_lo))
+
+#ifdef CONFIG_ACPI
+extern int acpi_lapic;
+extern int acpi_ioapic;
+extern int acpi_noirq;
+extern int acpi_strict;
+extern int acpi_disabled;
+extern int acpi_ht;
+extern int acpi_pci_disabled;
+extern int acpi_skip_timer_override;
+extern int acpi_use_timer_override;
+
+static inline void disable_acpi(void)
+{
+	acpi_disabled = 1;
+	acpi_ht = 0;
+	acpi_pci_disabled = 1;
+	acpi_noirq = 1;
+}
+
+/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
+#define FIX_ACPI_PAGES 4
+
+extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+
+static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
+static inline void acpi_disable_pci(void)
+{
+	acpi_pci_disabled = 1;
+	acpi_noirq_set();
+}
+extern int acpi_irq_balance_set(char *str);
+
+/* routines for saving/restoring kernel state */
+extern int acpi_save_state_mem(void);
+extern void acpi_restore_state_mem(void);
+
+extern unsigned long acpi_wakeup_address;
+
+/* early initialization routine */
+extern void acpi_reserve_bootmem(void);
+
 /*
  * Check if the CPU can handle C2 and deeper
  */
@@ -29,4 +138,33 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 		return max_cstate;
 }
 
+#else /* !CONFIG_ACPI */
+
+#define acpi_lapic 0
+#define acpi_ioapic 0
+static inline void acpi_noirq_set(void) { }
+static inline void acpi_disable_pci(void) { }
+static inline void disable_acpi(void) { }
+
+#endif /* !CONFIG_ACPI */
+
+#define ARCH_HAS_POWER_INIT	1
+
+struct bootnode;
+
+#ifdef CONFIG_ACPI_NUMA
+extern int acpi_numa;
+extern int acpi_scan_nodes(unsigned long start, unsigned long end);
+#ifdef CONFIG_X86_64
+# define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
 #endif
+extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
+				   int num_nodes);
+#else
+static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
+				   int num_nodes)
+{
+}
+#endif
+
+#endif /*__X86_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
deleted file mode 100644
index 03612bcf6422..000000000000
--- a/include/asm-x86/acpi_32.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _ASM_ACPI_H
-#define _ASM_ACPI_H
-
-#include <acpi/pdc_intel.h>
-#include <asm/numa.h>
-
-#define COMPILER_DEPENDENT_INT64   long long
-#define COMPILER_DEPENDENT_UINT64  unsigned long long
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
- * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* Asm macros */
-
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS()  local_irq_enable()
-#define ACPI_FLUSH_CPU_CACHE()	wbinvd()
-
-int __acpi_acquire_global_lock(unsigned int *lock);
-int __acpi_release_global_lock(unsigned int *lock);
-
-#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
-	((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
-
-#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
-	((Acq) = __acpi_release_global_lock(&facs->global_lock))
-
-/*
- * Math helper asm macros
- */
-#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
-	asm("divl %2;"				     \
-	    :"=a"(q32), "=d"(r32)		     \
-	    :"r"(d32),				     \
-	     "0"(n_lo), "1"(n_hi))
-
-
-#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
-	asm("shrl   $1,%2	;"	\
-	    "rcrl   $1,%3;"		\
-	    :"=r"(n_hi), "=r"(n_lo)	\
-	    :"0"(n_hi), "1"(n_lo))
-
-#ifdef CONFIG_ACPI
-extern int acpi_lapic;
-extern int acpi_ioapic;
-extern int acpi_noirq;
-extern int acpi_strict;
-extern int acpi_disabled;
-extern int acpi_ht;
-extern int acpi_pci_disabled;
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-
-static inline void disable_acpi(void)
-{
-	acpi_disabled = 1;
-	acpi_ht = 0;
-	acpi_pci_disabled = 1;
-	acpi_noirq = 1;
-}
-
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
-extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
-
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-static inline void acpi_disable_pci(void)
-{
-	acpi_pci_disabled = 1;
-	acpi_noirq_set();
-}
-extern int acpi_irq_balance_set(char *str);
-
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
-
-extern unsigned long acpi_wakeup_address;
-
-/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
-
-#else /* !CONFIG_ACPI */
-
-#define acpi_lapic 0
-#define acpi_ioapic 0
-static inline void acpi_noirq_set(void) { }
-static inline void acpi_disable_pci(void) { }
-static inline void disable_acpi(void) { }
-
-#endif /* !CONFIG_ACPI */
-
-#define ARCH_HAS_POWER_INIT	1
-
-#endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi_64.h b/include/asm-x86/acpi_64.h
deleted file mode 100644
index b2670c6db8ec..000000000000
--- a/include/asm-x86/acpi_64.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _ASM_ACPI_H
-#define _ASM_ACPI_H
-
-#include <acpi/pdc_intel.h>
-#include <asm/numa.h>
-
-#define COMPILER_DEPENDENT_INT64   long long
-#define COMPILER_DEPENDENT_UINT64  unsigned long long
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
- * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* Asm macros */
-
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS()  local_irq_enable()
-#define ACPI_FLUSH_CPU_CACHE()	wbinvd()
-
-int __acpi_acquire_global_lock(unsigned int *lock);
-int __acpi_release_global_lock(unsigned int *lock);
-
-#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
-	((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
-
-#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
-	((Acq) = __acpi_release_global_lock(&facs->global_lock))
-
-/*
- * Math helper asm macros
- */
-#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
-	asm("divl %2;"				     \
-	    :"=a"(q32), "=d"(r32)		     \
-	    :"r"(d32),				     \
-	     "0"(n_lo), "1"(n_hi))
-
-
-#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
-	asm("shrl   $1,%2	;"	\
-	    "rcrl   $1,%3;"		\
-	    :"=r"(n_hi), "=r"(n_lo)	\
-	    :"0"(n_hi), "1"(n_lo))
-
-#ifdef CONFIG_ACPI
-extern int acpi_lapic;
-extern int acpi_ioapic;
-extern int acpi_noirq;
-extern int acpi_strict;
-extern int acpi_disabled;
-extern int acpi_ht;
-extern int acpi_pci_disabled;
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-
-static inline void disable_acpi(void)
-{
-	acpi_disabled = 1;
-	acpi_ht = 0;
-	acpi_pci_disabled = 1;
-	acpi_noirq = 1;
-}
-
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
-extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
-
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-static inline void acpi_disable_pci(void)
-{
-	acpi_pci_disabled = 1;
-	acpi_noirq_set();
-}
-extern int acpi_irq_balance_set(char *str);
-
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
-
-extern unsigned long acpi_wakeup_address;
-
-/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
-
-#else /* !CONFIG_ACPI */
-
-#define acpi_lapic 0
-#define acpi_ioapic 0
-static inline void acpi_noirq_set(void) { }
-static inline void acpi_disable_pci(void) { }
-
-#endif /* !CONFIG_ACPI */
-
-#define ARCH_HAS_POWER_INIT	1
-
-#ifdef CONFIG_ACPI_NUMA
-extern int acpi_numa;
-extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
-				   int num_nodes);
-#else
-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
-				   int num_nodes)
-{
-}
-#endif
-
-#endif /*_ASM_ACPI_H*/

From 4757d7d8d00c5f8b2c176ca03b0a8eabbc206664 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:37 +0100
Subject: [PATCH 127/890] x86: put all kern_addr_valid() incarnations to
 pgtable.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mmzone_32.h    | 3 ---
 include/asm-x86/pgtable_32.h   | 8 +++++++-
 include/asm-x86/sparsemem_32.h | 3 ---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 118e9812778f..5d6f4ce6e6d6 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -87,9 +87,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
 })
 
-/* XXX: FIXME -- wli */
-#define kern_addr_valid(kaddr)	(0)
-
 #ifdef CONFIG_X86_NUMAQ            /* we have contiguous memory on NUMA-Q */
 #define pfn_valid(pfn)          ((pfn) < num_physpages)
 #else
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index ed3e70d8d04b..a8be1ee15a99 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -497,9 +497,15 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base)
 
 #endif /* !__ASSEMBLY__ */
 
+/*
+ * kern_addr_valid() is (1) for FLATMEM and (0) for
+ * SPARSEMEM and DISCONTIGMEM
+ */
 #ifdef CONFIG_FLATMEM
 #define kern_addr_valid(addr)	(1)
-#endif /* CONFIG_FLATMEM */
+#else
+#define kern_addr_valid(kaddr)	(0)
+#endif
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
diff --git a/include/asm-x86/sparsemem_32.h b/include/asm-x86/sparsemem_32.h
index cfeed990585f..d89c32100591 100644
--- a/include/asm-x86/sparsemem_32.h
+++ b/include/asm-x86/sparsemem_32.h
@@ -24,8 +24,5 @@
 #define MAX_PHYSMEM_BITS	32
 #endif
 
-/* XXX: FIXME -- wli */
-#define kern_addr_valid(kaddr)  (0)
-
 #endif /* CONFIG_SPARSEMEM */
 #endif /* _I386_SPARSEMEM_H */

From 8a423ff0c4a0472607bbed6790fdaeec54af2ebb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:37 +0100
Subject: [PATCH 128/890] x86: merge include/asm-x86/sparsemem.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mmzone_64.h    |  2 ++
 include/asm-x86/sparsemem.h    | 35 +++++++++++++++++++++++++++++++---
 include/asm-x86/sparsemem_32.h | 28 ---------------------------
 include/asm-x86/sparsemem_64.h | 26 -------------------------
 4 files changed, 34 insertions(+), 57 deletions(-)
 delete mode 100644 include/asm-x86/sparsemem_32.h
 delete mode 100644 include/asm-x86/sparsemem_64.h

diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 19a89377b123..1e0ed34a6adc 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -41,6 +41,8 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
 				 NODE_DATA(nid)->node_spanned_pages)
 
+extern int early_pfn_to_nid(unsigned long pfn);
+
 #ifdef CONFIG_DISCONTIGMEM
 #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
 
diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h
index 3f203b1d9ee8..fa58cd55411a 100644
--- a/include/asm-x86/sparsemem.h
+++ b/include/asm-x86/sparsemem.h
@@ -1,5 +1,34 @@
+#ifndef _ASM_X86_SPARSEMEM_H
+#define _ASM_X86_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * generic non-linear memory support:
+ *
+ * 1) we will not split memory into more chunks than will fit into the flags
+ *    field of the struct page
+ *
+ * SECTION_SIZE_BITS		2^n: size of each section
+ * MAX_PHYSADDR_BITS		2^n: max size of physical address space
+ * MAX_PHYSMEM_BITS		2^n: how much memory we can have in that space
+ *
+ */
+
 #ifdef CONFIG_X86_32
-# include "sparsemem_32.h"
-#else
-# include "sparsemem_64.h"
+# ifdef CONFIG_X86_PAE
+#  define SECTION_SIZE_BITS	30
+#  define MAX_PHYSADDR_BITS	36
+#  define MAX_PHYSMEM_BITS	36
+# else
+#  define SECTION_SIZE_BITS	26
+#  define MAX_PHYSADDR_BITS	32
+#  define MAX_PHYSMEM_BITS	32
+# endif
+#else /* CONFIG_X86_32 */
+# define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
+# define MAX_PHYSADDR_BITS	40
+# define MAX_PHYSMEM_BITS	40
+#endif
+
+#endif /* CONFIG_SPARSEMEM */
 #endif
diff --git a/include/asm-x86/sparsemem_32.h b/include/asm-x86/sparsemem_32.h
deleted file mode 100644
index d89c32100591..000000000000
--- a/include/asm-x86/sparsemem_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _I386_SPARSEMEM_H
-#define _I386_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
-
-/*
- * generic non-linear memory support:
- *
- * 1) we will not split memory into more chunks than will fit into the
- *    flags field of the struct page
- */
-
-/*
- * SECTION_SIZE_BITS		2^N: how big each section will be
- * MAX_PHYSADDR_BITS		2^N: how much physical address space we have
- * MAX_PHYSMEM_BITS		2^N: how much memory we can have in that space
- */
-#ifdef CONFIG_X86_PAE
-#define SECTION_SIZE_BITS       30
-#define MAX_PHYSADDR_BITS       36
-#define MAX_PHYSMEM_BITS	36
-#else
-#define SECTION_SIZE_BITS       26
-#define MAX_PHYSADDR_BITS       32
-#define MAX_PHYSMEM_BITS	32
-#endif
-
-#endif /* CONFIG_SPARSEMEM */
-#endif /* _I386_SPARSEMEM_H */
diff --git a/include/asm-x86/sparsemem_64.h b/include/asm-x86/sparsemem_64.h
deleted file mode 100644
index dabb16714a71..000000000000
--- a/include/asm-x86/sparsemem_64.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _ASM_X86_64_SPARSEMEM_H
-#define _ASM_X86_64_SPARSEMEM_H 1
-
-#ifdef CONFIG_SPARSEMEM
-
-/*
- * generic non-linear memory support:
- *
- * 1) we will not split memory into more chunks than will fit into the flags
- *    field of the struct page
- *
- * SECTION_SIZE_BITS		2^n: size of each section
- * MAX_PHYSADDR_BITS		2^n: max size of physical address space
- * MAX_PHYSMEM_BITS		2^n: how much memory we can have in that space
- *
- */
-
-#define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
-#define MAX_PHYSADDR_BITS	40
-#define MAX_PHYSMEM_BITS	40
-
-extern int early_pfn_to_nid(unsigned long pfn);
-
-#endif /* CONFIG_SPARSEMEM */
-
-#endif /* _ASM_X86_64_SPARSEMEM_H */

From e1d9197862ff4d950dab45669b7f37b5ec3219d8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:37 +0100
Subject: [PATCH 129/890] x86: merge include/asm-x86/sparsemem.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io_apic.h    | 158 ++++++++++++++++++++++++++++++++++-
 include/asm-x86/io_apic_32.h | 155 ----------------------------------
 include/asm-x86/io_apic_64.h | 134 -----------------------------
 3 files changed, 156 insertions(+), 291 deletions(-)
 delete mode 100644 include/asm-x86/io_apic_32.h
 delete mode 100644 include/asm-x86/io_apic_64.h

diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 88494966beeb..0f5b3fef0b08 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -1,5 +1,159 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <asm/types.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+/*
+ * The structure of the IO-APIC:
+ */
+union IO_APIC_reg_00 {
+	u32	raw;
+	struct {
+		u32	__reserved_2	: 14,
+			LTS		:  1,
+			delivery_type	:  1,
+			__reserved_1	:  8,
+			ID		:  8;
+	} __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_01 {
+	u32	raw;
+	struct {
+		u32	version		:  8,
+			__reserved_2	:  7,
+			PRQ		:  1,
+			entries		:  8,
+			__reserved_1	:  8;
+	} __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_02 {
+	u32	raw;
+	struct {
+		u32	__reserved_2	: 24,
+			arbitration	:  4,
+			__reserved_1	:  4;
+	} __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_03 {
+	u32	raw;
+	struct {
+		u32	boot_DT		:  1,
+			__reserved_1	: 31;
+	} __attribute__ ((packed)) bits;
+};
+
+enum ioapic_irq_destination_types {
+	dest_Fixed = 0,
+	dest_LowestPrio = 1,
+	dest_SMI = 2,
+	dest__reserved_1 = 3,
+	dest_NMI = 4,
+	dest_INIT = 5,
+	dest__reserved_2 = 6,
+	dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+	__u32	vector		:  8,
+		delivery_mode	:  3,	/* 000: FIXED
+					 * 001: lowest prio
+					 * 111: ExtINT
+					 */
+		dest_mode	:  1,	/* 0: physical, 1: logical */
+		delivery_status	:  1,
+		polarity	:  1,
+		irr		:  1,
+		trigger		:  1,	/* 0: edge, 1: level */
+		mask		:  1,	/* 0: enabled, 1: disabled */
+		__reserved_2	: 15;
+
 #ifdef CONFIG_X86_32
-# include "io_apic_32.h"
+	union {
+		struct {
+			__u32	__reserved_1	: 24,
+				physical_dest	:  4,
+				__reserved_2	:  4;
+		} physical;
+
+		struct {
+			__u32	__reserved_1	: 24,
+				logical_dest	:  8;
+		} logical;
+	} dest;
 #else
-# include "io_apic_64.h"
+	__u32	__reserved_3	: 24,
+		dest		:  8;
+#endif
+
+} __attribute__ ((packed));
+
+#ifdef CONFIG_X86_IO_APIC
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+/* Older SiS APIC requires we rewrite the index register */
+extern int sis_apic_bug;
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+static inline void disable_ioapic_setup(void)
+{
+	skip_ioapic_setup = 1;
+}
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs \
+	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
+
+#ifdef CONFIG_ACPI
+extern int io_apic_get_unique_id(int ioapic, int apic_id);
+extern int io_apic_get_version(int ioapic);
+extern int io_apic_get_redir_entries(int ioapic);
+extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
+				   int edge_level, int active_high_low);
+extern int timer_uses_ioapic_pin_0;
+#endif /* CONFIG_ACPI */
+
+extern int (*ioapic_renumber_irq)(int ioapic, int irq);
+extern void ioapic_init_mappings(void);
+
+#else  /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
 #endif
diff --git a/include/asm-x86/io_apic_32.h b/include/asm-x86/io_apic_32.h
deleted file mode 100644
index 3f087883ea48..000000000000
--- a/include/asm-x86/io_apic_32.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <asm/types.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-/*
- * The structure of the IO-APIC:
- */
-union IO_APIC_reg_00 {
-	u32	raw;
-	struct {
-		u32	__reserved_2	: 14,
-			LTS		:  1,
-			delivery_type	:  1,
-			__reserved_1	:  8,
-			ID		:  8;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_01 {
-	u32	raw;
-	struct {
-		u32	version		:  8,
-			__reserved_2	:  7,
-			PRQ		:  1,
-			entries		:  8,
-			__reserved_1	:  8;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_02 {
-	u32	raw;
-	struct {
-		u32	__reserved_2	: 24,
-			arbitration	:  4,
-			__reserved_1	:  4;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_03 {
-	u32	raw;
-	struct {
-		u32	boot_DT		:  1,
-			__reserved_1	: 31;
-	} __attribute__ ((packed)) bits;
-};
-
-enum ioapic_irq_destination_types {
-	dest_Fixed = 0,
-	dest_LowestPrio = 1,
-	dest_SMI = 2,
-	dest__reserved_1 = 3,
-	dest_NMI = 4,
-	dest_INIT = 5,
-	dest__reserved_2 = 6,
-	dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
-	__u32	vector		:  8,
-		delivery_mode	:  3,	/* 000: FIXED
-					 * 001: lowest prio
-					 * 111: ExtINT
-					 */
-		dest_mode	:  1,	/* 0: physical, 1: logical */
-		delivery_status	:  1,
-		polarity	:  1,
-		irr		:  1,
-		trigger		:  1,	/* 0: edge, 1: level */
-		mask		:  1,	/* 0: enabled, 1: disabled */
-		__reserved_2	: 15;
-
-	union {		struct { __u32
-					__reserved_1	: 24,
-					physical_dest	:  4,
-					__reserved_2	:  4;
-			} physical;
-
-			struct { __u32
-					__reserved_1	: 24,
-					logical_dest	:  8;
-			} logical;
-	} dest;
-
-} __attribute__ ((packed));
-
-#ifdef CONFIG_X86_IO_APIC
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-/* Older SiS APIC requires we rewrite the index register */
-extern int sis_apic_bug;
-
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
-
-static inline void disable_ioapic_setup(void)
-{
-	skip_ioapic_setup = 1;
-}
-
-static inline int ioapic_setup_disabled(void)
-{
-	return skip_ioapic_setup;
-}
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-
-#ifdef CONFIG_ACPI
-extern int io_apic_get_unique_id (int ioapic, int apic_id);
-extern int io_apic_get_version (int ioapic);
-extern int io_apic_get_redir_entries (int ioapic);
-extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
-extern int timer_uses_ioapic_pin_0;
-#endif /* CONFIG_ACPI */
-
-extern int (*ioapic_renumber_irq)(int ioapic, int irq);
-
-#else  /* !CONFIG_X86_IO_APIC */
-#define io_apic_assign_pci_irqs 0
-#endif
-
-#endif
diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
deleted file mode 100644
index 3e3c2114d93a..000000000000
--- a/include/asm-x86/io_apic_64.h
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <asm/types.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-#define APIC_MISMATCH_DEBUG
-
-/*
- * The structure of the IO-APIC:
- */
-union IO_APIC_reg_00 {
-	u32	raw;
-	struct {
-		u32	__reserved_2	: 14,
-			LTS		:  1,
-			delivery_type	:  1,
-			__reserved_1	:  8,
-			ID		:  8;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_01 {
-	u32	raw;
-	struct {
-		u32	version		:  8,
-		__reserved_2	:  7,
-		PRQ		:  1,
-		entries		:  8,
-		__reserved_1	:  8;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_02 {
-	u32	raw;
-	struct {
-		u32	__reserved_2	: 24,
-		arbitration	:  4,
-		__reserved_1	:  4;
-	} __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_03 {
-	u32	raw;
-	struct {
-		u32	boot_DT		:  1,
-			__reserved_1	: 31;
-	} __attribute__ ((packed)) bits;
-};
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-enum ioapic_irq_destination_types {
-	dest_Fixed = 0,
-	dest_LowestPrio = 1,
-	dest_SMI = 2,
-	dest__reserved_1 = 3,
-	dest_NMI = 4,
-	dest_INIT = 5,
-	dest__reserved_2 = 6,
-	dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
-	__u32	vector		:  8,
-		delivery_mode	:  3,	/* 000: FIXED
-					 * 001: lowest prio
-					 * 111: ExtINT
-					 */
-		dest_mode	:  1,	/* 0: physical, 1: logical */
-		delivery_status	:  1,
-		polarity	:  1,
-		irr		:  1,
-		trigger		:  1,	/* 0: edge, 1: level */
-		mask		:  1,	/* 0: enabled, 1: disabled */
-		__reserved_2	: 15;
-
-	__u32	__reserved_3	: 24,
-		dest		:  8;
-} __attribute__ ((packed));
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
-
-static inline void disable_ioapic_setup(void)
-{
-	skip_ioapic_setup = 1;
-}
-
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-
-#ifdef CONFIG_ACPI
-extern int io_apic_get_version (int ioapic);
-extern int io_apic_get_redir_entries (int ioapic);
-extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int);
-#endif
-
-extern void ioapic_init_mappings(void);
-
-extern int sis_apic_bug; /* dummy */ 
-
-#endif

From e3cfe529dd87dd3354789546676fef2e808822e4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:37 +0100
Subject: [PATCH 130/890] x86: cleanup numa_64.c

Clean it up before applying more patches.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/numa_64.c | 213 ++++++++++++++++++++++--------------------
 1 file changed, 111 insertions(+), 102 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e7f3f4e9ec85..4faed6a8f3ae 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -1,7 +1,7 @@
-/* 
+/*
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */ 
+ */
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
@@ -24,6 +24,8 @@
 #endif
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
@@ -31,16 +33,19 @@ struct memnode memnode;
 unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
+EXPORT_SYMBOL(cpu_to_node);
+
 unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
+
 cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask);
 
 int numa_off __initdata;
 unsigned long __initdata nodemap_addr;
 unsigned long __initdata nodemap_size;
 
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -48,12 +53,11 @@ unsigned long __initdata nodemap_size;
  * 0 if memnodmap[] too small (of shift too small)
  * -1 if node overlap or lost ram (shift too big)
  */
-static int __init
-populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
+static int __init populate_memnodemap(const struct bootnode *nodes,
+				      int numnodes, int shift)
 {
-	int i; 
-	int res = -1;
 	unsigned long addr, end;
+	int i, res = -1;
 
 	memset(memnodemap, 0xff, memnodemapsize);
 	for (i = 0; i < numnodes; i++) {
@@ -70,7 +74,7 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
 			addr += (1UL << shift);
 		} while (addr < end);
 		res = 1;
-	} 
+	}
 	return res;
 }
 
@@ -105,8 +109,8 @@ static int __init allocate_cachealigned_memnodemap(void)
  * The LSB of all start and end addresses in the node map is the value of the
  * maximum possible shift.
  */
-static int __init
-extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
+static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
+					 int numnodes)
 {
 	int i, nodes_used = 0;
 	unsigned long start, end;
@@ -141,10 +145,9 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 		shift);
 
 	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
-		printk(KERN_INFO
-	"Your memory is not aligned you need to rebuild your kernel "
-	"with a bigger NODEMAPSIZE shift=%d\n",
-			shift);
+		printk(KERN_INFO "Your memory is not aligned you need to "
+		       "rebuild your kernel with a bigger NODEMAPSIZE "
+		       "shift=%d\n", shift);
 		return -1;
 	}
 	return shift;
@@ -157,35 +160,37 @@ int early_pfn_to_nid(unsigned long pfn)
 }
 #endif
 
-static void * __init
-early_node_mem(int nodeid, unsigned long start, unsigned long end,
-	      unsigned long size)
+static void * __init early_node_mem(int nodeid, unsigned long start,
+				    unsigned long end, unsigned long size)
 {
 	unsigned long mem = find_e820_area(start, end, size);
 	void *ptr;
+
 	if (mem != -1L)
 		return __va(mem);
 	ptr = __alloc_bootmem_nopanic(size,
 				SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 	if (ptr == NULL) {
 		printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
-			size, nodeid);
+		       size, nodeid);
 		return NULL;
 	}
 	return ptr;
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{ 
-	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; 
-	unsigned long nodedata_phys;
+void __init setup_node_bootmem(int nodeid, unsigned long start,
+			       unsigned long end)
+{
+	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size;
+	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
 
-	start = round_up(start, ZONE_ALIGN); 
+	start = round_up(start, ZONE_ALIGN);
 
-	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
+	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+	       start, end);
 
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
@@ -201,75 +206,81 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
 	/* Find a place for the bootmem map */
-	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 
+	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
 	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
 	bootmap = early_node_mem(nodeid, bootmap_start, end,
 					bootmap_pages<<PAGE_SHIFT);
 	if (bootmap == NULL)  {
 		if (nodedata_phys < start || nodedata_phys >= end)
-			free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
+			free_bootmem((unsigned long)node_data[nodeid],
+				     pgdat_size);
 		node_data[nodeid] = NULL;
 		return;
 	}
 	bootmap_start = __pa(bootmap);
-	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); 
-	
+	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
+
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
-					 bootmap_start >> PAGE_SHIFT, 
-					 start_pfn, end_pfn); 
+					 bootmap_start >> PAGE_SHIFT,
+					 start_pfn, end_pfn);
 
 	free_bootmem_with_active_regions(nodeid, end);
 
-	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 
-	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
+	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+			     bootmap_pages<<PAGE_SHIFT);
 #ifdef CONFIG_ACPI_NUMA
 	srat_reserve_add_area(nodeid);
 #endif
 	node_set_online(nodeid);
-} 
+}
 
 /* Initialize final allocator for a zone */
 void __init setup_node_zones(int nodeid)
-{ 
+{
 	unsigned long start_pfn, end_pfn, memmapsize, limit;
 
- 	start_pfn = node_start_pfn(nodeid);
- 	end_pfn = node_end_pfn(nodeid);
+	start_pfn = node_start_pfn(nodeid);
+	end_pfn = node_end_pfn(nodeid);
 
 	Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
 		nodeid, start_pfn, end_pfn);
 
-	/* Try to allocate mem_map at end to not fill up precious <4GB
-	   memory. */
+	/*
+	 * Try to allocate mem_map at end to not fill up precious <4GB
+	 * memory.
+	 */
 	memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
 	limit = end_pfn << PAGE_SHIFT;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
-	NODE_DATA(nodeid)->node_mem_map = 
-		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata, 
-				memmapsize, SMP_CACHE_BYTES, 
-				round_down(limit - memmapsize, PAGE_SIZE), 
-				limit);
+	NODE_DATA(nodeid)->node_mem_map =
+		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
+				     memmapsize, SMP_CACHE_BYTES,
+				     round_down(limit - memmapsize, PAGE_SIZE),
+				     limit);
 #endif
-} 
+}
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
 void __init numa_init_array(void)
 {
 	int rr, i;
-	/* There are unfortunately some poorly designed mainboards around
-	   that only connect memory to a single CPU. This breaks the 1:1 cpu->node
-	   mapping. To avoid this fill in the mapping for all possible
-	   CPUs, as the number of CPUs is not known yet. 
-	   We round robin the existing nodes. */
+
 	rr = first_node(node_online_map);
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
- 		numa_set_node(i, rr);
+		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
 	}
-
 }
 
 #ifdef CONFIG_NUMA_EMU
@@ -277,15 +288,17 @@ void __init numa_init_array(void)
 char *cmdline __initdata;
 
 /*
- * Setups up nid to range from addr to addr + size.  If the end boundary is
- * greater than max_addr, then max_addr is used instead.  The return value is 0
- * if there is additional memory left for allocation past addr and -1 otherwise.
- * addr is adjusted to be at the end of the node.
+ * Setups up nid to range from addr to addr + size.  If the end
+ * boundary is greater than max_addr, then max_addr is used instead.
+ * The return value is 0 if there is additional memory left for
+ * allocation past addr and -1 otherwise.  addr is adjusted to be at
+ * the end of the node.
  */
 static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
 				   u64 size, u64 max_addr)
 {
 	int ret = 0;
+
 	nodes[nid].start = *addr;
 	*addr += size;
 	if (*addr >= max_addr) {
@@ -336,6 +349,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
 
 	for (i = node_start; i < num_nodes + node_start; i++) {
 		u64 end = *addr + size;
+
 		if (i < big)
 			end += FAKE_NODE_MIN_SIZE;
 		/*
@@ -381,14 +395,9 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
 static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 {
 	struct bootnode nodes[MAX_NUMNODES];
-	u64 addr = start_pfn << PAGE_SHIFT;
+	u64 size, addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = end_pfn << PAGE_SHIFT;
-	int num_nodes = 0;
-	int coeff_flag;
-	int coeff = -1;
-	int num = 0;
-	u64 size;
-	int i;
+	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
 
 	memset(&nodes, 0, sizeof(nodes));
 	/*
@@ -396,8 +405,9 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 	 * system RAM into N fake nodes.
 	 */
 	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
-		num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
-						simple_strtol(cmdline, NULL, 0));
+		long n = simple_strtol(cmdline, NULL, 0);
+
+		num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
 		if (num_nodes < 0)
 			return num_nodes;
 		goto out;
@@ -484,46 +494,47 @@ out:
 	for_each_node_mask(i, node_possible_map) {
 		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
 						nodes[i].end >> PAGE_SHIFT);
- 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 	acpi_fake_nodes(nodes, num_nodes);
- 	numa_init_array();
- 	return 0;
+	numa_init_array();
+	return 0;
 }
 #endif /* CONFIG_NUMA_EMU */
 
 void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{ 
+{
 	int i;
 
 	nodes_clear(node_possible_map);
 
 #ifdef CONFIG_NUMA_EMU
 	if (cmdline && !numa_emulation(start_pfn, end_pfn))
- 		return;
+		return;
 	nodes_clear(node_possible_map);
 #endif
 
 #ifdef CONFIG_ACPI_NUMA
 	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
 					  end_pfn << PAGE_SHIFT))
- 		return;
+		return;
 	nodes_clear(node_possible_map);
 #endif
 
 #ifdef CONFIG_K8_NUMA
-	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
+	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
+					end_pfn<<PAGE_SHIFT))
 		return;
 	nodes_clear(node_possible_map);
 #endif
 	printk(KERN_INFO "%s\n",
 	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
 
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
+	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
 	       start_pfn << PAGE_SHIFT,
-	       end_pfn << PAGE_SHIFT); 
-		/* setup dummy node covering all memory */ 
-	memnode_shift = 63; 
+	       end_pfn << PAGE_SHIFT);
+	/* setup dummy node covering all memory */
+	memnode_shift = 63;
 	memnodemap = memnode.embedded_map;
 	memnodemap[0] = 0;
 	nodes_clear(node_online_map);
@@ -539,7 +550,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 __cpuinit void numa_add_cpu(int cpu)
 {
 	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-} 
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
@@ -547,20 +558,22 @@ void __cpuinit numa_set_node(int cpu, int node)
 	cpu_to_node(cpu) = node;
 }
 
-unsigned long __init numa_free_all_bootmem(void) 
-{ 
-	int i;
+unsigned long __init numa_free_all_bootmem(void)
+{
 	unsigned long pages = 0;
-	for_each_online_node(i) {
+	int i;
+
+	for_each_online_node(i)
 		pages += free_all_bootmem_node(NODE_DATA(i));
-	}
+
 	return pages;
-} 
+}
 
 void __init paging_init(void)
-{ 
-	int i;
+{
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	int i;
+
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
@@ -569,32 +582,30 @@ void __init paging_init(void)
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
-	for_each_online_node(i) {
-		setup_node_zones(i); 
-	}
+	for_each_online_node(i)
+		setup_node_zones(i);
 
 	free_area_init_nodes(max_zone_pfns);
-} 
+}
 
 static __init int numa_setup(char *opt)
-{ 
+{
 	if (!opt)
 		return -EINVAL;
-	if (!strncmp(opt,"off",3))
+	if (!strncmp(opt, "off", 3))
 		numa_off = 1;
 #ifdef CONFIG_NUMA_EMU
 	if (!strncmp(opt, "fake=", 5))
 		cmdline = opt + 5;
 #endif
 #ifdef CONFIG_ACPI_NUMA
- 	if (!strncmp(opt,"noacpi",6))
- 		acpi_numa = -1;
-	if (!strncmp(opt,"hotadd=", 7))
+	if (!strncmp(opt, "noacpi", 6))
+		acpi_numa = -1;
+	if (!strncmp(opt, "hotadd=", 7))
 		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
-} 
-
+}
 early_param("numa", numa_setup);
 
 /*
@@ -612,20 +623,18 @@ early_param("numa", numa_setup);
 void __init init_cpu_to_node(void)
 {
 	int i;
- 	for (i = 0; i < NR_CPUS; i++) {
+
+	for (i = 0; i < NR_CPUS; i++) {
 		u8 apicid = x86_cpu_to_apicid_init[i];
+
 		if (apicid == BAD_APICID)
 			continue;
 		if (apicid_to_node[apicid] == NUMA_NO_NODE)
 			continue;
-		numa_set_node(i,apicid_to_node[apicid]);
+		numa_set_node(i, apicid_to_node[apicid]);
 	}
 }
 
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(node_data);
-
 #ifdef CONFIG_DISCONTIGMEM
 /*
  * Functions to convert PFNs from/to per node page addresses.

From 7462894a7cb03b54b9139f31fab5928366752a78 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:38 +0100
Subject: [PATCH 131/890] x86: fixup numa 64 namespace

Using a variable name, which is the same as a macro name is not
really smart. Change the variable names and fixup all users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/numa_64.c         | 14 +++++++-------
 include/asm-x86/numa_64.h     |  2 +-
 include/asm-x86/topology_64.h | 10 +++++-----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 4faed6a8f3ae..29b69300aee2 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -30,17 +30,17 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+int cpu_to_node_map[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
-EXPORT_SYMBOL(cpu_to_node);
+EXPORT_SYMBOL(cpu_to_node_map);
 
 unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_to_cpumask);
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
 
 int numa_off __initdata;
 unsigned long __initdata nodemap_addr;
@@ -542,20 +542,20 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	node_to_cpumask[0] = cpumask_of_cpu(0);
+	node_to_cpumask_map[0] = cpumask_of_cpu(0);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+	set_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
 	cpu_pda(cpu)->nodenumber = node;
-	cpu_to_node(cpu) = node;
+	cpu_to_node_map[cpu] = node;
 }
 
 unsigned long __init numa_free_all_bootmem(void)
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index e6bc0b5dfe3a..c3c20db1fba3 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -32,7 +32,7 @@ extern void __init init_cpu_to_node(void);
 
 static inline void clear_node_cpumask(int cpu)
 {
-	clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+	clear_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]);
 }
 
 #else
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index 407b22d4e3b0..cf529341b16c 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -7,8 +7,8 @@
 #include <asm/mpspec.h>
 #include <linux/bitops.h>
 
-extern unsigned char cpu_to_node[];
-extern cpumask_t     node_to_cpumask[];
+extern int cpu_to_node_map[];
+extern cpumask_t node_to_cpumask_map[];
 
 #ifdef CONFIG_ACPI_NUMA
 extern int __node_distance(int, int);
@@ -16,10 +16,10 @@ extern int __node_distance(int, int);
 /* #else fallback version */
 #endif
 
-#define cpu_to_node(cpu)		(cpu_to_node[cpu])
+#define cpu_to_node(cpu)		(cpu_to_node_map[cpu])
 #define parent_node(node)		(node)
-#define node_to_first_cpu(node) 	(first_cpu(node_to_cpumask[node]))
-#define node_to_cpumask(node)		(node_to_cpumask[node])
+#define node_to_first_cpu(node)		(first_cpu(node_to_cpumask_map[node]))
+#define node_to_cpumask(node)		(node_to_cpumask_map[node])
 #define pcibus_to_node(bus)	((struct pci_sysdata *)((bus)->sysdata))->node
 #define pcibus_to_cpumask(bus)		node_to_cpumask(pcibus_to_node(bus));
 

From fe21a445b98c9d52f02f3412d7a2fd39784f3b22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:38 +0100
Subject: [PATCH 132/890] x86: adjust numa 32 namespace

Use the 64bit numa variable names for numa32 as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot_32.c  | 16 ++++++++--------
 include/asm-x86/topology_32.h |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 753d7acf4dac..239ada1c499c 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -466,19 +466,19 @@ extern struct {
 #ifdef CONFIG_NUMA
 
 /* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-EXPORT_SYMBOL(node_2_cpu_mask);
+EXPORT_SYMBOL(node_to_cpumask_map);
 /* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
+int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_to_node_map);
 
 /* set up a mapping between cpu and node. */
 static inline void map_cpu_to_node(int cpu, int node)
 {
 	printk("Mapping cpu %d to node %d\n", cpu, node);
-	cpu_set(cpu, node_2_cpu_mask[node]);
-	cpu_2_node[cpu] = node;
+	cpu_set(cpu, node_to_cpumask_map[node]);
+	cpu_to_node_map[cpu] = node;
 }
 
 /* undo a mapping between cpu and node. */
@@ -488,8 +488,8 @@ static inline void unmap_cpu_to_node(int cpu)
 
 	printk("Unmapping cpu %d from all nodes\n", cpu);
 	for (node = 0; node < MAX_NUMNODES; node ++)
-		cpu_clear(cpu, node_2_cpu_mask[node]);
-	cpu_2_node[cpu] = 0;
+		cpu_clear(cpu, node_to_cpumask_map[node]);
+	cpu_to_node_map[cpu] = 0;
 }
 #else /* !CONFIG_NUMA */
 
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index fdca80a851e2..e28dbdcc5670 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -39,13 +39,13 @@
 #include <linux/cpumask.h>
 
 /* Mappings between logical cpu number and node number */
-extern cpumask_t node_2_cpu_mask[];
-extern int cpu_2_node[];
+extern cpumask_t node_to_cpumask_map[];
+extern int cpu_to_node_map[];
 
 /* Returns the number of the node containing CPU 'cpu' */
 static inline int cpu_to_node(int cpu)
 { 
-	return cpu_2_node[cpu];
+	return cpu_to_node_map[cpu];
 }
 
 /* Returns the number of the node containing Node 'node'.  This architecture is flat, 
@@ -55,7 +55,7 @@ static inline int cpu_to_node(int cpu)
 /* Returns a bitmask of CPUs on Node 'node'. */
 static inline cpumask_t node_to_cpumask(int node)
 {
-	return node_2_cpu_mask[node];
+	return node_to_cpumask_map[node];
 }
 
 /* Returns the number of the first CPU on Node 'node'. */

From b4ea9299df1fa04dbd51840f60918a63ff6a8a44 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:38 +0100
Subject: [PATCH 133/890] x86: consolidate toloplogy_32/64.h

Reorder defines and do white space / coding style cleanups
to get a readable diff.

Also convert the macros to inline functions. Move the pci
related inlines to pci.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pci.h         | 13 ++++++++
 include/asm-x86/topology_32.h | 57 ++++++++++++++++-----------------
 include/asm-x86/topology_64.h | 60 ++++++++++++++++++++++++-----------
 3 files changed, 81 insertions(+), 49 deletions(-)

diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 6983730d86fd..c61190cb9e12 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -92,6 +92,19 @@ static inline void early_quirks(void) { }
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_NUMA
+/* Returns the node based on pci bus */
+static inline int __pcibus_to_node(struct pci_bus *bus)
+{
+	struct pci_sysdata *sd = bus->sysdata;
 
+	return sd->node;
+}
+
+static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
+{
+	return node_to_cpumask(__pcibus_to_node(bus));
+}
+#endif
 
 #endif
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index e28dbdcc5670..35b00067587d 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2002, IBM Corp.
  *
- * All rights reserved.          
+ * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -22,34 +22,27 @@
  *
  * Send feedback to <colpatch@us.ibm.com>
  */
-#ifndef _ASM_I386_TOPOLOGY_H
-#define _ASM_I386_TOPOLOGY_H
-
-#ifdef CONFIG_X86_HT
-#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
-#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
-#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
-#endif
+#ifndef _ASM_X86_TOPOLOGY_H
+#define _ASM_X86_TOPOLOGY_H
 
 #ifdef CONFIG_NUMA
-
+#include <linux/cpumask.h>
 #include <asm/mpspec.h>
 
-#include <linux/cpumask.h>
-
 /* Mappings between logical cpu number and node number */
-extern cpumask_t node_to_cpumask_map[];
 extern int cpu_to_node_map[];
+extern cpumask_t node_to_cpumask_map[];
 
 /* Returns the number of the node containing CPU 'cpu' */
 static inline int cpu_to_node(int cpu)
-{ 
+{
 	return cpu_to_node_map[cpu];
 }
 
-/* Returns the number of the node containing Node 'node'.  This architecture is flat, 
-   so it is a pretty simple function! */
+/*
+ * Returns the number of the node containing Node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
 #define parent_node(node) (node)
 
 /* Returns a bitmask of CPUs on Node 'node'. */
@@ -60,13 +53,14 @@ static inline cpumask_t node_to_cpumask(int node)
 
 /* Returns the number of the first CPU on Node 'node'. */
 static inline int node_to_first_cpu(int node)
-{ 
+{
 	cpumask_t mask = node_to_cpumask(node);
+
 	return first_cpu(mask);
 }
 
-#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
-#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
 
 /* sched_domains SD_NODE_INIT for NUMAQ machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
@@ -99,21 +93,24 @@ extern unsigned long node_remap_size[];
 
 #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
 
-#else /* !CONFIG_NUMA */
-/*
- * Other i386 platforms should define their own version of the 
- * above macros here.
- */
+#else /* CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
 
-#endif /* CONFIG_NUMA */
+#endif
 
 extern cpumask_t cpu_coregroup_map(int cpu);
 
-#ifdef CONFIG_SMP
-#define mc_capable()	(boot_cpu_data.x86_max_cores > 1)
-#define smt_capable()	(smp_num_siblings > 1)
+#ifdef CONFIG_X86_HT
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
+#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #endif
 
-#endif /* _ASM_I386_TOPOLOGY_H */
+#ifdef CONFIG_SMP
+#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
+#define smt_capable()			(smp_num_siblings > 1)
+#endif
+
+#endif
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index cf529341b16c..93de948bd337 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -1,27 +1,47 @@
-#ifndef _ASM_X86_64_TOPOLOGY_H
-#define _ASM_X86_64_TOPOLOGY_H
-
+#ifndef _ASM_X86_TOPOLOGY_H
+#define _ASM_X86_TOPOLOGY_H
 
 #ifdef CONFIG_NUMA
-
+#include <linux/cpumask.h>
 #include <asm/mpspec.h>
-#include <linux/bitops.h>
 
+/* Mappings between logical cpu number and node number */
 extern int cpu_to_node_map[];
 extern cpumask_t node_to_cpumask_map[];
 
 #ifdef CONFIG_ACPI_NUMA
 extern int __node_distance(int, int);
 #define node_distance(a,b) __node_distance(a,b)
-/* #else fallback version */
 #endif
 
-#define cpu_to_node(cpu)		(cpu_to_node_map[cpu])
-#define parent_node(node)		(node)
-#define node_to_first_cpu(node)		(first_cpu(node_to_cpumask_map[node]))
-#define node_to_cpumask(node)		(node_to_cpumask_map[node])
-#define pcibus_to_node(bus)	((struct pci_sysdata *)((bus)->sysdata))->node
-#define pcibus_to_cpumask(bus)		node_to_cpumask(pcibus_to_node(bus));
+/* Returns the number of the node containing CPU 'cpu' */
+static inline int cpu_to_node(int cpu)
+{
+	return cpu_to_node_map[cpu];
+}
+
+/*
+ * Returns the number of the node containing Node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline cpumask_t node_to_cpumask(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/* Returns the number of the first CPU on Node 'node'. */
+static inline int node_to_first_cpu(int node)
+{
+	cpumask_t mask = node_to_cpumask(node);
+
+	return first_cpu(mask);
+}
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
 
 #define numa_node_id()			read_pda(nodenumber)
 
@@ -38,12 +58,12 @@ extern int __node_distance(int, int);
 	.cache_nice_tries	= 2,			\
 	.busy_idx		= 3,			\
 	.idle_idx		= 2,			\
-	.newidle_idx		= 0, 			\
+	.newidle_idx		= 0,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
 				| SD_SERIALIZE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
@@ -51,19 +71,21 @@ extern int __node_distance(int, int);
 	.nr_balance_failed	= 0,			\
 }
 
+#else /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
 #endif
 
+extern cpumask_t cpu_coregroup_map(int cpu);
+
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
-#define smt_capable() 			(smp_num_siblings > 1)
+#define smt_capable()			(smp_num_siblings > 1)
 #endif
 
-#include <asm-generic/topology.h>
-
-extern cpumask_t cpu_coregroup_map(int cpu);
-
 #endif

From 3367e56f26a3a8647784328ffa846fe8021a5b79 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:38 +0100
Subject: [PATCH 134/890] x86: merge topology.h variants

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/topology.h    | 143 +++++++++++++++++++++++++++++++++-
 include/asm-x86/topology_32.h | 116 ---------------------------
 include/asm-x86/topology_64.h |  91 ----------------------
 3 files changed, 141 insertions(+), 209 deletions(-)
 delete mode 100644 include/asm-x86/topology_32.h
 delete mode 100644 include/asm-x86/topology_64.h

diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index b10fde9798ea..9c251604ecc4 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -1,5 +1,144 @@
+/*
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _ASM_X86_TOPOLOGY_H
+#define _ASM_X86_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+
+/* Mappings between logical cpu number and node number */
+extern int cpu_to_node_map[];
+extern cpumask_t node_to_cpumask_map[];
+
+/* Returns the number of the node containing CPU 'cpu' */
+static inline int cpu_to_node(int cpu)
+{
+	return cpu_to_node_map[cpu];
+}
+
+/*
+ * Returns the number of the node containing Node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline cpumask_t node_to_cpumask(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/* Returns the number of the first CPU on Node 'node'. */
+static inline int node_to_first_cpu(int node)
+{
+	cpumask_t mask = node_to_cpumask(node);
+
+	return first_cpu(mask);
+}
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
+
 #ifdef CONFIG_X86_32
-# include "topology_32.h"
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_remap_size[];
+#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
+
+# ifdef CONFIG_X86_HT
+#  define ENABLE_TOPO_DEFINES
+# endif
+
+# define SD_CACHE_NICE_TRIES	1
+# define SD_IDLE_IDX		1
+# define SD_NEWIDLE_IDX		2
+# define SD_FORKEXEC_IDX	0
+
 #else
-# include "topology_64.h"
+
+# ifdef CONFIG_SMP
+#  define ENABLE_TOPO_DEFINES
+# endif
+
+# define SD_CACHE_NICE_TRIES	2
+# define SD_IDLE_IDX		2
+# define SD_NEWIDLE_IDX		0
+# define SD_FORKEXEC_IDX	1
+
+#endif
+
+/* sched_domains SD_NODE_INIT for NUMAQ machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.child			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= SD_CACHE_NICE_TRIES,	\
+	.busy_idx		= 3,			\
+	.idle_idx		= SD_IDLE_IDX,		\
+	.newidle_idx		= SD_NEWIDLE_IDX,	\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= SD_FORKEXEC_IDX,	\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
+				| SD_SERIALIZE		\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+#ifdef CONFIG_X86_64_ACPI_NUMA
+extern int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+#endif
+
+#else /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif
+
+extern cpumask_t cpu_coregroup_map(int cpu);
+
+#ifdef ENABLE_TOPO_DEFINES
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
+#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#endif
+
+#ifdef CONFIG_SMP
+#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
+#define smt_capable()			(smp_num_siblings > 1)
+#endif
+
 #endif
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
deleted file mode 100644
index 35b00067587d..000000000000
--- a/include/asm-x86/topology_32.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Written by: Matthew Dobson, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- */
-#ifndef _ASM_X86_TOPOLOGY_H
-#define _ASM_X86_TOPOLOGY_H
-
-#ifdef CONFIG_NUMA
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-extern cpumask_t node_to_cpumask_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int cpu_to_node(int cpu)
-{
-	return cpu_to_node_map[cpu];
-}
-
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
-/* Returns a bitmask of CPUs on Node 'node'. */
-static inline cpumask_t node_to_cpumask(int node)
-{
-	return node_to_cpumask_map[node];
-}
-
-/* Returns the number of the first CPU on Node 'node'. */
-static inline int node_to_first_cpu(int node)
-{
-	cpumask_t mask = node_to_cpumask(node);
-
-	return first_cpu(mask);
-}
-
-#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
-
-/* sched_domains SD_NODE_INIT for NUMAQ machines */
-#define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.child			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 8,			\
-	.max_interval		= 32,			\
-	.busy_factor		= 32,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= 1,			\
-	.busy_idx		= 3,			\
-	.idle_idx		= 1,			\
-	.newidle_idx		= 2,			\
-	.wake_idx		= 1,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_FORK	\
-				| SD_SERIALIZE		\
-				| SD_WAKE_BALANCE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-
-extern unsigned long node_start_pfn[];
-extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
-
-#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
-
-#else /* CONFIG_NUMA */
-
-#include <asm-generic/topology.h>
-
-#endif
-
-extern cpumask_t cpu_coregroup_map(int cpu);
-
-#ifdef CONFIG_X86_HT
-#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
-#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
-#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
-#endif
-
-#ifdef CONFIG_SMP
-#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
-#define smt_capable()			(smp_num_siblings > 1)
-#endif
-
-#endif
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
deleted file mode 100644
index 93de948bd337..000000000000
--- a/include/asm-x86/topology_64.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _ASM_X86_TOPOLOGY_H
-#define _ASM_X86_TOPOLOGY_H
-
-#ifdef CONFIG_NUMA
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-extern cpumask_t node_to_cpumask_map[];
-
-#ifdef CONFIG_ACPI_NUMA
-extern int __node_distance(int, int);
-#define node_distance(a,b) __node_distance(a,b)
-#endif
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int cpu_to_node(int cpu)
-{
-	return cpu_to_node_map[cpu];
-}
-
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
-/* Returns a bitmask of CPUs on Node 'node'. */
-static inline cpumask_t node_to_cpumask(int node)
-{
-	return node_to_cpumask_map[node];
-}
-
-/* Returns the number of the first CPU on Node 'node'. */
-static inline int node_to_first_cpu(int node)
-{
-	cpumask_t mask = node_to_cpumask(node);
-
-	return first_cpu(mask);
-}
-
-#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
-
-#define numa_node_id()			read_pda(nodenumber)
-
-/* sched_domains SD_NODE_INIT for x86_64 machines */
-#define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.child			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 8,			\
-	.max_interval		= 32,			\
-	.busy_factor		= 32,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= 2,			\
-	.busy_idx		= 3,			\
-	.idle_idx		= 2,			\
-	.newidle_idx		= 0,			\
-	.wake_idx		= 1,			\
-	.forkexec_idx		= 1,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_FORK	\
-				| SD_SERIALIZE		\
-				| SD_WAKE_BALANCE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-
-#else /* CONFIG_NUMA */
-
-#include <asm-generic/topology.h>
-
-#endif
-
-extern cpumask_t cpu_coregroup_map(int cpu);
-
-#ifdef CONFIG_SMP
-#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
-#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
-#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
-#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
-#define smt_capable()			(smp_num_siblings > 1)
-#endif
-
-#endif

From 3578facf737dc9a60ed19a83458bd9cfdc2af233 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:38 +0100
Subject: [PATCH 135/890] x86: merge resume-trace.h variants

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/asm.h             |  2 ++
 include/asm-x86/resume-trace.h    | 23 +++++++++++++++++++----
 include/asm-x86/resume-trace_32.h | 13 -------------
 include/asm-x86/resume-trace_64.h | 13 -------------
 4 files changed, 21 insertions(+), 30 deletions(-)
 delete mode 100644 include/asm-x86/resume-trace_32.h
 delete mode 100644 include/asm-x86/resume-trace_64.h

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index b5006eb9acba..8661ae75488a 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -6,12 +6,14 @@
 
 # define _ASM_PTR	" .long "
 # define _ASM_ALIGN	" .balign 4 "
+# define _ASM_MOV_UL	" movl "
 
 #else
 /* 64 bits */
 
 # define _ASM_PTR	" .quad "
 # define _ASM_ALIGN	" .balign 8 "
+# define _ASM_MOV_UL	" movq "
 
 #endif /* CONFIG_X86_32 */
 
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 9b6dd093a9f7..46f725b0bc82 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -1,5 +1,20 @@
-#ifdef CONFIG_X86_32
-# include "resume-trace_32.h"
-#else
-# include "resume-trace_64.h"
+#ifndef _ASM_X86_RESUME_TRACE_H
+#define _ASM_X86_RESUME_TRACE_H
+
+#include <asm/asm.h>
+
+#define TRACE_RESUME(user) do {					\
+	if (pm_trace_enabled) {					\
+		void *tracedata;				\
+		asm volatile(_ASM_MOV_UL " $1f,%0\n"		\
+			".section .tracedata,\"a\"\n"		\
+			"1:\t.word %c1\n\t"			\
+			_ASM_PTR " %c2\n"			\
+			".previous"				\
+			:"=r" (tracedata)			\
+			: "i" (__LINE__), "i" (__FILE__));	\
+		generate_resume_trace(tracedata, user);		\
+	}							\
+} while (0)
+
 #endif
diff --git a/include/asm-x86/resume-trace_32.h b/include/asm-x86/resume-trace_32.h
deleted file mode 100644
index ec9cfd656230..000000000000
--- a/include/asm-x86/resume-trace_32.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#define TRACE_RESUME(user) do {					\
-	if (pm_trace_enabled) {					\
-		void *tracedata;				\
-		asm volatile("movl $1f,%0\n"			\
-			".section .tracedata,\"a\"\n"		\
-			"1:\t.word %c1\n"			\
-			"\t.long %c2\n"				\
-			".previous"				\
-			:"=r" (tracedata)			\
-			: "i" (__LINE__), "i" (__FILE__));	\
-		generate_resume_trace(tracedata, user);		\
-	}							\
-} while (0)
diff --git a/include/asm-x86/resume-trace_64.h b/include/asm-x86/resume-trace_64.h
deleted file mode 100644
index 34bf998fdf62..000000000000
--- a/include/asm-x86/resume-trace_64.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#define TRACE_RESUME(user) do {					\
-	if (pm_trace_enabled) {					\
-		void *tracedata;				\
-		asm volatile("movq $1f,%0\n"			\
-			".section .tracedata,\"a\"\n"		\
-			"1:\t.word %c1\n"			\
-			"\t.quad %c2\n"				\
-			".previous"				\
-			:"=r" (tracedata)			\
-			: "i" (__LINE__), "i" (__FILE__));	\
-		generate_resume_trace(tracedata, user);		\
-	}							\
-} while (0)

From 7ebad705340f35276326ed93a43190e62f725f77 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 136/890] x86: use CR0 defines.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/bugs.c         | 3 ++-
 arch/x86/kernel/cpu/cyrix.c        | 6 ++----
 arch/x86/kernel/cpu/mtrr/cyrix.c   | 3 ++-
 arch/x86/kernel/cpu/mtrr/generic.c | 3 ++-
 arch/x86/kernel/cpu/mtrr/state.c   | 3 ++-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 205fd5ba57f7..a96abd453e0d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,7 @@
 #include <linux/utsname.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
+#include <asm/processor-flags.h>
 #include <asm/i387.h>
 #include <asm/msr.h>
 #include <asm/paravirt.h>
@@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium);
 static int __init no_387(char *s)
 {
 	boot_cpu_data.hard_math = 0;
-	write_cr0(0xE | read_cr0());
+	write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
 	return 1;
 }
 
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 88d66fb8411d..404a6a2d4016 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -5,6 +5,7 @@
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
 #include <asm/timer.h>
 #include <asm/pci-direct.h>
 #include <asm/tsc.h>
@@ -126,15 +127,12 @@ static void __cpuinit set_cx86_reorder(void)
 
 static void __cpuinit set_cx86_memwb(void)
 {
-	u32 cr0;
-
 	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
-	cr0 = 0x20000000;
-	write_cr0(read_cr0() | cr0);
+	write_cr0(read_cr0() | X86_CR0_NW);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 }
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 9964be3de2b7..8e139c70f888 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -4,6 +4,7 @@
 #include <asm/msr.h>
 #include <asm/io.h>
 #include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
 #include "mtrr.h"
 
 int arr3_protected;
@@ -142,7 +143,7 @@ static void prepare_set(void)
 
 	/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
 	    a side-effect  */
-	cr0 = read_cr0() | 0x40000000;
+	cr0 = read_cr0() | X86_CR0_CD;
 	wbinvd();
 	write_cr0(cr0);
 	wbinvd();
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 1c331c373a43..55d31ff118fb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -9,6 +9,7 @@
 #include <asm/msr.h>
 #include <asm/system.h>
 #include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
 #include <asm/tlbflush.h>
 #include "mtrr.h"
 
@@ -350,7 +351,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	spin_lock(&set_atomicity_lock);
 
 	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
-	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
+	cr0 = read_cr0() | X86_CR0_CD;
 	write_cr0(cr0);
 	wbinvd();
 
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 49e20c2afcdf..9f8ba923d1c9 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -4,6 +4,7 @@
 #include <asm/mtrr.h>
 #include <asm/msr.h>
 #include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
 #include "mtrr.h"
 
 
@@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 
 		/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
 		    a side-effect  */
-		cr0 = read_cr0() | 0x40000000;
+		cr0 = read_cr0() | X86_CR0_CD;
 		wbinvd();
 		write_cr0(cr0);
 		wbinvd();

From 5cabbd97b10229159919ff17f746b69742cec84d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 137/890] x86: remove unused tsk_thread from asm-offsets_64.c

So this patch simply removes the "thread" from asm-offsets.c since I
can't find an owner for it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_64.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d1b6ed98774e..40f41752c1df 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -38,7 +38,6 @@ int main(void)
 #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
 	ENTRY(state);
 	ENTRY(flags); 
-	ENTRY(thread); 
 	ENTRY(pid);
 	BLANK();
 #undef ENTRY

From 04e1ba852132c9ad006affcd5b8c8606295170b0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 138/890] x86: cleanup kernel/setup_64.c

Clean it up before applying more patches to it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_64.c | 249 +++++++++++++++++++------------------
 1 file changed, 126 insertions(+), 123 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 1acb435a0585..62db062ba138 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -77,7 +77,7 @@ unsigned long saved_video_mode;
 
 int force_mwait __cpuinitdata;
 
-/* 
+/*
  * Early DMI memory
  */
 int dmi_alloc_index;
@@ -169,12 +169,12 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
 	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
 	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
 	reserve_bootmem(bootmap, bootmap_size);
-} 
+}
 #endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
@@ -208,7 +208,8 @@ static void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+	free_mem =
+		((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
 
 	ret = parse_crashkernel(boot_command_line, free_mem,
 			&crash_size, &crash_base);
@@ -240,7 +241,7 @@ unsigned __initdata ebda_size;
 static void discover_ebda(void)
 {
 	/*
-	 * there is a real-mode segmented pointer pointing to the 
+	 * there is a real-mode segmented pointer pointing to the
 	 * 4K EBDA area at 0x40E
 	 */
 	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
@@ -259,6 +260,8 @@ static void discover_ebda(void)
 
 void __init setup_arch(char **cmdline_p)
 {
+	unsigned i;
+
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 
 	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@@ -345,13 +348,13 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_NUMA
-	numa_initmem_init(0, end_pfn); 
+	numa_initmem_init(0, end_pfn);
 #else
 	contig_initmem_init(0, end_pfn);
 #endif
 
 	/* Reserve direct mapping */
-	reserve_bootmem_generic(table_start << PAGE_SHIFT, 
+	reserve_bootmem_generic(table_start << PAGE_SHIFT,
 				(table_end - table_start) << PAGE_SHIFT);
 
 	/* reserve kernel */
@@ -379,14 +382,14 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_ACPI_SLEEP
-       /*
-        * Reserve low memory region for sleep support.
-        */
+	/*
+	 * Reserve low memory region for sleep support.
+	 */
        acpi_reserve_bootmem();
 #endif
-	/*
-	 * Find and reserve possible boot-time SMP configuration:
-	 */
+       /*
+	* Find and reserve possible boot-time SMP configuration:
+	*/
 	find_smp_config();
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
@@ -437,16 +440,13 @@ void __init setup_arch(char **cmdline_p)
 
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
- 	 */
+	 */
 	e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
 	e820_mark_nosave_regions();
 
-	{
-	unsigned i;
 	/* request I/O space for devices used on all i[345]86 PCs */
 	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
 		request_resource(&ioport_resource, &standard_io_resources[i]);
-	}
 
 	e820_setup_gap();
 
@@ -483,9 +483,10 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 
 	if (n >= 0x80000005) {
 		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
-		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
-			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
-		c->x86_cache_size=(ecx>>24)+(edx>>24);
+		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
+		       "D cache %dK (%d bytes/line)\n",
+		       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+		c->x86_cache_size = (ecx>>24) + (edx>>24);
 		/* On K8 L1 TLB is inclusive, so don't count it */
 		c->x86_tlbsize = 0;
 	}
@@ -501,9 +502,9 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 	}
 
 	if (n >= 0x80000007)
-		cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power); 
+		cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
 	if (n >= 0x80000008) {
-		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); 
+		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
 	}
@@ -512,14 +513,15 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
 static int nearby_node(int apicid)
 {
-	int i;
+	int i, node;
+
 	for (i = apicid - 1; i >= 0; i--) {
-		int node = apicid_to_node[i];
+		node = apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		int node = apicid_to_node[i];
+		node = apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -559,27 +561,29 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 	c->phys_proc_id = phys_pkg_id(bits);
 
 #ifdef CONFIG_NUMA
-  	node = c->phys_proc_id;
- 	if (apicid_to_node[apicid] != NUMA_NO_NODE)
- 		node = apicid_to_node[apicid];
- 	if (!node_online(node)) {
- 		/* Two possibilities here:
- 		   - The CPU is missing memory and no node was created.
- 		   In that case try picking one from a nearby CPU
- 		   - The APIC IDs differ from the HyperTransport node IDs
- 		   which the K8 northbridge parsing fills in.
- 		   Assume they are all increased by a constant offset,
- 		   but in the same order as the HT nodeids.
- 		   If that doesn't result in a usable node fall back to the
- 		   path for the previous case.  */
+	node = c->phys_proc_id;
+	if (apicid_to_node[apicid] != NUMA_NO_NODE)
+		node = apicid_to_node[apicid];
+	if (!node_online(node)) {
+		/* Two possibilities here:
+		   - The CPU is missing memory and no node was created.
+		   In that case try picking one from a nearby CPU
+		   - The APIC IDs differ from the HyperTransport node IDs
+		   which the K8 northbridge parsing fills in.
+		   Assume they are all increased by a constant offset,
+		   but in the same order as the HT nodeids.
+		   If that doesn't result in a usable node fall back to the
+		   path for the previous case.  */
+
 		int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
- 		if (ht_nodeid >= 0 &&
- 		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- 			node = apicid_to_node[ht_nodeid];
- 		/* Pick a nearby node */
- 		if (!node_online(node))
- 			node = nearby_node(apicid);
- 	}
+
+		if (ht_nodeid >= 0 &&
+		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = apicid_to_node[ht_nodeid];
+		/* Pick a nearby node */
+		if (!node_online(node))
+			node = nearby_node(apicid);
+	}
 	numa_set_node(cpu, node);
 
 	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
@@ -599,8 +603,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
 static __cpuinit int amd_apic_timer_broken(void)
 {
-	u32 lo, hi;
-	u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+
 	switch (eax & CPUID_XFAM) {
 	case CPUID_XFAM_K8:
 		if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
@@ -628,7 +632,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/*
 	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
 	 * bit 6 of msr C001_0015
- 	 *
+	 *
 	 * Errata 63 for SH-B3 steppings
 	 * Errata 122 for all steppings (F+ have it disabled by default)
 	 */
@@ -642,10 +646,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
 	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
 	clear_bit(0*32+31, &c->x86_capability);
-	
+
 	/* On C+ stepping K8 rep microcode works well for copy/memset */
 	level = cpuid_eax(1);
-	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
+	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
+			     level >= 0x0f58))
 		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
 	if (c->x86 == 0x10 || c->x86 == 0x11)
 		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
@@ -656,14 +661,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	level = get_model_name(c);
 	if (!level) {
-		switch (c->x86) { 
+		switch (c->x86) {
 		case 15:
 			/* Should distinguish Models here, but this is only
 			   a fallback anyways. */
 			strcpy(c->x86_model_id, "Hammer");
-			break; 
-		} 
-	} 
+			break;
+		}
+	}
 	display_cacheinfo(c);
 
 	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
@@ -697,25 +702,26 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, core_bits;
+	u32 eax, ebx, ecx, edx;
+	int index_msb, core_bits;
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
 
 	if (!cpu_has(c, X86_FEATURE_HT))
 		return;
- 	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		goto out;
 
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1 ) {
+	} else if (smp_num_siblings > 1) {
 
 		if (smp_num_siblings > NR_CPUS) {
-			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+			printk(KERN_WARNING "CPU: Unsupported number of "
+			       "siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
@@ -725,7 +731,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		index_msb = get_count_order(smp_num_siblings) ;
+		index_msb = get_count_order(smp_num_siblings);
 
 		core_bits = get_count_order(c->x86_max_cores);
 
@@ -734,8 +740,10 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 	}
 out:
 	if ((c->x86_max_cores * smp_num_siblings) > 1) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
-		printk(KERN_INFO  "CPU: Processor Core ID: %d\n", c->cpu_core_id);
+		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		       c->phys_proc_id);
+		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+		       c->cpu_core_id);
 	}
 
 #endif
@@ -783,7 +791,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	unsigned n;
 
 	init_intel_cacheinfo(c);
-	if (c->cpuid_level > 9 ) {
+	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
@@ -822,7 +830,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
 	else
 		clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- 	c->x86_max_cores = intel_num_cpu_cores(c);
+	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
 }
@@ -869,7 +877,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	      (unsigned int *)&c->x86_vendor_id[0],
 	      (unsigned int *)&c->x86_vendor_id[8],
 	      (unsigned int *)&c->x86_vendor_id[4]);
-		
+
 	get_cpu_vendor(c);
 
 	/* Initialize the standard set of capabilities */
@@ -887,7 +895,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 			c->x86 += (tfms >> 20) & 0xff;
 		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xF) << 4;
-		if (c->x86_capability[0] & (1<<19)) 
+		if (c->x86_capability[0] & (1<<19))
 			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
 	} else {
 		/* Have CPUID level 0 only - unheard of */
@@ -959,7 +967,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	select_idle_routine(c);
-	detect_ht(c); 
+	detect_ht(c);
 
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
@@ -969,7 +977,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	 */
 	if (c != &boot_cpu_data) {
 		/* AND the already accumulated flags with these */
-		for (i = 0 ; i < NCAPINTS ; i++)
+		for (i = 0; i < NCAPINTS; i++)
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
@@ -982,17 +990,16 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	numa_add_cpu(smp_processor_id());
 #endif
 }
- 
 
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	if (c->x86_model_id[0])
-		printk("%s", c->x86_model_id);
+		printk(KERN_INFO "%s", c->x86_model_id);
 
-	if (c->x86_mask || c->cpuid_level >= 0) 
-		printk(" stepping %02x\n", c->x86_mask);
+	if (c->x86_mask || c->cpuid_level >= 0)
+		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
 	else
-		printk("\n");
+		printk(KERN_CONT "\n");
 }
 
 /*
@@ -1002,9 +1009,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	struct cpuinfo_x86 *c = v;
-	int cpu = 0;
+	int cpu = 0, i;
 
-	/* 
+	/*
 	 * These flag bits must match the definitions in <asm/cpufeature.h>.
 	 * NULL means this bit is undefined or reserved; either way it doesn't
 	 * have meaning as far as Linux is concerned.  Note that it's important
@@ -1014,10 +1021,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	 */
 	static const char *const x86_cap_flags[] = {
 		/* Intel-defined */
-	        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-	        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-	        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+		"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+		"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+		"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+		"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
 
 		/* AMD-defined */
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1084,34 +1091,35 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	cpu = c->cpu_index;
 #endif
 
-	seq_printf(m,"processor\t: %u\n"
-		     "vendor_id\t: %s\n"
-		     "cpu family\t: %d\n"
-		     "model\t\t: %d\n"
-		     "model name\t: %s\n",
-		     (unsigned)cpu,
-		     c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
-		     c->x86,
-		     (int)c->x86_model,
-		     c->x86_model_id[0] ? c->x86_model_id : "unknown");
-	
+	seq_printf(m, "processor\t: %u\n"
+		   "vendor_id\t: %s\n"
+		   "cpu family\t: %d\n"
+		   "model\t\t: %d\n"
+		   "model name\t: %s\n",
+		   (unsigned)cpu,
+		   c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+		   c->x86,
+		   (int)c->x86_model,
+		   c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
 	if (c->x86_mask || c->cpuid_level >= 0)
 		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
 	else
 		seq_printf(m, "stepping\t: unknown\n");
-	
-	if (cpu_has(c,X86_FEATURE_TSC)) {
+
+	if (cpu_has(c, X86_FEATURE_TSC)) {
 		unsigned int freq = cpufreq_quick_get((unsigned)cpu);
+
 		if (!freq)
 			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			     freq / 1000, (freq % 1000));
+			   freq / 1000, (freq % 1000));
 	}
 
 	/* Cache size */
-	if (c->x86_cache_size >= 0) 
+	if (c->x86_cache_size >= 0)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-	
+
 #ifdef CONFIG_SMP
 	if (smp_num_siblings * c->x86_max_cores > 1) {
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
@@ -1120,48 +1128,43 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
 		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
-#endif	
+#endif
 
 	seq_printf(m,
-	        "fpu\t\t: yes\n"
-	        "fpu_exception\t: yes\n"
-	        "cpuid level\t: %d\n"
-	        "wp\t\t: yes\n"
-	        "flags\t\t:",
+		   "fpu\t\t: yes\n"
+		   "fpu_exception\t: yes\n"
+		   "cpuid level\t: %d\n"
+		   "wp\t\t: yes\n"
+		   "flags\t\t:",
 		   c->cpuid_level);
 
-	{ 
-		int i; 
-		for ( i = 0 ; i < 32*NCAPINTS ; i++ )
-			if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
-				seq_printf(m, " %s", x86_cap_flags[i]);
-	}
-		
+	for (i = 0; i < 32*NCAPINTS; i++)
+		if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
+			seq_printf(m, " %s", x86_cap_flags[i]);
+
 	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
 		   c->loops_per_jiffy/(500000/HZ),
 		   (c->loops_per_jiffy/(5000/HZ)) % 100);
 
-	if (c->x86_tlbsize > 0) 
+	if (c->x86_tlbsize > 0)
 		seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
 	seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
 	seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
 
-	seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", 
+	seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
 		   c->x86_phys_bits, c->x86_virt_bits);
 
 	seq_printf(m, "power management:");
-	{
-		unsigned i;
-		for (i = 0; i < 32; i++) 
-			if (c->x86_power & (1 << i)) {
-				if (i < ARRAY_SIZE(x86_power_flags) &&
-					x86_power_flags[i])
-					seq_printf(m, "%s%s",
-						x86_power_flags[i][0]?" ":"",
-						x86_power_flags[i]);
-				else
-					seq_printf(m, " [%d]", i);
-			}
+	for (i = 0; i < 32; i++) {
+		if (c->x86_power & (1 << i)) {
+			if (i < ARRAY_SIZE(x86_power_flags) &&
+			    x86_power_flags[i])
+				seq_printf(m, "%s%s",
+					   x86_power_flags[i][0]?" ":"",
+					   x86_power_flags[i]);
+			else
+				seq_printf(m, " [%d]", i);
+		}
 	}
 
 	seq_printf(m, "\n\n");
@@ -1189,7 +1192,7 @@ static void c_stop(struct seq_file *m, void *v)
 }
 
 struct seq_operations cpuinfo_op = {
-	.start =c_start,
+	.start = c_start,
 	.next =	c_next,
 	.stop =	c_stop,
 	.show =	show_cpuinfo,

From 1c69524c2e5b83e52a098ebdeb4a8b52169f6a03 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 139/890] x86: clear IO_APIC before enabing apic error vector.

4 socket quad core, 8 socket quad core will do apic ID lifting for BSP.

But io-apic regs for ExtINT still use 0 as dest.

so when we enable apic error vector in BSP, we will get one APIC error.

CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
CPU 0/4 -> Node 0
CPU: Physical Processor ID: 1
CPU: Processor Core ID: 0
SMP alternatives: switching to UP code
ACPI: Core revision 20070126
enabled ExtINT on CPU#0
ESR value after enabling vector: 00000000, after 0000000c
APIC error on CPU0: 0c(08)
ENABLING IO-APIC IRQs
Synchronizing Arb IDs.

So move enable_IO_APIC from setup_IO_APIC into setup_local_APIC and call it
before enabling the ACPI error vector.

[ tglx: arch/x86 adaptation ]

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c    | 7 +++++++
 arch/x86/kernel/io_apic_64.c | 7 +++++--
 include/asm-x86/hw_irq_64.h  | 1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 915808bd8a2a..994298bf4921 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -774,6 +774,13 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
 	apic_write(APIC_LVT1, value);
 
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!smp_processor_id() && !skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+
 	{
 		unsigned oldvalue;
 		maxlvt = lapic_get_maxlvt();
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 3e471d0fb150..4ef85a3b3f9f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1172,7 +1172,7 @@ void __apicdebuginit print_PIC(void)
 
 #endif  /*  0  */
 
-static void __init enable_IO_APIC(void)
+void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
@@ -1789,7 +1789,10 @@ __setup("no_timer_check", notimercheck);
 
 void __init setup_IO_APIC(void)
 {
-	enable_IO_APIC();
+
+	/*
+	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+	 */
 
 	if (acpi_ioapic)
 		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index a470d59da678..a346159b6ac3 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -135,6 +135,7 @@ extern void init_8259A(int aeoi);
 extern void send_IPI_self(int vector);
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
 extern void disable_IO_APIC(void);
 extern void print_IO_APIC(void);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);

From 8c4385d7f5b77415156c9b900016cc8161d49f17 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 140/890] x86: remove -maccumulate-outgoing-args on 32-bit

Contrary to the comment "newer gccs do it by default", newer gcc versions
default to -maccumulate-outgoing-args only with CONFIG_CC_OPTIMIZE_FOR_SIZE=n,
and then only with some CPU settings.

Measured with an i386 defconfig, gcc 4.2.1 and kernel 2.6.23-rc1 ("orig" is
the plain kernel, "changed is with -maccumulate-outgoing-args removed):

$ ls -la vmlinux*
-rwxrwxr-x 1 bunk bunk 6269713 2007-07-24 22:19 vmlinux.changed
-rwxrwxr-x 1 bunk bunk 6425361 2007-07-24 22:19 vmlinux.orig
$ size vmlinux.*
   text    data     bss     dec     hex filename
4493465  504108  614400 5611973  55a1c5 vmlinux.changed
4646160  504108  614400 5764668  57f63c vmlinux.orig
$

That's a 2.5% size increase that does for sure hurt small systems.

If the stack unwinder ever comes back and needs this as indicated in the
comment, adding it to the cflags when the user enabled the unwinder should be
a better option.

[ tglx: arch/x86 adaptation ]

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile_32 | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
index f806ceccff7a..b152db71d5b5 100644
--- a/arch/x86/Makefile_32
+++ b/arch/x86/Makefile_32
@@ -48,10 +48,6 @@ include $(srctree)/arch/x86/Makefile_32.cpu
 # temporary until string.h is fixed
 cflags-y += -ffreestanding
 
-# this works around some issues with generating unwind tables in older gccs
-# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
-
 # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
 # a lot more stack due to the lack of sharing of stacklots:
 KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)

From a860b63c41f241c685245127a3d32f352cb04c12 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 141/890] x86: store core id bits in cpuinfo_x8

We need to store core id bits to cpuinfo_x86 in early_identify_cpu. So we
use it to create acpiid_to_node array in k8topolgy.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c     | 72 ++++++++++++++++++++++------------
 include/asm-x86/processor_64.h |  1 +
 2 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 62db062ba138..84f66b7b4d2e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -542,18 +542,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 	int node = 0;
 	unsigned apicid = hard_smp_processor_id();
 #endif
-	unsigned ecx = cpuid_ecx(0x80000008);
-
-	c->x86_max_cores = (ecx & 0xff) + 1;
-
-	/* CPU telling us the core id bits shift? */
-	bits = (ecx >> 12) & 0xF;
-
-	/* Otherwise recompute */
-	if (bits == 0) {
-		while ((1 << bits) < c->x86_max_cores)
-			bits++;
-	}
+	bits = c->x86_coreid_bits;
 
 	/* Low order bits define the core id (index of core in socket) */
 	c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
@@ -591,6 +580,33 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned bits, ecx;
+
+	/* Multi core CPU? */
+	if (c->extended_cpuid_level < 0x80000008)
+		return;
+
+	ecx = cpuid_ecx(0x80000008);
+
+	c->x86_max_cores = (ecx & 0xff) + 1;
+
+	/* CPU telling us the core id bits shift? */
+	bits = (ecx >> 12) & 0xF;
+
+	/* Otherwise recompute */
+	if (bits == 0) {
+		while ((1 << bits) < c->x86_max_cores)
+			bits++;
+	}
+
+	c->x86_coreid_bits = bits;
+
+#endif
+}
+
 #define ENABLE_C1E_MASK		0x18000000
 #define CPUID_PROCESSOR_SIGNATURE	1
 #define CPUID_XFAM		0x0ff00000
@@ -858,7 +874,7 @@ struct cpu_model_info {
    below. */
 static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 {
-	u32 tfms;
+	u32 tfms, xlvl;
 
 	c->loops_per_jiffy = loops_per_jiffy;
 	c->x86_cache_size = -1;
@@ -869,6 +885,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_clflush_size = 64;
 	c->x86_cache_alignment = c->x86_clflush_size;
 	c->x86_max_cores = 1;
+	c->x86_coreid_bits = 0;
 	c->extended_cpuid_level = 0;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
@@ -905,18 +922,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
 #endif
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
-{
-	int i;
-	u32 xlvl;
-
-	early_identify_cpu(c);
-
 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
 	c->extended_cpuid_level = xlvl;
@@ -937,6 +942,23 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		early_init_amd(c);
+		break;
+	}
+
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+	int i;
+
+	early_identify_cpu(c);
+
 	init_scattered_cpuid_features(c);
 
 	c->apicid = phys_pkg_id(0);
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 6c0d96a7af63..2ae8ceb8a74e 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -61,6 +61,7 @@ struct cpuinfo_x86 {
 	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
         __u8    x86_virt_bits, x86_phys_bits;
 	__u8	x86_max_cores;	/* cpuid returned max cores value */
+	__u8	x86_coreid_bits; /* cpuid returned core id bits */
         __u32   x86_power; 	
 	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
 	unsigned long loops_per_jiffy;

From 3f6e5a12b8384ab9816ba6703f4b4be87ac94b5a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 30 Jan 2008 13:30:39 +0100
Subject: [PATCH 142/890] x86: use core id bits for apicid_to_node
 initialization

We shoud use core id bits instead of max cores, in case later with AMD
downcores Quad core Opteron.

[ tglx: arch/x86 adaptation ]

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/k8topology_64.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index a96006f7ae0c..3695c9e8b9b6 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -44,12 +44,14 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 { 
 	unsigned long prevbase;
 	struct bootnode nodes[8];
-	int nodeid, i, j, nb;
+	int nodeid, i, nb;
 	unsigned char nodeids[8];
 	int found = 0;
 	u32 reg;
 	unsigned numnodes;
-	unsigned num_cores;
+	unsigned cores;
+	unsigned bits;
+	int j;
 
 	if (!early_pci_allowed())
 		return -1;
@@ -60,9 +62,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 
 	printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); 
 
-	num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-	printk(KERN_INFO "CPU has %d num_cores\n", num_cores);
-
 	reg = read_pci_config(0, nb, 0, 0x60); 
 	numnodes = ((reg >> 4) & 0xF) + 1;
 	if (numnodes <= 1)
@@ -168,11 +167,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	} 
 	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); 
 
+	/* use the coreid bits from early_identify_cpu */
+	bits = boot_cpu_data.x86_coreid_bits;
+	cores = (1<<bits);
+
 	for (i = 0; i < 8; i++) {
 		if (nodes[i].start != nodes[i].end) { 
 			nodeid = nodeids[i];
-			for (j = 0; j < num_cores; j++)
-				apicid_to_node[(nodeid * num_cores) + j] = i;
+			for (j = 0; j < cores; j++)
+				apicid_to_node[(nodeid << bits) + j] = i;
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end); 
 		} 
 	}

From 739f33b38bf88312447e38ae8b7ac3acdbb72a6b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:30:40 +0100
Subject: [PATCH 143/890] x86: untable __init references between IO data

Earlier patch added IO APIC setup into local APIC setup. This caused
modpost warnings. Fix them by untangling setup_local_APIC() and splitting
it into smaller functions. The IO APIC initialization is only called
for the BP init.

Also removed some outdated debugging code and minor cleanup.

[ tglx: arch/x86 adaptation ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c    | 46 ++++++++++++++++++------------------
 arch/x86/kernel/smpboot_64.c |  8 +++++++
 include/asm-x86/apic.h       |  1 +
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 994298bf4921..d341f798255c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -677,7 +677,7 @@ void __init init_bsp_APIC(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned int value, maxlvt;
+	unsigned int value;
 	int i, j;
 
 	value = apic_read(APIC_LVR);
@@ -773,32 +773,23 @@ void __cpuinit setup_local_APIC(void)
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
 	apic_write(APIC_LVT1, value);
+}
 
+void __cpuinit lapic_setup_esr(void)
+{
+	unsigned maxlvt = lapic_get_maxlvt();
+
+	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
 	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling vector on BP
+	 * spec says clear errors after enabling vector.
 	 */
-	if (!smp_processor_id() && !skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-
-	{
-		unsigned oldvalue;
-		maxlvt = lapic_get_maxlvt();
-		oldvalue = apic_read(APIC_ESR);
-		value = ERROR_APIC_VECTOR;      // enables sending errors
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE,
-			"ESR value after enabling vector: %08x, after %08x\n",
-			oldvalue, value);
-	}
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+}
 
+void __cpuinit end_local_APIC_setup(void)
+{
+	lapic_setup_esr();
 	nmi_watchdog_default();
 	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
@@ -879,6 +870,15 @@ int __init APIC_init_uniprocessor(void)
 
 	setup_local_APIC();
 
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+
+	end_local_APIC_setup();
+
 	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 	else
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 7552db9ee9ff..ddefb38c53fb 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -210,6 +210,7 @@ void __cpuinit smp_callin(void)
 
 	Dprintk("CALLIN, before setup_local_APIC().\n");
 	setup_local_APIC();
+	end_local_APIC_setup();
 
 	/*
 	 * Get our bogomips.
@@ -884,6 +885,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 */
 	setup_local_APIC();
 
+	/*
+	 * Enable IO APIC before setting up error vector
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+	end_local_APIC_setup();
+
 	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 18d932dff476..5e8192d36e5a 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -112,6 +112,7 @@ extern void cache_APIC_registers(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
+extern void end_local_APIC_setup(void);
 extern void init_apic_mappings(void);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);

From 7b83dae7aa31db4f6d6e78c3c6d490a7ac58699c Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 30 Jan 2008 13:30:40 +0100
Subject: [PATCH 144/890] x86: extended interrupt LVT support for AMD Barcelona

Also macro definitions in apicdef.h has been updated.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c               | 26 +++++++++++++++++++++----
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 12 ++++++------
 include/asm-x86/apic.h                  |  4 ++--
 include/asm-x86/apicdef.h               | 19 +++++++++++-------
 4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d341f798255c..027004262105 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -187,17 +187,35 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 }
 
 /*
- * Setup extended LVT (K8 specific)
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
  */
-void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
-			     unsigned char msg_type, unsigned char mask)
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
 {
-	unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
 	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 
 	apic_write(reg, v);
 }
 
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_IBS;
+}
+
 /*
  * Program the next event, relative to now
  */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 073afa7dd89a..550502596ca3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
 	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
+	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
@@ -153,13 +154,12 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 			if (shared_bank[bank] && c->cpu_core_id)
 				break;
 #endif
-			high &= ~MASK_LVTOFF_HI;
-			high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
-			wrmsr(address, low, high);
+			lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR,
+						       APIC_EILVT_MSG_FIX, 0);
 
-			setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
-						THRESHOLD_APIC_VECTOR,
-						K8_APIC_EXT_INT_MSG_FIX, 0);
+			high &= ~MASK_LVTOFF_HI;
+			high |= lvt_off << 20;
+			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
 			threshold_restart_bank(&threshold_defaults, 0, 0);
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 5e8192d36e5a..423022759cb2 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -126,8 +126,8 @@ extern void enable_NMI_through_LVT0(void *dummy);
 extern void setup_apic_routing(void);
 #endif
 
-extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
-				    unsigned char msg_type, unsigned char mask);
+extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask);
+extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
 
 extern int apic_is_clustered_box(void);
 
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 5f7abe9b5f87..550af7a6f88e 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -116,13 +116,18 @@
 #define		APIC_TDR_DIV_32		0x8
 #define		APIC_TDR_DIV_64		0x9
 #define		APIC_TDR_DIV_128	0xA
-
-#define K8_APIC_EXT_LVT_BASE		0x500
-#define K8_APIC_EXT_INT_MSG_FIX		0x0
-#define K8_APIC_EXT_INT_MSG_SMI		0x2
-#define K8_APIC_EXT_INT_MSG_NMI		0x4
-#define K8_APIC_EXT_INT_MSG_EXT		0x7
-#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD	0
+#define	APIC_EILVT0     0x500
+#define		APIC_EILVT_NR_AMD_K8	1	/* Number of extended interrupts */
+#define		APIC_EILVT_NR_AMD_10H	4
+#define		APIC_EILVT_LVTOFF(x)	(((x)>>4)&0xF)
+#define		APIC_EILVT_MSG_FIX	0x0
+#define		APIC_EILVT_MSG_SMI	0x2
+#define		APIC_EILVT_MSG_NMI	0x4
+#define		APIC_EILVT_MSG_EXT	0x7
+#define		APIC_EILVT_MASKED	(1<<16)
+#define	APIC_EILVT1     0x510
+#define	APIC_EILVT2     0x520
+#define	APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 

From c1d171a002942ea2d93b4fbd0c9583c56fce0772 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 30 Jan 2008 13:30:40 +0100
Subject: [PATCH 145/890] x86: randomize brk

Randomize the location of the heap (brk) for i386 and x86_64.  The range is
randomized in the range starting at current brk location up to 0x02000000
offset for both architectures.  This, together with
pie-executable-randomization.patch and
pie-executable-randomization-fix.patch, should make the address space
randomization on i386 and x86_64 complete.

Arjan says:

This is known to break older versions of some emacs variants, whose dumper
code assumed that the last variable declared in the program is equal to the
start of the dynamically allocated memory region.

(The dumper is the code where emacs effectively dumps core at the end of it's
compilation stage; this coredump is then loaded as the main program during
normal use)

iirc this was 5 years or so; we found this way back when I was at RH and we
first did the security stuff there (including this brk randomization).  It
wasn't all variants of emacs, and it got fixed as a result (I vaguely remember
that emacs already had code to deal with it for other archs/oses, just
ifdeffed wrongly).

It's a rare and wrong assumption as a general thing, just on x86 it mostly
happened to be true (but to be honest, it'll break too if gcc does
something fancy or if the linker does a non-standard order).  Still its
something we should at least document.

Note 2: afaik it only broke the emacs *build*.  I'm not 100% sure about that
(it IS 5 years ago) though.

[ akpm@linux-foundation.org: deuglification ]

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 7 +++++++
 arch/x86/kernel/process_64.c | 7 +++++++
 fs/binfmt_elf.c              | 6 ++++++
 include/asm-x86/elf.h        | 3 +++
 mm/mmap.c                    | 3 ++-
 5 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a8cdd09ad53f..631af167bc51 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -992,3 +992,10 @@ unsigned long arch_align_stack(unsigned long sp)
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 98d85952f574..aa9414ed74c7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -914,3 +914,10 @@ unsigned long arch_align_stack(unsigned long sp)
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f0b3171842f2..043a800c8f71 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1021,6 +1021,12 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->end_data = end_data;
 	current->mm->start_stack = bprm->p;
 
+#ifdef arch_randomize_brk
+	if (current->flags & PF_RANDOMIZE)
+		current->mm->brk = current->mm->start_brk =
+			arch_randomize_brk(current->mm);
+#endif
+
 	if (current->personality & MMAP_PAGE_ZERO) {
 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 		   and some applications "depend" upon this behavior.
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index ec42a4d2e83b..cd3204ebbbdd 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -285,6 +285,9 @@ struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int executable_stack);
 
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/mm/mmap.c b/mm/mmap.c
index bfa389fc6ded..d2b6d44962b7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -251,7 +251,8 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 	 * not page aligned -Ram Gupta
 	 */
 	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
+			(mm->end_data - mm->start_data) > rlim)
 		goto out;
 
 	newbrk = PAGE_ALIGN(brk);

From 5b93049337301d6fbd3cf55db99b34e6b0dbd3a3 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:40 +0100
Subject: [PATCH 146/890] x86 vDSO: generate vdso-syms.lds

This patch adds a new way of extracting symbols from the built vDSO image.
This is much simpler and less fragile than using ld -R; it removes the
need to control the DSO layout quite so exactly.  I was clearly unduly
distracted by clever ld uses when I did the original vDSO implementation.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile   | 14 ++++++++++++++
 arch/x86/vdso/vdso.lds.S | 10 ++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index e7bff0fbac23..d8200adae9fa 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -55,6 +55,20 @@ SYSCFLAGS_vdso-syms.o = -r -d
 $(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,syscall)
 
+targets += vdso-syms.lds
+obj-y += vdso-syms.lds
+
+#
+# Match symbols in the DSO that look like VDSO*; produce a file of constants.
+#
+sed-vdsosym := -e 's/^00*/0/' \
+	-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+quiet_cmd_vdsosym = VDSOSYM $@
+      cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
+
+$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
+	$(call if_changed,vdsosym)
+
 quiet_cmd_vdso_install = INSTALL $@
       cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
 vdso.so:
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 667d3245d972..d40d0e906190 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -8,6 +8,16 @@
 
 #define VDSO_PRELINK 0xffffffffff700000
 
+/*
+ * Symbols we define here called VDSO* get their values into vdso-syms.lds
+ * and become visible to the kernel itself.
+ */
+VDSO64_PRELINK = VDSO_PRELINK;
+
+#define VEXTERN(x)	VDSO64_ ## x = vdso_ ## x;
+#include "vextern.h"
+#undef	VEXTERN
+
 SECTIONS
 {
   . = VDSO_PRELINK + SIZEOF_HEADERS;

From 7f3646aa16f496201e045183402c6614fa27b0c7 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:41 +0100
Subject: [PATCH 147/890] x86 vDSO: use vdso-syms.lds

This patch changes the kernel's references to addresses in the vDSO image
to be based on the symbols defined by vdso-syms.lds instead of the old
vdso-syms.o symbols.  This is all wrapped up in a macro defined by the new
asm-x86/vdso.h header; that's the only place in the kernel source that has
to know the details of the scheme for getting vDSO symbol values.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile     |  3 +--
 arch/x86/vdso/vdso-start.S |  2 --
 arch/x86/vdso/vma.c        | 18 +++++++-----------
 include/asm-x86/vdso.h     | 14 ++++++++++++++
 4 files changed, 22 insertions(+), 15 deletions(-)
 delete mode 100644 arch/x86/vdso/vdso-start.S
 create mode 100644 include/asm-x86/vdso.h

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index d8200adae9fa..d2ec17044adc 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -3,8 +3,7 @@
 #
 
 # files to link into the vdso
-# vdso-start.o has to be first
-vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
 obj-y := vma.o vdso.o vdso-syms.o
diff --git a/arch/x86/vdso/vdso-start.S b/arch/x86/vdso/vdso-start.S
deleted file mode 100644
index 2dc2cdb84d67..000000000000
--- a/arch/x86/vdso/vdso-start.S
+++ /dev/null
@@ -1,2 +0,0 @@
-	.globl vdso_kernel_start
-vdso_kernel_start:
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index ff9333e5fb08..3fdd51497a83 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -11,23 +11,20 @@
 #include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
-#include "voffset.h"
+#include <asm/vdso.h>
+
+#include "vextern.h"		/* Just for VMAGIC.  */
+#undef VEXTERN
 
 int vdso_enabled = 1;
 
-#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
-#include "vextern.h"
-#undef VEXTERN
-
-extern char vdso_kernel_start[], vdso_start[], vdso_end[];
+extern char vdso_start[], vdso_end[];
 extern unsigned short vdso_sync_cpuid;
 
 struct page **vdso_pages;
 
-static inline void *var_ref(void *vbase, char *var, char *name)
+static inline void *var_ref(void *p, char *name)
 {
-	unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
-	void *p = vbase + offset;
 	if (*(void **)p != (void *)VMAGIC) {
 		printk("VDSO: variable %s broken\n", name);
 		vdso_enabled = 0;
@@ -62,9 +59,8 @@ static int __init init_vdso_vars(void)
 		vdso_enabled = 0;
 	}
 
-#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
 #define VEXTERN(x) \
-	V(vdso_ ## x) = &__ ## x;
+	*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
 #include "vextern.h"
 #undef VEXTERN
 	return 0;
diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
new file mode 100644
index 000000000000..9379ec06fd1f
--- /dev/null
+++ b/include/asm-x86/vdso.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_X86_VDSO_H
+#define _ASM_X86_VDSO_H	1
+
+extern const char VDSO64_PRELINK[];
+
+/*
+ * Given a pointer to the vDSO image, find the pointer to VDSO64_name
+ * as that symbol is defined in the vDSO sources or linker script.
+ */
+#define VDSO64_SYMBOL(base, name) ({		\
+	extern const char VDSO64_##name[];	\
+	(void *) (VDSO64_##name - VDSO64_PRELINK + (unsigned long) (base)); })
+
+#endif	/* asm-x86/vdso.h */

From 2b9c97e16101e8dc2b0810d6f932d475a051d785 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:41 +0100
Subject: [PATCH 148/890] x86 vDSO: remove vdso-syms.o

Get rid of vdso-syms.o from the kernel link.  We don't need it any more.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index d2ec17044adc..6a665dd09848 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -6,13 +6,13 @@
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
-obj-y := vma.o vdso.o vdso-syms.o
+obj-y := vma.o vdso.o
 
 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
 $(obj)/vdso.o: $(obj)/vdso.so
 
-targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) vdso-syms.o
+targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
 # The DSO images are built using a special linker script.
 quiet_cmd_syscall = SYSCALL $@
@@ -43,17 +43,6 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m
 $(obj)/vclock_gettime.o: KBUILD_CFLAGS = $(CFL)
 $(obj)/vgetcpu.o: KBUILD_CFLAGS = $(CFL)
 
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO.  With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vdso-syms.o
-$(obj)/built-in.o: $(obj)/vdso-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
-
-SYSCFLAGS_vdso-syms.o = -r -d
-$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
-	$(call if_changed,syscall)
-
 targets += vdso-syms.lds
 obj-y += vdso-syms.lds
 

From f6b46ebf904f69a73907a5e6b1ed2228e3f03d9e Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:41 +0100
Subject: [PATCH 149/890] x86 vDSO: new layout

This revamps the vDSO linker script to lay things out with the best
packing of the data and good, separate alignment of the code.  The
rigid layout using VDSO_TEXT_OFFSET no longer matters to the kernel.
I've moved the layout parts of the linker script into a new include
file, vdso-layout.lds.S; this is in preparation for sharing the script
for the 32-bit vDSO builds too.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/vdso-layout.lds.S |  64 ++++++++++++++++++++
 arch/x86/vdso/vdso.lds.S        | 102 ++++++++------------------------
 arch/x86/vdso/voffset.h         |   1 -
 3 files changed, 89 insertions(+), 78 deletions(-)
 create mode 100644 arch/x86/vdso/vdso-layout.lds.S
 delete mode 100644 arch/x86/vdso/voffset.h

diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
new file mode 100644
index 000000000000..634a2cf62046
--- /dev/null
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -0,0 +1,64 @@
+/*
+ * Linker script for vDSO.  This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+SECTIONS
+{
+	. = VDSO_PRELINK + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.rodata		: { *(.rodata*) }		:text
+	.data		: {
+	      *(.data*)
+	      *(.sdata*)
+	      *(.got.plt) *(.got)
+	      *(.gnu.linkonce.d.*)
+	      *(.bss*)
+	      *(.dynbss*)
+	      *(.gnu.linkonce.b.*)
+	}
+
+	.altinstructions	: { *(.altinstructions) }
+	.altinstr_replacement	: { *(.altinstr_replacement) }
+
+	/*
+	 * Align the actual code well away from the non-instruction data.
+	 * This is the best thing for the I-cache.
+	 */
+	. = ALIGN(0x100);
+
+	.text		: { *(.text*) }			:text	=0x90909090
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
+	note		PT_NOTE		FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index d40d0e906190..4e5dd3b4de7f 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -1,89 +1,37 @@
 /*
- * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page).  This script controls its layout.
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.  We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
  */
-#include <asm/asm-offsets.h>
-#include "voffset.h"
 
 #define VDSO_PRELINK 0xffffffffff700000
+#include "vdso-layout.lds.S"
 
 /*
- * Symbols we define here called VDSO* get their values into vdso-syms.lds
- * and become visible to the kernel itself.
+ * This controls what userland symbols we export from the vDSO.
  */
+VERSION {
+	LINUX_2.6 {
+	global:
+		clock_gettime;
+		__vdso_clock_gettime;
+		gettimeofday;
+		__vdso_gettimeofday;
+		getcpu;
+		__vdso_getcpu;
+	local: *;
+	};
+}
+
 VDSO64_PRELINK = VDSO_PRELINK;
 
+/*
+ * Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
+ */
 #define VEXTERN(x)	VDSO64_ ## x = vdso_ ## x;
 #include "vextern.h"
 #undef	VEXTERN
-
-SECTIONS
-{
-  . = VDSO_PRELINK + SIZEOF_HEADERS;
-
-  .hash           : { *(.hash) }		:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-
-  /* This linker script is used both with -r and with -shared.
-     For the layouts to match, we need to skip more than enough
-     space for the dynamic symbol table et al.  If this amount
-     is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
-
-  .text           : { *(.text*) }		:text
-  .rodata         : { *(.rodata*) }		:text
-  .data		  : {
-	*(.data*)
-	*(.sdata*)
-	*(.bss*)
-	*(.dynbss*)
-  }						:text
-
-  .altinstructions : { *(.altinstructions) }		:text
-  .altinstr_replacement  : { *(.altinstr_replacement) }	:text
-
-  .note		  : { *(.note.*) }		:text :note
-  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text :eh_frame_hdr
-  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
-  .dynamic        : { *(.dynamic) }		:text :dynamic
-  .useless        : {
-  	*(.got.plt) *(.got)
-	*(.gnu.linkonce.d.*)
-	*(.gnu.linkonce.b.*)
-  }						:text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-  LINUX_2.6 {
-    global:
-	clock_gettime;
-	__vdso_clock_gettime;
-	gettimeofday;
-	__vdso_gettimeofday;
-	getcpu;
-	__vdso_getcpu;
-    local: *;
-  };
-}
diff --git a/arch/x86/vdso/voffset.h b/arch/x86/vdso/voffset.h
deleted file mode 100644
index 4af67c79085f..000000000000
--- a/arch/x86/vdso/voffset.h
+++ /dev/null
@@ -1 +0,0 @@
-#define VDSO_TEXT_OFFSET 0x600

From 108b545137b03ec1d6a5765017d57f86056bf57c Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:41 +0100
Subject: [PATCH 150/890] x86 vDSO: harmonize asm-offsets

This change harmonizes the asm-offsets macros used in the 32-bit vDSO
across 32-bit and 64-bit builds.  It's a purely cosmetic change for now,
but it paves the way for consolidating the 32-bit vDSO builds.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_32.c        | 20 ++++-----
 arch/x86/kernel/vsyscall-sigreturn_32.S | 54 ++++++++++++-------------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index c1ccfabb4a9e..4f750ec810f6 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -38,15 +38,15 @@ void foo(void);
 
 void foo(void)
 {
-	OFFSET(SIGCONTEXT_eax, sigcontext, eax);
-	OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
-	OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
-	OFFSET(SIGCONTEXT_edx, sigcontext, edx);
-	OFFSET(SIGCONTEXT_esi, sigcontext, esi);
-	OFFSET(SIGCONTEXT_edi, sigcontext, edi);
-	OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
-	OFFSET(SIGCONTEXT_esp, sigcontext, esp);
-	OFFSET(SIGCONTEXT_eip, sigcontext, eip);
+	OFFSET(IA32_SIGCONTEXT_eax, sigcontext, eax);
+	OFFSET(IA32_SIGCONTEXT_ebx, sigcontext, ebx);
+	OFFSET(IA32_SIGCONTEXT_ecx, sigcontext, ecx);
+	OFFSET(IA32_SIGCONTEXT_edx, sigcontext, edx);
+	OFFSET(IA32_SIGCONTEXT_esi, sigcontext, esi);
+	OFFSET(IA32_SIGCONTEXT_edi, sigcontext, edi);
+	OFFSET(IA32_SIGCONTEXT_ebp, sigcontext, ebp);
+	OFFSET(IA32_SIGCONTEXT_esp, sigcontext, esp);
+	OFFSET(IA32_SIGCONTEXT_eip, sigcontext, eip);
 	BLANK();
 
 	OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
@@ -94,7 +94,7 @@ void foo(void)
 	BLANK();
 
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
-	OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+	OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
 	BLANK();
 
 	OFFSET(pbe_address, pbe, address);
diff --git a/arch/x86/kernel/vsyscall-sigreturn_32.S b/arch/x86/kernel/vsyscall-sigreturn_32.S
index a92262f41659..e939253ad654 100644
--- a/arch/x86/kernel/vsyscall-sigreturn_32.S
+++ b/arch/x86/kernel/vsyscall-sigreturn_32.S
@@ -91,27 +91,27 @@ __kernel_rt_sigreturn:
 	.sleb128 offset;		/*       offset */		\
 1:
 
-	do_cfa_expr(SIGCONTEXT_esp+4)
-	do_expr(0, SIGCONTEXT_eax+4)
-	do_expr(1, SIGCONTEXT_ecx+4)
-	do_expr(2, SIGCONTEXT_edx+4)
-	do_expr(3, SIGCONTEXT_ebx+4)
-	do_expr(5, SIGCONTEXT_ebp+4)
-	do_expr(6, SIGCONTEXT_esi+4)
-	do_expr(7, SIGCONTEXT_edi+4)
-	do_expr(8, SIGCONTEXT_eip+4)
+	do_cfa_expr(IA32_SIGCONTEXT_esp+4)
+	do_expr(0, IA32_SIGCONTEXT_eax+4)
+	do_expr(1, IA32_SIGCONTEXT_ecx+4)
+	do_expr(2, IA32_SIGCONTEXT_edx+4)
+	do_expr(3, IA32_SIGCONTEXT_ebx+4)
+	do_expr(5, IA32_SIGCONTEXT_ebp+4)
+	do_expr(6, IA32_SIGCONTEXT_esi+4)
+	do_expr(7, IA32_SIGCONTEXT_edi+4)
+	do_expr(8, IA32_SIGCONTEXT_eip+4)
 
 	.byte 0x42	/* DW_CFA_advance_loc 2 -- nop; popl eax. */
 
-	do_cfa_expr(SIGCONTEXT_esp)
-	do_expr(0, SIGCONTEXT_eax)
-	do_expr(1, SIGCONTEXT_ecx)
-	do_expr(2, SIGCONTEXT_edx)
-	do_expr(3, SIGCONTEXT_ebx)
-	do_expr(5, SIGCONTEXT_ebp)
-	do_expr(6, SIGCONTEXT_esi)
-	do_expr(7, SIGCONTEXT_edi)
-	do_expr(8, SIGCONTEXT_eip)
+	do_cfa_expr(IA32_SIGCONTEXT_esp)
+	do_expr(0, IA32_SIGCONTEXT_eax)
+	do_expr(1, IA32_SIGCONTEXT_ecx)
+	do_expr(2, IA32_SIGCONTEXT_edx)
+	do_expr(3, IA32_SIGCONTEXT_ebx)
+	do_expr(5, IA32_SIGCONTEXT_ebp)
+	do_expr(6, IA32_SIGCONTEXT_esi)
+	do_expr(7, IA32_SIGCONTEXT_edi)
+	do_expr(8, IA32_SIGCONTEXT_eip)
 
 	.align 4
 .LENDFDEDLSI1:
@@ -128,15 +128,15 @@ __kernel_rt_sigreturn:
 	   slightly less complicated than the above, since we don't
 	   modify the stack pointer in the process.  */
 
-	do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
-	do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
-	do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
-	do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
-	do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
-	do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
-	do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
-	do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
-	do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
+	do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
+	do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
+	do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
+	do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
+	do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
+	do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
+	do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
+	do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
+	do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
 
 	.align 4
 .LENDFDEDLSI2:

From 0c2f51a7d2546f65e4198cccd6a56e2a8b857677 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 151/890] x86 vDSO: arch/x86/vdso/vdso32

This moves the i386 vDSO sources into arch/x86/vdso/vdso32/, a
new directory.  This patch is a pure renaming, but paves the way
for consolidating the vDSO build logic.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/vsyscall-sigreturn.S                            | 3 +--
 arch/x86/kernel/Makefile_32                                   | 3 +++
 arch/x86/{kernel/vsyscall-int80_32.S => vdso/vdso32/int80.S}  | 2 +-
 arch/x86/{kernel/vsyscall-note_32.S => vdso/vdso32/note.S}    | 2 +-
 .../vsyscall-sigreturn_32.S => vdso/vdso32/sigreturn.S}       | 4 ++--
 .../{kernel/vsyscall-sysenter_32.S => vdso/vdso32/sysenter.S} | 2 +-
 6 files changed, 9 insertions(+), 7 deletions(-)
 rename arch/x86/{kernel/vsyscall-int80_32.S => vdso/vdso32/int80.S} (97%)
 rename arch/x86/{kernel/vsyscall-note_32.S => vdso/vdso32/note.S} (95%)
 rename arch/x86/{kernel/vsyscall-sigreturn_32.S => vdso/vdso32/sigreturn.S} (97%)
 rename arch/x86/{kernel/vsyscall-sysenter_32.S => vdso/vdso32/sysenter.S} (99%)

diff --git a/arch/x86/ia32/vsyscall-sigreturn.S b/arch/x86/ia32/vsyscall-sigreturn.S
index b383be00baec..295eecf91f17 100644
--- a/arch/x86/ia32/vsyscall-sigreturn.S
+++ b/arch/x86/ia32/vsyscall-sigreturn.S
@@ -139,5 +139,4 @@ __kernel_rt_sigreturn:
 	.align 4
 .LENDFDE3:
 
-#include "../../x86/kernel/vsyscall-note_32.S"
-
+#include "../vdso/vdso32/note.S"
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index eb2da53578d7..f7c1c1c88dda 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -56,6 +56,9 @@ $(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so
 targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so)
 targets += vsyscall-note_32.o vsyscall_32.lds
 
+$(obj)/vsyscall-%_32.o: $(src)/../vdso/vdso32/%.S
+	$(call if_changed_dep,as_o_S)
+
 # The DSO images are built using a special linker script.
 quiet_cmd_syscall = SYSCALL $@
       cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
diff --git a/arch/x86/kernel/vsyscall-int80_32.S b/arch/x86/vdso/vdso32/int80.S
similarity index 97%
rename from arch/x86/kernel/vsyscall-int80_32.S
rename to arch/x86/vdso/vdso32/int80.S
index 103cab6aa7c0..3c8e4c62ace3 100644
--- a/arch/x86/kernel/vsyscall-int80_32.S
+++ b/arch/x86/vdso/vdso32/int80.S
@@ -50,4 +50,4 @@ __kernel_vsyscall:
 /*
  * Get the common code for the sigreturn entry points.
  */
-#include "vsyscall-sigreturn_32.S"
+#include "sigreturn.S"
diff --git a/arch/x86/kernel/vsyscall-note_32.S b/arch/x86/vdso/vdso32/note.S
similarity index 95%
rename from arch/x86/kernel/vsyscall-note_32.S
rename to arch/x86/vdso/vdso32/note.S
index fcf376a37f79..0cf934f2633e 100644
--- a/arch/x86/kernel/vsyscall-note_32.S
+++ b/arch/x86/vdso/vdso32/note.S
@@ -33,7 +33,7 @@ ELFNOTE_END
  * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
  */
 
-#include "../../x86/xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT.  */
+#include "../../xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT.  */
 
 	.globl VDSO_NOTE_MASK
 ELFNOTE_START(GNU, 2, "a")
diff --git a/arch/x86/kernel/vsyscall-sigreturn_32.S b/arch/x86/vdso/vdso32/sigreturn.S
similarity index 97%
rename from arch/x86/kernel/vsyscall-sigreturn_32.S
rename to arch/x86/vdso/vdso32/sigreturn.S
index e939253ad654..778f4649e32f 100644
--- a/arch/x86/kernel/vsyscall-sigreturn_32.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -70,9 +70,9 @@ __kernel_rt_sigreturn:
 	   be the value of the stack pointer in the caller.  This means
 	   that we must define the CFA of this body of code to be the
 	   saved value of the stack pointer in the sigcontext.  Which
-	   also means that there is no fixed relation to the other 
+	   also means that there is no fixed relation to the other
 	   saved registers, which means that we must use DW_CFA_expression
-	   to compute their addresses.  It also means that when we 
+	   to compute their addresses.  It also means that when we
 	   adjust the stack with the popl, we have to do it all over again.  */
 
 #define do_cfa_expr(offset)						\
diff --git a/arch/x86/kernel/vsyscall-sysenter_32.S b/arch/x86/vdso/vdso32/sysenter.S
similarity index 99%
rename from arch/x86/kernel/vsyscall-sysenter_32.S
rename to arch/x86/vdso/vdso32/sysenter.S
index ed879bf42995..4b98fc41d3e0 100644
--- a/arch/x86/kernel/vsyscall-sysenter_32.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -119,4 +119,4 @@ SYSENTER_RETURN:
 /*
  * Get the common code for the sigreturn entry points.
  */
-#include "vsyscall-sigreturn_32.S"
+#include "sigreturn.S"

From 0249c9c1e7505c2b020bcc6deaf1e0415de9943e Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 152/890] x86 vDSO: vdso32 build

This builds the 32-bit vDSO images in the arch/x86/vdso subdirectory.
Nothing uses the images yet, but this paves the way for consolidating
the vDSO build logic all in one place.  The new images use a linker
script sharing the layout parts from vdso-layout.lds.S with the 64-bit
vDSO.  A new vdso32-syms.lds is generated in the style of vdso-syms.lds.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile            | 78 +++++++++++++++++++++++++++++--
 arch/x86/vdso/vdso32/vdso32.lds.S | 37 +++++++++++++++
 2 files changed, 112 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/vdso/vdso32/vdso32.lds.S

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 6a665dd09848..a02e1ca2a1bc 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -1,7 +1,15 @@
 #
-# x86-64 vDSO.
+# Building vDSO images for x86.
 #
 
+VDSO64-$(CONFIG_X86_64)		:= y
+VDSO32-$(CONFIG_X86_32)		:= y
+VDSO32-$(CONFIG_COMPAT)		:= y
+
+vdso-install-$(VDSO64-y)	+= vdso.so
+vdso-install-$(VDSO32-y)	+= $(vdso32-y:=.so)
+
+
 # files to link into the vdso
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
@@ -57,10 +65,74 @@ quiet_cmd_vdsosym = VDSOSYM $@
 $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
 	$(call if_changed,vdsosym)
 
+#
+# Build multiple 32-bit vDSO images to choose from at boot time.
+#
+vdso32.so-$(CONFIG_X86_32)	+= int80
+vdso32.so-$(VDSO32-y)		+= sysenter
+
+CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+
+# This makes sure the $(obj) subdirectory exists even though vdso32/
+# is not a kbuild sub-make subdirectory.
+override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
+
+targets += vdso32/vdso32.lds
+targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so)
+targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
+
+extra-y	+= $(vdso32.so-y:%=vdso32-%.so)
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
+
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+					 $(obj)/vdso32/vdso32.lds \
+					 $(obj)/vdso32/note.o \
+					 $(obj)/vdso32/%.o
+	$(call if_changed,vdso)
+
+# Make vdso32-*-syms.lds from each image, and then make sure they match.
+# The only difference should be that some do not define VDSO32_SYSENTER_RETURN.
+
+targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds)
+
+quiet_cmd_vdso32sym = VDSOSYM $@
+define cmd_vdso32sym
+	if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \
+	   $(foreach H,$(filter-out FORCE,$^),\
+		     if grep -q VDSO32_SYSENTER_RETURN $H; \
+		     then diff -u $(@D)/.tmp_$(@F) $H; \
+		     else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \
+			  diff -u - $H; fi &&) : ;\
+	then mv -f $(@D)/.tmp_$(@F) $@; \
+	else rm -f $(@D)/.tmp_$(@F); exit 1; \
+	fi
+endef
+
+$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE
+	$(call if_changed,vdso32sym)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
+
+VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+
+#
+# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+#
 quiet_cmd_vdso_install = INSTALL $@
       cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-vdso.so:
+$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
 	@mkdir -p $(MODLIB)/vdso
 	$(call cmd,vdso_install)
 
-vdso_install: vdso.so
+PHONY += vdso_install $(vdso-install-y)
+vdso_install: $(vdso-install-y)
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..976124bb5f92
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -0,0 +1,37 @@
+/*
+ * Linker script for 32-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.  We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ */
+
+#define VDSO_PRELINK 0
+#include "../vdso-layout.lds.S"
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_vsyscall);
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION
+{
+	LINUX_2.5 {
+	global:
+		__kernel_vsyscall;
+		__kernel_sigreturn;
+		__kernel_rt_sigreturn;
+	local: *;
+	};
+}
+
+/*
+ * Symbols we define here called VDSO* get their values into vdso32-syms.h.
+ */
+VDSO32_PRELINK		= VDSO_PRELINK;
+VDSO32_vsyscall		= __kernel_vsyscall;
+VDSO32_sigreturn	= __kernel_sigreturn;
+VDSO32_rt_sigreturn	= __kernel_rt_sigreturn;

From 6c3652efcafa6a6d795093362cb4290c84994b5c Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 153/890] x86 vDSO: i386 vdso32

This makes the i386 kernel use the new vDSO build in arch/x86/vdso/vdso32/
to replace the old one from arch/x86/kernel/.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile_32                          |  3 +-
 arch/x86/kernel/Makefile_32                   | 43 +-----------
 arch/x86/kernel/asm-offsets_32.c              |  2 -
 arch/x86/kernel/signal_32.c                   |  5 +-
 arch/x86/kernel/sysenter_32.c                 |  6 +-
 arch/x86/kernel/vsyscall_32.lds.S             | 67 -------------------
 arch/x86/vdso/Makefile                        |  8 ++-
 .../{kernel/vsyscall_32.S => vdso/vdso32.S}   |  4 +-
 arch/x86/vdso/vdso32/note.S                   |  3 +-
 arch/x86/vdso/vdso32/sysenter.S               |  3 +-
 arch/x86/xen/setup.c                          |  5 +-
 include/asm-x86/elf.h                         | 13 +---
 include/asm-x86/vdso.h                        | 14 ++++
 13 files changed, 37 insertions(+), 139 deletions(-)
 delete mode 100644 arch/x86/kernel/vsyscall_32.lds.S
 rename arch/x86/{kernel/vsyscall_32.S => vdso/vdso32.S} (70%)

diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
index b152db71d5b5..9214224773ed 100644
--- a/arch/x86/Makefile_32
+++ b/arch/x86/Makefile_32
@@ -110,7 +110,8 @@ libs-y 					+= arch/x86/lib/
 core-y					+= arch/x86/kernel/ \
 					   arch/x86/mm/ \
 					   $(mcore-y)/ \
-					   arch/x86/crypto/
+					   arch/x86/crypto/ \
+					   arch/x86/vdso/
 drivers-$(CONFIG_MATH_EMULATION)	+= arch/x86/math-emu/
 drivers-$(CONFIG_PCI)			+= arch/x86/pci/
 # must be linked after kernel/
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index f7c1c1c88dda..0eef8226753b 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -33,7 +33,7 @@ obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
 obj-$(CONFIG_KPROBES)		+= kprobes_32.o
 obj-$(CONFIG_MODULES)		+= module_32.o
-obj-y				+= sysenter_32.o vsyscall_32.o
+obj-y				+= sysenter_32.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
 obj-$(CONFIG_EFI) 		+= efi_32.o efi_stub_32.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
@@ -48,44 +48,3 @@ obj-$(CONFIG_PARAVIRT)		+= paravirt_32.o
 obj-y				+= pcspeaker.o
 
 obj-$(CONFIG_SCx200)		+= scx200_32.o
-
-# vsyscall_32.o contains the vsyscall DSO images as __initdata.
-# We must build both images before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so
-targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so)
-targets += vsyscall-note_32.o vsyscall_32.lds
-
-$(obj)/vsyscall-%_32.o: $(src)/../vdso/vdso32/%.S
-	$(call if_changed_dep,as_o_S)
-
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
-      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
-		          -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386
-
-vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
-		 $(call ld-option, -Wl$(comma)--hash-style=sysv)
-SYSCFLAGS_vsyscall-sysenter_32.so	= $(vsyscall-flags)
-SYSCFLAGS_vsyscall-int80_32.so	= $(vsyscall-flags)
-
-$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \
-		      $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE
-	$(call if_changed,syscall)
-
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO.  With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vsyscall-syms.o
-$(obj)/built-in.o: $(obj)/vsyscall-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
-
-SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \
-			$(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE
-	$(call if_changed,syscall)
-
-
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 4f750ec810f6..fd7464d23339 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,8 +111,6 @@ void foo(void)
 	DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
 	DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
 
-	DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK);
-
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 5c6170c44b00..1ac53e9a0859 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -23,6 +23,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/vdso.h>
 #include "sigframe_32.h"
 
 #define DEBUG_SIG 0
@@ -362,7 +363,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	}
 
 	if (current->binfmt->hasvdso)
-		restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
+		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
 	else
 		restorer = (void *)&frame->retcode;
 	if (ka->sa.sa_flags & SA_RESTORER)
@@ -459,7 +460,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		goto give_sigsegv;
 
 	/* Set up to return from userspace.  */
-	restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn);
+	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 	err |= __put_user(restorer, &frame->pretcode);
diff --git a/arch/x86/kernel/sysenter_32.c b/arch/x86/kernel/sysenter_32.c
index 5a2d951e2608..85c52d23ee40 100644
--- a/arch/x86/kernel/sysenter_32.c
+++ b/arch/x86/kernel/sysenter_32.c
@@ -23,6 +23,7 @@
 #include <asm/unistd.h>
 #include <asm/elf.h>
 #include <asm/tlbflush.h>
+#include <asm/vdso.h>
 
 enum {
 	VDSO_DISABLED = 0,
@@ -259,9 +260,6 @@ int __init sysenter_setup(void)
 	return 0;
 }
 
-/* Defined in vsyscall-sysenter.S */
-extern void SYSENTER_RETURN;
-
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
@@ -308,7 +306,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
+		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
   up_fail:
 	up_write(&mm->mmap_sem);
diff --git a/arch/x86/kernel/vsyscall_32.lds.S b/arch/x86/kernel/vsyscall_32.lds.S
deleted file mode 100644
index 4a8b0ed9b8fb..000000000000
--- a/arch/x86/kernel/vsyscall_32.lds.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page).  This script controls its layout.
- */
-#include <asm/asm-offsets.h>
-
-SECTIONS
-{
-  . = VDSO_PRELINK_asm + SIZEOF_HEADERS;
-
-  .hash           : { *(.hash) }		:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-
-  /* This linker script is used both with -r and with -shared.
-     For the layouts to match, we need to skip more than enough
-     space for the dynamic symbol table et al.  If this amount
-     is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VDSO_PRELINK_asm + 0x400;
-
-  .text           : { *(.text) }		:text =0x90909090
-  .note		  : { *(.note.*) }		:text :note
-  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text :eh_frame_hdr
-  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
-  .dynamic        : { *(.dynamic) }		:text :dynamic
-  .useless        : {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-  }						:text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-  LINUX_2.5 {
-    global:
-    	__kernel_vsyscall;
-    	__kernel_sigreturn;
-    	__kernel_rt_sigreturn;
-
-    local: *;
-  };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value.  */
-ENTRY(__kernel_vsyscall);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index a02e1ca2a1bc..ca2aabf8ed39 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -14,7 +14,8 @@ vdso-install-$(VDSO32-y)	+= $(vdso32-y:=.so)
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
-obj-y := vma.o vdso.o
+obj-$(VDSO64-y)			+= vma.o vdso.o
+obj-$(CONFIG_X86_32)		+= vdso32.o
 
 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
@@ -52,7 +53,7 @@ $(obj)/vclock_gettime.o: KBUILD_CFLAGS = $(CFL)
 $(obj)/vgetcpu.o: KBUILD_CFLAGS = $(CFL)
 
 targets += vdso-syms.lds
-obj-y += vdso-syms.lds
+obj-$(VDSO64-y)			+= vdso-syms.lds
 
 #
 # Match symbols in the DSO that look like VDSO*; produce a file of constants.
@@ -68,6 +69,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
 #
 # Build multiple 32-bit vDSO images to choose from at boot time.
 #
+obj-$(VDSO32-y)			+= vdso32-syms.lds
 vdso32.so-$(CONFIG_X86_32)	+= int80
 vdso32.so-$(VDSO32-y)		+= sysenter
 
@@ -84,6 +86,8 @@ targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
 
 extra-y	+= $(vdso32.so-y:%=vdso32-%.so)
 
+$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so)
+
 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
diff --git a/arch/x86/kernel/vsyscall_32.S b/arch/x86/vdso/vdso32.S
similarity index 70%
rename from arch/x86/kernel/vsyscall_32.S
rename to arch/x86/vdso/vdso32.S
index a5ab3dc4fd25..cab020c99c3d 100644
--- a/arch/x86/kernel/vsyscall_32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -4,12 +4,12 @@ __INITDATA
 
 	.globl vsyscall_int80_start, vsyscall_int80_end
 vsyscall_int80_start:
-	.incbin "arch/x86/kernel/vsyscall-int80_32.so"
+	.incbin "arch/x86/vdso/vdso32-int80.so"
 vsyscall_int80_end:
 
 	.globl vsyscall_sysenter_start, vsyscall_sysenter_end
 vsyscall_sysenter_start:
-	.incbin "arch/x86/kernel/vsyscall-sysenter_32.so"
+	.incbin "arch/x86/vdso/vdso32-sysenter.so"
 vsyscall_sysenter_end:
 
 __FINIT
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/vdso/vdso32/note.S
index 0cf934f2633e..c83f25734696 100644
--- a/arch/x86/vdso/vdso32/note.S
+++ b/arch/x86/vdso/vdso32/note.S
@@ -35,10 +35,9 @@ ELFNOTE_END
 
 #include "../../xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT.  */
 
-	.globl VDSO_NOTE_MASK
 ELFNOTE_START(GNU, 2, "a")
 	.long 1			/* ncaps */
-VDSO_NOTE_MASK:
+VDSO32_NOTE_MASK:		/* Symbol used by arch/x86/xen/setup.c */
 	.long 0			/* mask */
 	.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg"	/* bit, name */
 ELFNOTE_END
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index 4b98fc41d3e0..109bfa394eaa 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -45,8 +45,7 @@ __kernel_vsyscall:
 	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
 	jmp .Lenter_kernel
 	/* 16: System call normal return point is here! */
-	.globl SYSENTER_RETURN	/* Symbol used by sysenter.c  */
-SYSENTER_RETURN:
+VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
 	pop %ebp
 .Lpop_ebp:
 	pop %edx
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f84e77226646..fd91568090f4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -10,6 +10,7 @@
 #include <linux/pm.h>
 
 #include <asm/elf.h>
+#include <asm/vdso.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/xen/hypervisor.h>
@@ -61,10 +62,8 @@ static void xen_idle(void)
  */
 static void fiddle_vdso(void)
 {
-	extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S.  */
 	extern char vsyscall_int80_start;
-	u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK +
-			     &vsyscall_int80_start);
+	u32 *mask = VDSO32_SYMBOL(&vsyscall_int80_start, NOTE_MASK);
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index cd3204ebbbdd..70edff2d5671 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -78,6 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 #include <asm/processor.h>
 #include <asm/system.h>		/* for savesegment */
 #include <asm/desc.h>
+#include <asm/vdso.h>
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -247,17 +248,9 @@ extern int dump_task_extended_fpu (struct task_struct *,
 
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
 #define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
-#define VDSO_PRELINK		0
 
-#define VDSO_SYM(x) \
-		(VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK)
-
-#define VDSO_HIGH_EHDR		((const struct elfhdr *) VDSO_HIGH_BASE)
-#define VDSO_EHDR		((const struct elfhdr *) VDSO_CURRENT_BASE)
-
-extern void __kernel_vsyscall;
-
-#define VDSO_ENTRY		VDSO_SYM(&__kernel_vsyscall)
+#define VDSO_ENTRY \
+	((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
 
 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 
diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
index 9379ec06fd1f..629bcb6e8e45 100644
--- a/include/asm-x86/vdso.h
+++ b/include/asm-x86/vdso.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H	1
 
+#ifdef CONFIG_X86_64
 extern const char VDSO64_PRELINK[];
 
 /*
@@ -10,5 +11,18 @@ extern const char VDSO64_PRELINK[];
 #define VDSO64_SYMBOL(base, name) ({		\
 	extern const char VDSO64_##name[];	\
 	(void *) (VDSO64_##name - VDSO64_PRELINK + (unsigned long) (base)); })
+#endif
+
+#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+extern const char VDSO32_PRELINK[];
+
+/*
+ * Given a pointer to the vDSO image, find the pointer to VDSO32_name
+ * as that symbol is defined in the vDSO sources or linker script.
+ */
+#define VDSO32_SYMBOL(base, name) ({		\
+	extern const char VDSO32_##name[];	\
+	(void *) (VDSO32_##name - VDSO32_PRELINK + (unsigned long) (base)); })
+#endif
 
 #endif	/* asm-x86/vdso.h */

From 4707c4717a5a6f375f3300bbccff1d46dcf75b25 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 154/890] x86 vDSO: absolute relocs

This updates the exceptions for absolute relocs for the new symbol name
convention used for symbols extracted from the vDSO images.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/relocs.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 7a0d00b2cf28..d01ea42187e6 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -27,11 +27,6 @@ static unsigned long *relocs;
  * absolute relocations present w.r.t these symbols.
  */
 static const char* safe_abs_relocs[] = {
-		"__kernel_vsyscall",
-		"__kernel_rt_sigreturn",
-		"__kernel_sigreturn",
-		"SYSENTER_RETURN",
-		"VDSO_NOTE_MASK",
 		"xen_irq_disable_direct_reloc",
 		"xen_save_fl_direct_reloc",
 };
@@ -45,6 +40,8 @@ static int is_safe_abs_reloc(const char* sym_name)
 			/* Match found */
 			return 1;
 	}
+	if (strncmp(sym_name, "VDSO", 4) == 0)
+		return 1;
 	if (strncmp(sym_name, "__crc_", 6) == 0)
 		return 1;
 	return 0;

From 85a93b95dcd24023e0590ba4852ebff29208491a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 155/890] x86 vDSO: i386 vdso32 install

This enables 'make vdso_install' for i386 as on x86_64 and powerpc.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile_32 | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
index 9214224773ed..5c6724de904e 100644
--- a/arch/x86/Makefile_32
+++ b/arch/x86/Makefile_32
@@ -149,9 +149,13 @@ zdisk bzdisk: vmlinux
 fdimage fdimage144 fdimage288 isoimage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
 
-install:
+install: vdso_install
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
 
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
+
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
 	$(Q)rm -rf $(objtree)/arch/x86_64

From f288f32dc51042fd3a493b85b226e63f1ad3edcc Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:42 +0100
Subject: [PATCH 156/890] x86 vDSO: vdso32 setup

This moves arch/x86/kernel/sysenter_32.c to arch/x86/vdso/vdso32-setup.c,
keeping all the code relating only to vDSO magic in the vdso/ subdirectory.
This is a pure renaming, but it paves the way to consolidating the code for
dealing with 32-bit vDSOs across CONFIG_X86_32 and CONFIG_IA32_EMULATION.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32                            | 1 -
 arch/x86/vdso/Makefile                                 | 2 +-
 arch/x86/{kernel/sysenter_32.c => vdso/vdso32-setup.c} | 0
 3 files changed, 1 insertion(+), 2 deletions(-)
 rename arch/x86/{kernel/sysenter_32.c => vdso/vdso32-setup.c} (100%)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 0eef8226753b..2c9596b9349c 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -33,7 +33,6 @@ obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
 obj-$(CONFIG_KPROBES)		+= kprobes_32.o
 obj-$(CONFIG_MODULES)		+= module_32.o
-obj-y				+= sysenter_32.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
 obj-$(CONFIG_EFI) 		+= efi_32.o efi_stub_32.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index ca2aabf8ed39..1efe785979a2 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -15,7 +15,7 @@ vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
 obj-$(VDSO64-y)			+= vma.o vdso.o
-obj-$(CONFIG_X86_32)		+= vdso32.o
+obj-$(CONFIG_X86_32)		+= vdso32.o vdso32-setup.o
 
 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
diff --git a/arch/x86/kernel/sysenter_32.c b/arch/x86/vdso/vdso32-setup.c
similarity index 100%
rename from arch/x86/kernel/sysenter_32.c
rename to arch/x86/vdso/vdso32-setup.c

From 0aa97fb22624f18e5925d702ab0364d3838cfd91 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:43 +0100
Subject: [PATCH 157/890] x86 vDSO: ia32_sysenter_target

This harmonizes the name for the entry point from the 32-bit sysenter
instruction across 32-bit and 64-bit kernels.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S   | 10 +++++-----
 arch/x86/vdso/vdso32-setup.c |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index d63609dd64b9..153bb87a4eea 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -283,7 +283,7 @@ END(resume_kernel)
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 
 	# sysenter call handler stub
-ENTRY(sysenter_entry)
+ENTRY(ia32_sysenter_target)
 	CFI_STARTPROC simple
 	CFI_SIGNAL_FRAME
 	CFI_DEF_CFA esp, 0
@@ -360,7 +360,7 @@ sysenter_past_esp:
 	.align 4
 	.long 1b,2b
 .popsection
-ENDPROC(sysenter_entry)
+ENDPROC(ia32_sysenter_target)
 
 	# system call handler stub
 ENTRY(system_call)
@@ -768,7 +768,7 @@ label:						\
 
 KPROBE_ENTRY(debug)
 	RING0_INT_FRAME
-	cmpl $sysenter_entry,(%esp)
+	cmpl $ia32_sysenter_target,(%esp)
 	jne debug_stack_correct
 	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
 debug_stack_correct:
@@ -799,7 +799,7 @@ KPROBE_ENTRY(nmi)
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
 	je nmi_espfix_stack
-	cmpl $sysenter_entry,(%esp)
+	cmpl $ia32_sysenter_target,(%esp)
 	je nmi_stack_fixup
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
@@ -812,7 +812,7 @@ KPROBE_ENTRY(nmi)
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
 	jae nmi_stack_correct
-	cmpl $sysenter_entry,12(%esp)
+	cmpl $ia32_sysenter_target,12(%esp)
 	je nmi_debug_stack_check
 nmi_stack_correct:
 	/* We have a RING0_INT_FRAME here */
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 85c52d23ee40..fb71a93c5dce 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -54,7 +54,7 @@ static int __init vdso_setup(char *s)
 
 __setup("vdso=", vdso_setup);
 
-extern asmlinkage void sysenter_entry(void);
+extern asmlinkage void ia32_sysenter_target(void);
 
 static __init void reloc_symtab(Elf32_Ehdr *ehdr,
 				unsigned offset, unsigned size)
@@ -187,7 +187,7 @@ void enable_sep_cpu(void)
 	tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
 	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
-	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
 	put_cpu();	
 }
 

From 36197c92a20c142fc2a068e0366053d770fa0096 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:43 +0100
Subject: [PATCH 158/890] x86 vDSO: ia32 sysenter_return

This changes the 64-bit kernel's support for the 32-bit sysenter
instruction to use stored fields rather than constants for the
user-mode return address, as the 32-bit kernel does.  This adds a
sysenter_return field to struct thread_info, as 32-bit has.  There
is no observable effect from this yet.  It makes the assembly code
independent of the 32-bit vDSO mapping address, paving the way for
making the vDSO address vary as it does on the 32-bit kernel.

[ akpm@linux-foundation.org: build fix on !CONFIG_IA32_EMULATION ]

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32entry.S        | 7 +++----
 arch/x86/ia32/syscall32.c        | 4 ++++
 arch/x86/kernel/asm-offsets_64.c | 3 +++
 include/asm-x86/thread_info_64.h | 3 +++
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index df588f0f76e1..2499a324feaa 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -12,7 +12,6 @@
 #include <asm/ia32_unistd.h>	
 #include <asm/thread_info.h>	
 #include <asm/segment.h>
-#include <asm/vsyscall32.h>
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
 
@@ -104,7 +103,7 @@ ENTRY(ia32_sysenter_target)
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	$VSYSCALL32_SYSEXIT, %r10d
+	movl	8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
 	CFI_REGISTER rip,r10
 	pushq	$__USER32_CS
 	CFI_ADJUST_CFA_OFFSET 8
@@ -142,6 +141,8 @@ sysenter_do_call:
 	andl    $~TS_COMPAT,threadinfo_status(%r10)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl  $~0x200,EFLAGS-R11(%rsp) 
+	movl	RIP-R11(%rsp),%edx		/* User %eip */
+	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 1,24,1,1,1,1
 	popfq
 	CFI_ADJUST_CFA_OFFSET -8
@@ -149,8 +150,6 @@ sysenter_do_call:
 	popq	%rcx				/* User %esp */
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rsp,rcx
-	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
-	CFI_REGISTER rip,rdx
 	TRACE_IRQS_ON
 	swapgs
 	sti		/* sti only takes effect after the next instruction */
diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c
index d751d96c2ef2..98ff99f5b59a 100644
--- a/arch/x86/ia32/syscall32.c
+++ b/arch/x86/ia32/syscall32.c
@@ -46,6 +46,10 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 				      VM_ALWAYSDUMP,
 				      syscall32_pages);
+	if (ret == 0) {
+		current->mm->context.vdso = (void __user *)VSYSCALL32_BASE;
+		current_thread_info()->sysenter_return = VSYSCALL32_SYSEXIT;
+	}
 	up_write(&mm->mmap_sem);
 	return ret;
 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 40f41752c1df..c27c646214f4 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -46,6 +46,9 @@ int main(void)
 	ENTRY(addr_limit);
 	ENTRY(preempt_count);
 	ENTRY(status);
+#ifdef CONFIG_IA32_EMULATION
+	ENTRY(sysenter_return);
+#endif
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index 1ac23c157231..c8e7736fc792 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -33,6 +33,9 @@ struct thread_info {
 
 	mm_segment_t		addr_limit;	
 	struct restart_block    restart_block;
+#ifdef CONFIG_IA32_EMULATION
+	void __user		*sysenter_return;
+#endif
 };
 #endif
 

From 00f8b1bc0e44ba94fb33e1fbd8ac82841d7cc570 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:43 +0100
Subject: [PATCH 159/890] x86 vDSO: ia32 vdso32-syscall build

This puts the syscall version of the 32-bit vDSO in arch/x86/vdso/vdso32/
for 64-bit IA32 support.  This is not used yet, but it paves the way for
consolidating the 32-bit vDSO source and build logic all in one place.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile           |  1 +
 arch/x86/vdso/vdso32/sigreturn.S |  9 +++--
 arch/x86/vdso/vdso32/syscall.S   | 69 ++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/vdso/vdso32/syscall.S

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 1efe785979a2..1127c716df02 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -71,6 +71,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
 #
 obj-$(VDSO32-y)			+= vdso32-syms.lds
 vdso32.so-$(CONFIG_X86_32)	+= int80
+vdso32.so-$(CONFIG_COMPAT)	+= syscall
 vdso32.so-$(VDSO32-y)		+= sysenter
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index 778f4649e32f..8d65a0a0eb70 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -6,9 +6,12 @@
  * routines are constant for all vsyscall implementations.
  */
 
-#include <asm/unistd.h>
+#include <asm/unistd_32.h>
 #include <asm/asm-offsets.h>
 
+#ifndef SYSCALL_ENTER_KERNEL
+#define	SYSCALL_ENTER_KERNEL	int $0x80
+#endif
 
 /* XXX
    Should these be named "_sigtramp" or something?
@@ -22,7 +25,7 @@ __kernel_sigreturn:
 .LSTART_sigreturn:
 	popl %eax		/* XXX does this mean it needs unwind info? */
 	movl $__NR_sigreturn, %eax
-	int $0x80
+	SYSCALL_ENTER_KERNEL
 .LEND_sigreturn:
 	.size __kernel_sigreturn,.-.LSTART_sigreturn
 
@@ -32,7 +35,7 @@ __kernel_sigreturn:
 __kernel_rt_sigreturn:
 .LSTART_rt_sigreturn:
 	movl $__NR_rt_sigreturn, %eax
-	int $0x80
+	SYSCALL_ENTER_KERNEL
 .LEND_rt_sigreturn:
 	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
 	.balign 32
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
new file mode 100644
index 000000000000..333bfb552c88
--- /dev/null
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -0,0 +1,69 @@
+/*
+ * Code for the vsyscall page.  This version uses the syscall instruction.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/segment.h>
+
+	.text
+	.globl __kernel_vsyscall
+	.type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+	push	%ebp
+.Lpush_ebp:
+	movl	%ecx, %ebp
+	syscall
+	movl	$__USER32_DS, %ecx
+	movl	%ecx, %ss
+	movl	%ebp, %ecx
+	popl	%ebp
+.Lpop_ebp:
+	ret
+.LEND_vsyscall:
+	.size __kernel_vsyscall,.-.LSTART_vsyscall
+
+	.section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+	.long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zR"		/* NUL-terminated augmentation string */
+	.uleb128 1		/* Code alignment factor */
+	.sleb128 -4		/* Data alignment factor */
+	.byte 8			/* Return address register column */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+	.byte 0x0c		/* DW_CFA_def_cfa */
+	.uleb128 4
+	.uleb128 4
+	.byte 0x88		/* DW_CFA_offset, column 0x8 */
+	.uleb128 1
+	.align 4
+.LENDCIE:
+
+	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
+.LSTARTFDE1:
+	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
+	.long .LSTART_vsyscall-.	/* PC-relative start address */
+	.long .LEND_vsyscall-.LSTART_vsyscall
+	.uleb128 0			/* Augmentation length */
+	/* What follows are the instructions for the table generation.
+	   We have to record all changes of the stack pointer.  */
+	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.uleb128 8
+	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
+	.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
+	.byte 0xc5		/* DW_CFA_restore %ebp */
+	.byte 0x0e		/* DW_CFA_def_cfa_offset */
+	.uleb128 4
+	.align 4
+.LENDFDE1:
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#define SYSCALL_ENTER_KERNEL	syscall
+#include "sigreturn.S"

From af65d64845a90c8f2fc90b97e2148ff74672e979 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:43 +0100
Subject: [PATCH 160/890] x86 vDSO: consolidate vdso32

This makes x86_64's ia32 emulation support share the sources used in the
32-bit kernel for the 32-bit vDSO and much of its setup code.

The 32-bit vDSO mapping now behaves the same on x86_64 as on native 32-bit.
The abi.syscall32 sysctl on x86_64 now takes the same values that
vm.vdso_enabled takes on the 32-bit kernel.  That is, 1 means a randomized
vDSO location, 2 means the fixed old address.  The CONFIG_COMPAT_VDSO
option is now available to make this the default setting, the same meaning
it has for the 32-bit kernel.  (This does not affect the 64-bit vDSO.)

The argument vdso32=[012] can be used on both 32-bit and 64-bit kernels to
set this paramter at boot time.  The vdso=[012] argument still does this
same thing on the 32-bit kernel.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |   5 ++
 arch/x86/Kconfig                    |   4 +-
 arch/x86/ia32/Makefile              |   2 +-
 arch/x86/ia32/ia32_binfmt.c         |   9 +--
 arch/x86/ia32/ia32_signal.c         |  22 +++--
 arch/x86/vdso/Makefile              |   2 +-
 arch/x86/vdso/vdso32-setup.c        | 119 ++++++++++++++++++++++------
 arch/x86/vdso/vdso32.S              |  16 ++--
 arch/x86/xen/setup.c                |   4 +-
 include/asm-x86/elf.h               |  23 +++---
 10 files changed, 146 insertions(+), 60 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b427b7c0e5d0..b3f20beea411 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1988,6 +1988,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
+	vdso32=		[X86-32,X86-64]
+			vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
+			vdso32=1: enable 32-bit VDSO (default)
+			vdso32=0: disable 32-bit VDSO mapping
+
 	vector=		[IA-64,SMP]
 			vector=percpu: enable percpu vector domain
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80b7ba4056db..2f4d88babd36 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1191,9 +1191,9 @@ config HOTPLUG_CPU
 config COMPAT_VDSO
 	bool "Compat VDSO support"
 	default y
-	depends on X86_32
+	depends on X86_32 || IA32_EMULATION
 	help
-	  Map the VDSO to the predictable old-style address too.
+	  Map the 32-bit VDSO to the predictable old-style address too.
 	---help---
 	  Say N here if you are running a sufficiently recent glibc
 	  version (2.3.3 or later), to remove the high-mapped
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index a3c997e9f39a..1f58a21a41dc 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
-	ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o
+	ia32_binfmt.o fpu32.o ptrace32.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
index 55822d2cf053..e32974c3dd3b 100644
--- a/arch/x86/ia32/ia32_binfmt.c
+++ b/arch/x86/ia32/ia32_binfmt.c
@@ -26,7 +26,7 @@
 #include <asm/i387.h>
 #include <asm/uaccess.h>
 #include <asm/ia32.h>
-#include <asm/vsyscall32.h>
+#include <asm/vdso.h>
 
 #undef	ELF_ARCH
 #undef	ELF_CLASS
@@ -47,14 +47,13 @@
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR		33
 
-int sysctl_vsyscall32 = 1;
+extern int sysctl_vsyscall32;
 
 #undef ARCH_DLINFO
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
-		current->mm->context.vdso = (void *)VSYSCALL32_BASE;	\
-		NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
-		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE);    \
+		NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY);		\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE);	\
 	}	\
 } while(0)
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0fc5d8563e19..39356a756b28 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -31,7 +31,7 @@
 #include <asm/sigcontext32.h>
 #include <asm/fpu32.h>
 #include <asm/proto.h>
-#include <asm/vsyscall32.h>
+#include <asm/vdso.h>
 
 #define DEBUG_SIG 0
 
@@ -465,13 +465,16 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 			goto give_sigsegv;
 	}
 
-	/* Return stub is in 32bit vsyscall page */
-	if (current->binfmt->hasvdso)
-		restorer = VSYSCALL32_SIGRETURN;
-	else
-		restorer = (void *)&frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
+	if (ka->sa.sa_flags & SA_RESTORER) {
 		restorer = ka->sa.sa_restorer;
+	} else {
+		/* Return stub is in 32bit vsyscall page */
+		if (current->binfmt->hasvdso)
+			restorer = VDSO32_SYMBOL(current->mm->context.vdso,
+						 sigreturn);
+		else
+			restorer = (void *)&frame->retcode;
+	}
 	err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
 
 	/*
@@ -519,7 +522,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 {
 	struct rt_sigframe __user *frame;
 	struct exec_domain *ed = current_thread_info()->exec_domain;
-	void __user *restorer = VSYSCALL32_RTSIGRETURN;
+	void __user *restorer;
 	int err = 0;
 
 	/* __copy_to_user optimizes that into a single 8 byte store */
@@ -564,6 +567,9 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
+	else
+		restorer = VDSO32_SYMBOL(current->mm->context.vdso,
+					 rt_sigreturn);
 	err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
 
 	/*
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 1127c716df02..47bc2760e6a8 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -15,7 +15,7 @@ vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
 obj-$(VDSO64-y)			+= vma.o vdso.o
-obj-$(CONFIG_X86_32)		+= vdso32.o vdso32-setup.o
+obj-$(VDSO32-y)			+= vdso32.o vdso32-setup.o
 
 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index fb71a93c5dce..d97a6d7d062b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -24,6 +24,7 @@
 #include <asm/elf.h>
 #include <asm/tlbflush.h>
 #include <asm/vdso.h>
+#include <asm/proto.h>
 
 enum {
 	VDSO_DISABLED = 0,
@@ -37,14 +38,24 @@ enum {
 #define VDSO_DEFAULT	VDSO_ENABLED
 #endif
 
+#ifdef CONFIG_X86_64
+#define vdso_enabled			sysctl_vsyscall32
+#define arch_setup_additional_pages	syscall32_setup_pages
+#endif
+
+/*
+ * This is the difference between the prelinked addresses in the vDSO images
+ * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
+ * in the user address space.
+ */
+#define VDSO_ADDR_ADJUST	(VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
+
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
 unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
 
-EXPORT_SYMBOL_GPL(vdso_enabled);
-
 static int __init vdso_setup(char *s)
 {
 	vdso_enabled = simple_strtoul(s, NULL, 0);
@@ -52,9 +63,18 @@ static int __init vdso_setup(char *s)
 	return 1;
 }
 
-__setup("vdso=", vdso_setup);
+/*
+ * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
+ * behavior on both 64-bit and 32-bit kernels.
+ * On 32-bit kernels, vdso=[012] means the same thing.
+ */
+__setup("vdso32=", vdso_setup);
 
-extern asmlinkage void ia32_sysenter_target(void);
+#ifdef CONFIG_X86_32
+__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
+
+EXPORT_SYMBOL_GPL(vdso_enabled);
+#endif
 
 static __init void reloc_symtab(Elf32_Ehdr *ehdr,
 				unsigned offset, unsigned size)
@@ -79,7 +99,7 @@ static __init void reloc_symtab(Elf32_Ehdr *ehdr,
 		case STT_FUNC:
 		case STT_SECTION:
 		case STT_FILE:
-			sym->st_value += VDSO_HIGH_BASE;
+			sym->st_value += VDSO_ADDR_ADJUST;
 		}
 	}
 }
@@ -105,7 +125,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
 		case DT_VERNEED:
 		case DT_ADDRRNGLO ... DT_ADDRRNGHI:
 			/* definitely pointers needing relocation */
-			dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+			dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
 			break;
 
 		case DT_ENCODING ... OLD_DT_LOOS-1:
@@ -114,7 +134,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
 			   they're even */
 			if (dyn->d_tag >= DT_ENCODING &&
 			    (dyn->d_tag & 1) == 0)
-				dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+				dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
 			break;
 
 		case DT_VERDEFNUM:
@@ -143,15 +163,15 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
 	int i;
 
 	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
-	       !elf_check_arch(ehdr) ||
+	       !elf_check_arch_ia32(ehdr) ||
 	       ehdr->e_type != ET_DYN);
 
-	ehdr->e_entry += VDSO_HIGH_BASE;
+	ehdr->e_entry += VDSO_ADDR_ADJUST;
 
 	/* rebase phdrs */
 	phdr = (void *)ehdr + ehdr->e_phoff;
 	for (i = 0; i < ehdr->e_phnum; i++) {
-		phdr[i].p_vaddr += VDSO_HIGH_BASE;
+		phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
 
 		/* relocate dynamic stuff */
 		if (phdr[i].p_type == PT_DYNAMIC)
@@ -164,7 +184,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
 		if (!(shdr[i].sh_flags & SHF_ALLOC))
 			continue;
 
-		shdr[i].sh_addr += VDSO_HIGH_BASE;
+		shdr[i].sh_addr += VDSO_ADDR_ADJUST;
 
 		if (shdr[i].sh_type == SHT_SYMTAB ||
 		    shdr[i].sh_type == SHT_DYNSYM)
@@ -173,6 +193,45 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
 	}
 }
 
+/*
+ * These symbols are defined by vdso32.S to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vdso32_default_start, vdso32_default_end;
+extern const char vdso32_sysenter_start, vdso32_sysenter_end;
+static struct page *vdso32_pages[1];
+
+#ifdef CONFIG_X86_64
+
+static int use_sysenter __read_mostly = -1;
+
+#define	vdso32_sysenter()	(use_sysenter > 0)
+
+/* May not be __init: called during resume */
+void syscall32_cpu_init(void)
+{
+	if (use_sysenter < 0)
+		use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+	/* Load these always in case some future AMD CPU supports
+	   SYSENTER from compat mode too. */
+	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
+	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+}
+
+#define compat_uses_vma		1
+
+static inline void map_compat_vdso(int map)
+{
+}
+
+#else  /* CONFIG_X86_32 */
+
+#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))
+
 void enable_sep_cpu(void)
 {
 	int cpu = get_cpu();
@@ -210,13 +269,7 @@ static int __init gate_vma_init(void)
 	return 0;
 }
 
-/*
- * These symbols are defined by vsyscall.o to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vsyscall_int80_start, vsyscall_int80_end;
-extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static struct page *syscall_pages[1];
+#define compat_uses_vma		0
 
 static void map_compat_vdso(int map)
 {
@@ -227,31 +280,35 @@ static void map_compat_vdso(int map)
 
 	vdso_mapped = map;
 
-	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+	__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
 		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
 
 	/* flush stray tlbs */
 	flush_tlb_all();
 }
 
+#endif	/* CONFIG_X86_64 */
+
 int __init sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 	const void *vsyscall;
 	size_t vsyscall_len;
 
-	syscall_pages[0] = virt_to_page(syscall_page);
+	vdso32_pages[0] = virt_to_page(syscall_page);
 
+#ifdef CONFIG_X86_32
 	gate_vma_init();
 
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+#endif
 
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		vsyscall = &vsyscall_int80_start;
-		vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
+	if (!vdso32_sysenter()) {
+		vsyscall = &vdso32_default_start;
+		vsyscall_len = &vdso32_default_end - &vdso32_default_start;
 	} else {
-		vsyscall = &vsyscall_sysenter_start;
-		vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
+		vsyscall = &vdso32_sysenter_start;
+		vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
 	}
 
 	memcpy(syscall_page, vsyscall, vsyscall_len);
@@ -284,7 +341,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 			ret = addr;
 			goto up_fail;
 		}
+	}
 
+	if (compat_uses_vma || !compat) {
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
 		 *
@@ -298,7 +357,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 					      VM_READ|VM_EXEC|
 					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 					      VM_ALWAYSDUMP,
-					      syscall_pages);
+					      vdso32_pages);
 
 		if (ret)
 			goto up_fail;
@@ -314,6 +373,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 	return ret;
 }
 
+#ifdef CONFIG_X86_64
+
+__initcall(sysenter_setup);
+
+#else  /* CONFIG_X86_32 */
+
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
 	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
@@ -342,3 +407,5 @@ int in_gate_area_no_task(unsigned long addr)
 {
 	return 0;
 }
+
+#endif	/* CONFIG_X86_64 */
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index cab020c99c3d..1e36f72cab86 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -2,14 +2,18 @@
 
 __INITDATA
 
-	.globl vsyscall_int80_start, vsyscall_int80_end
-vsyscall_int80_start:
+	.globl vdso32_default_start, vdso32_default_end
+vdso32_default_start:
+#ifdef CONFIG_X86_32
 	.incbin "arch/x86/vdso/vdso32-int80.so"
-vsyscall_int80_end:
+#else
+	.incbin "arch/x86/vdso/vdso32-syscall.so"
+#endif
+vdso32_default_end:
 
-	.globl vsyscall_sysenter_start, vsyscall_sysenter_end
-vsyscall_sysenter_start:
+	.globl vdso32_sysenter_start, vdso32_sysenter_end
+vdso32_sysenter_start:
 	.incbin "arch/x86/vdso/vdso32-sysenter.so"
-vsyscall_sysenter_end:
+vdso32_sysenter_end:
 
 __FINIT
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fd91568090f4..7d6d0ef55890 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -62,8 +62,8 @@ static void xen_idle(void)
  */
 static void fiddle_vdso(void)
 {
-	extern char vsyscall_int80_start;
-	u32 *mask = VDSO32_SYMBOL(&vsyscall_int80_start, NOTE_MASK);
+	extern const char vdso32_default_start;
+	u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 70edff2d5671..60f5101d9483 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -74,17 +74,19 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 
 #ifdef __KERNEL__
 
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch_ia32(x) \
+	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+
 #ifdef CONFIG_X86_32
 #include <asm/processor.h>
 #include <asm/system.h>		/* for savesegment */
 #include <asm/desc.h>
 #include <asm/vdso.h>
 
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+#define elf_check_arch(x)	elf_check_arch_ia32(x)
 
 /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
    contains a pointer to a function which might be registered using `atexit'.
@@ -247,10 +249,6 @@ extern int dump_task_extended_fpu (struct task_struct *,
 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
 
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
-#define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
-
-#define VDSO_ENTRY \
-	((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
 
 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 
@@ -262,6 +260,8 @@ do if (vdso_enabled) {							\
 
 #else /* CONFIG_X86_32 */
 
+#define VDSO_HIGH_BASE		0xffffe000U /* CONFIG_COMPAT_VDSO address */
+
 /* 1GB for 64bit, 8MB for 32bit */
 #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
 
@@ -272,6 +272,11 @@ do if (vdso_enabled) {						\
 
 #endif /* !CONFIG_X86_32 */
 
+#define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
+
+#define VDSO_ENTRY \
+	((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
+
 struct linux_binprm;
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1

From 16f4bc738d616962a844e80f7b1fcb52c626542a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:44 +0100
Subject: [PATCH 161/890] x86 vDSO: ia32 vsyscall removal

This removes all the old vsyscall code from arch/x86/ia32/ that is
no longer used because arch/x86/vdso/ code has replaced it.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile_64               |   3 -
 arch/x86/ia32/Makefile             |  37 --------
 arch/x86/ia32/syscall32.c          |  90 ------------------
 arch/x86/ia32/syscall32_syscall.S  |  17 ----
 arch/x86/ia32/vsyscall-sigreturn.S | 142 -----------------------------
 arch/x86/ia32/vsyscall-syscall.S   |  69 --------------
 arch/x86/ia32/vsyscall-sysenter.S  |  95 -------------------
 arch/x86/ia32/vsyscall.lds         |  80 ----------------
 include/asm-x86/Kbuild             |   1 -
 include/asm-x86/vsyscall32.h       |  20 ----
 10 files changed, 554 deletions(-)
 delete mode 100644 arch/x86/ia32/syscall32.c
 delete mode 100644 arch/x86/ia32/syscall32_syscall.S
 delete mode 100644 arch/x86/ia32/vsyscall-sigreturn.S
 delete mode 100644 arch/x86/ia32/vsyscall-syscall.S
 delete mode 100644 arch/x86/ia32/vsyscall-sysenter.S
 delete mode 100644 arch/x86/ia32/vsyscall.lds
 delete mode 100644 include/asm-x86/vsyscall32.h

diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64
index 93cf17769ff5..d35a0c50b67b 100644
--- a/arch/x86/Makefile_64
+++ b/arch/x86/Makefile_64
@@ -117,9 +117,6 @@ install: vdso_install
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ 
 
 vdso_install:
-ifeq ($(CONFIG_IA32_EMULATION),y)
-	$(Q)$(MAKE) $(build)=arch/x86/ia32 $@
-endif
 	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
 
 archclean:
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 1f58a21a41dc..8c19b763b2fb 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -12,40 +12,3 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 
 audit-class-$(CONFIG_AUDIT) := audit.o
 obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
-
-$(obj)/syscall32_syscall.o: \
-	$(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
-
-# Teach kbuild about targets
-targets := $(foreach F,$(addprefix vsyscall-,sysenter syscall),\
-		     $F.o $F.so $F.so.dbg)
-
-# The DSO images are built using a special linker script
-quiet_cmd_syscall = SYSCALL $@
-      cmd_syscall = $(CC) -m32 -nostdlib -shared \
-			  $(call ld-option, -Wl$(comma)--hash-style=sysv) \
-			   -Wl,-soname=linux-gate.so.1 -o $@ \
-			   -Wl,-T,$(filter-out FORCE,$^)
-
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vsyscall-sysenter.so.dbg $(obj)/vsyscall-syscall.so.dbg: \
-$(obj)/vsyscall-%.so.dbg: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
-	$(call if_changed,syscall)
-
-AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
-AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
-
-vdsos := vdso32-sysenter.so vdso32-syscall.so
-
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(@:vdso32-%.so=$(obj)/vsyscall-%.so.dbg) \
-			    $(MODLIB)/vdso/$@
-
-$(vdsos):
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: $(vdsos)
diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c
deleted file mode 100644
index 98ff99f5b59a..000000000000
--- a/arch/x86/ia32/syscall32.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2002,2003 Andi Kleen, SuSE Labs
- *
- * vsyscall handling for 32bit processes. Map a stub page into it on
- * demand because 32bit cannot reach the kernel's fixmaps
- */
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/stringify.h>
-#include <linux/security.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-#include <asm/ia32_unistd.h>
-#include <asm/vsyscall32.h>
-
-extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
-extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
-extern int sysctl_vsyscall32;
-
-static struct page *syscall32_pages[1];
-static int use_sysenter = -1;
-
-struct linux_binprm;
-
-/* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
-{
-	struct mm_struct *mm = current->mm;
-	int ret;
-
-	down_write(&mm->mmap_sem);
-	/*
-	 * MAYWRITE to allow gdb to COW and set breakpoints
-	 *
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	/* Could randomize here */
-	ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-				      VM_ALWAYSDUMP,
-				      syscall32_pages);
-	if (ret == 0) {
-		current->mm->context.vdso = (void __user *)VSYSCALL32_BASE;
-		current_thread_info()->sysenter_return = VSYSCALL32_SYSEXIT;
-	}
-	up_write(&mm->mmap_sem);
-	return ret;
-}
-
-static int __init init_syscall32(void)
-{
-	char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
-
-	if (!syscall32_page)
-		panic("Cannot allocate syscall32 page");
-	syscall32_pages[0] = virt_to_page(syscall32_page);
-	if (use_sysenter > 0) {
-		memcpy(syscall32_page, syscall32_sysenter,
-		       syscall32_sysenter_end - syscall32_sysenter);
-	} else {
-		memcpy(syscall32_page, syscall32_syscall,
-		       syscall32_syscall_end - syscall32_syscall);
-	}
-	return 0;
-}
-__initcall(init_syscall32);
-
-/* May not be __init: called during resume */
-void syscall32_cpu_init(void)
-{
-	if (use_sysenter < 0)
-		use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
-
-	/*
-	 * Load these always in case some future AMD CPU supports
-	 * SYSENTER from compat mode too.
-	 */
-	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
-	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
diff --git a/arch/x86/ia32/syscall32_syscall.S b/arch/x86/ia32/syscall32_syscall.S
deleted file mode 100644
index 933f0f08b1cf..000000000000
--- a/arch/x86/ia32/syscall32_syscall.S
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 32bit VDSOs mapped into user space. */
-
-	.section ".init.data","aw"
-
-	.globl syscall32_syscall
-	.globl syscall32_syscall_end
-
-syscall32_syscall:
-	.incbin "arch/x86/ia32/vsyscall-syscall.so"
-syscall32_syscall_end:
-
-	.globl syscall32_sysenter
-	.globl syscall32_sysenter_end
-
-syscall32_sysenter:
-	.incbin "arch/x86/ia32/vsyscall-sysenter.so"
-syscall32_sysenter_end:
diff --git a/arch/x86/ia32/vsyscall-sigreturn.S b/arch/x86/ia32/vsyscall-sigreturn.S
deleted file mode 100644
index 295eecf91f17..000000000000
--- a/arch/x86/ia32/vsyscall-sigreturn.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Common code for the sigreturn entry points on the vsyscall page.
- * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
- * to enter the kernel.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point.  The addresses we get for these entry points
- * by doing ".balign 32" must match in both versions of the page.
- */
-
-	.code32
-	.section .text.sigreturn,"ax"
-	.balign 32
-	.globl __kernel_sigreturn
-	.type __kernel_sigreturn,@function
-__kernel_sigreturn:
-.LSTART_sigreturn:
-	popl %eax
-	movl $__NR_ia32_sigreturn, %eax
-	SYSCALL_ENTER_KERNEL
-.LEND_sigreturn:
-	.size __kernel_sigreturn,.-.LSTART_sigreturn
-
-	.section .text.rtsigreturn,"ax"
-	.balign 32
-	.globl __kernel_rt_sigreturn
-	.type __kernel_rt_sigreturn,@function
-__kernel_rt_sigreturn:
-.LSTART_rt_sigreturn:
-	movl $__NR_ia32_rt_sigreturn, %eax
-	SYSCALL_ENTER_KERNEL
-.LEND_rt_sigreturn:
-	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAMES:
-        .long .LENDCIES-.LSTARTCIES
-.LSTARTCIES:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zRS"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIES:
-
-	.long .LENDFDE2-.LSTARTFDE2	/* Length FDE */
-.LSTARTFDE2:
-	.long .LSTARTFDE2-.LSTARTFRAMES	/* CIE pointer */
-	/* HACK: The dwarf2 unwind routines will subtract 1 from the
-	   return address to get an address in the middle of the
-	   presumed call instruction.  Since we didn't get here via
-	   a call, we need to include the nop before the real start
-	   to make up for it.  */
-	.long .LSTART_sigreturn-1-.	/* PC-relative start address */
-	.long .LEND_sigreturn-.LSTART_sigreturn+1
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We record the locations of each register saved.  This is
-	   complicated by the fact that the "CFA" is always assumed to
-	   be the value of the stack pointer in the caller.  This means
-	   that we must define the CFA of this body of code to be the
-	   saved value of the stack pointer in the sigcontext.  Which
-	   also means that there is no fixed relation to the other 
-	   saved registers, which means that we must use DW_CFA_expression
-	   to compute their addresses.  It also means that when we 
-	   adjust the stack with the popl, we have to do it all over again.  */
-
-#define do_cfa_expr(offset)						\
-	.byte 0x0f;			/* DW_CFA_def_cfa_expression */	\
-	.uleb128 1f-0f;			/*   length */			\
-0:	.byte 0x74;			/*     DW_OP_breg4 */		\
-	.sleb128 offset;		/*      offset */		\
-	.byte 0x06;			/*     DW_OP_deref */		\
-1:
-
-#define do_expr(regno, offset)						\
-	.byte 0x10;			/* DW_CFA_expression */		\
-	.uleb128 regno;			/*   regno */			\
-	.uleb128 1f-0f;			/*   length */			\
-0:	.byte 0x74;			/*     DW_OP_breg4 */		\
-	.sleb128 offset;		/*       offset */		\
-1:
-
-	do_cfa_expr(IA32_SIGCONTEXT_esp+4)
-	do_expr(0, IA32_SIGCONTEXT_eax+4)
-	do_expr(1, IA32_SIGCONTEXT_ecx+4)
-	do_expr(2, IA32_SIGCONTEXT_edx+4)
-	do_expr(3, IA32_SIGCONTEXT_ebx+4)
-	do_expr(5, IA32_SIGCONTEXT_ebp+4)
-	do_expr(6, IA32_SIGCONTEXT_esi+4)
-	do_expr(7, IA32_SIGCONTEXT_edi+4)
-	do_expr(8, IA32_SIGCONTEXT_eip+4)
-
-	.byte 0x42	/* DW_CFA_advance_loc 2 -- nop; popl eax. */
-
-	do_cfa_expr(IA32_SIGCONTEXT_esp)
-	do_expr(0, IA32_SIGCONTEXT_eax)
-	do_expr(1, IA32_SIGCONTEXT_ecx)
-	do_expr(2, IA32_SIGCONTEXT_edx)
-	do_expr(3, IA32_SIGCONTEXT_ebx)
-	do_expr(5, IA32_SIGCONTEXT_ebp)
-	do_expr(6, IA32_SIGCONTEXT_esi)
-	do_expr(7, IA32_SIGCONTEXT_edi)
-	do_expr(8, IA32_SIGCONTEXT_eip)
-
-	.align 4
-.LENDFDE2:
-
-	.long .LENDFDE3-.LSTARTFDE3	/* Length FDE */
-.LSTARTFDE3:
-	.long .LSTARTFDE3-.LSTARTFRAMES	/* CIE pointer */
-	/* HACK: See above wrt unwind library assumptions.  */
-	.long .LSTART_rt_sigreturn-1-.	/* PC-relative start address */
-	.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
-	.uleb128 0			/* Augmentation */
-	/* What follows are the instructions for the table generation.
-	   We record the locations of each register saved.  This is
-	   slightly less complicated than the above, since we don't
-	   modify the stack pointer in the process.  */
-
-	do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
-	do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
-	do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
-	do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
-	do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
-	do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
-	do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
-	do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
-	do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
-
-	.align 4
-.LENDFDE3:
-
-#include "../vdso/vdso32/note.S"
diff --git a/arch/x86/ia32/vsyscall-syscall.S b/arch/x86/ia32/vsyscall-syscall.S
deleted file mode 100644
index cf9ef678de3e..000000000000
--- a/arch/x86/ia32/vsyscall-syscall.S
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Code for the vsyscall page.  This version uses the syscall instruction.
- */
-
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
-#include <asm/segment.h>
-
-	.code32
-	.text
-	.section .text.vsyscall,"ax"
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push	%ebp
-.Lpush_ebp:
-	movl	%ecx, %ebp
-	syscall
-	movl	$__USER32_DS, %ecx
-	movl	%ecx, %ss
-	movl	%ebp, %ecx
-	popl	%ebp
-.Lpop_ebp:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-	.long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIE:
-
-	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
-.LSTARTFDE1:
-	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 8
-	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
-	.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 4
-	.align 4
-.LENDFDE1:
-
-#define SYSCALL_ENTER_KERNEL	syscall
-#include "vsyscall-sigreturn.S"
diff --git a/arch/x86/ia32/vsyscall-sysenter.S b/arch/x86/ia32/vsyscall-sysenter.S
deleted file mode 100644
index ae056e553d13..000000000000
--- a/arch/x86/ia32/vsyscall-sysenter.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Code for the vsyscall page.  This version uses the sysenter instruction.
- */
-
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
-
-	.code32
-	.text
-	.section .text.vsyscall,"ax"
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push	%ecx
-.Lpush_ecx:
-	push	%edx
-.Lpush_edx:
-	push	%ebp
-.Lenter_kernel:
-	movl	%esp,%ebp
-	sysenter
-	.space 7,0x90
-	jmp	.Lenter_kernel
-	/* 16: System call normal return point is here! */
-	pop	%ebp
-.Lpop_ebp:
-	pop	%edx
-.Lpop_edx:
-	pop	%ecx
-.Lpop_ecx:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-	.long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIE:
-
-	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
-.LSTARTFDE1:
-	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpush_ecx-.LSTART_vsyscall
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpush_edx-.Lpush_ecx
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lenter_kernel-.Lpush_edx
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x10		/* RA at offset 16 now */
-	.byte 0x85, 0x04	/* DW_CFA_offset %ebp -16 */
-	/* Finally the epilogue.  */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_ebp-.Lenter_kernel
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x12		/* RA at offset 12 now */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_edx-.Lpop_ebp
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_ecx-.Lpop_edx
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x04		/* RA at offset 4 now */
-	.align 4
-.LENDFDE1:
-
-#define SYSCALL_ENTER_KERNEL	int $0x80
-#include "vsyscall-sigreturn.S"
diff --git a/arch/x86/ia32/vsyscall.lds b/arch/x86/ia32/vsyscall.lds
deleted file mode 100644
index 1dc86ff5bcb9..000000000000
--- a/arch/x86/ia32/vsyscall.lds
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
- * object prelinked to its virtual address. This script controls its layout.
- */
-
-/* This must match <asm/fixmap.h>.  */
-VSYSCALL_BASE = 0xffffe000;
-
-SECTIONS
-{
-  . = VSYSCALL_BASE + SIZEOF_HEADERS;
-
-  .hash           : { *(.hash) }		:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-
-  /* This linker script is used both with -r and with -shared.
-     For the layouts to match, we need to skip more than enough
-     space for the dynamic symbol table et al.  If this amount
-     is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VSYSCALL_BASE + 0x400;
-  
-  .text.vsyscall   : { *(.text.vsyscall) } 	:text =0x90909090
-
-  /* This is an 32bit object and we cannot easily get the offsets
-     into the 64bit kernel. Just hardcode them here. This assumes
-     that all the stubs don't need more than 0x100 bytes. */
-  . = VSYSCALL_BASE + 0x500;
-
-  .text.sigreturn  : { *(.text.sigreturn) }	:text =0x90909090
-
-  . = VSYSCALL_BASE + 0x600;
-
-  .text.rtsigreturn : { *(.text.rtsigreturn) }   :text =0x90909090
-	
-  .note		  : { *(.note.*) }		:text :note
-  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text :eh_frame_hdr
-  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
-  .dynamic        : { *(.dynamic) }		:text :dynamic
-  .useless        : {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-  }						:text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-  LINUX_2.5 {
-    global:
-    	__kernel_vsyscall;
-    	__kernel_sigreturn;
-    	__kernel_rt_sigreturn;
-
-    local: *;
-  };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value.  */
-ENTRY(__kernel_vsyscall);
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 12db5a1cdd74..22e3a284c1ed 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -9,7 +9,6 @@ header-y += prctl.h
 header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
-header-y += vsyscall32.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/include/asm-x86/vsyscall32.h b/include/asm-x86/vsyscall32.h
deleted file mode 100644
index c631c082f8f7..000000000000
--- a/include/asm-x86/vsyscall32.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_VSYSCALL32_H
-#define _ASM_VSYSCALL32_H 1
-
-/* Values need to match arch/x86_64/ia32/vsyscall.lds */
-
-#ifdef __ASSEMBLY__
-#define VSYSCALL32_BASE 0xffffe000
-#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
-#else
-#define VSYSCALL32_BASE 0xffffe000UL
-#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
-#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
-
-#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) 
-#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
-#define VSYSCALL32_SIGRETURN ((void __user *)VSYSCALL32_BASE + 0x500) 
-#define VSYSCALL32_RTSIGRETURN ((void __user *)VSYSCALL32_BASE + 0x600) 
-#endif
-
-#endif

From 69d0627a7f6e891189124d784d2fa90cae7c449a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:44 +0100
Subject: [PATCH 162/890] x86 vDSO: reorder vdso32 code

This reorders the code in the 32-bit vDSO images to put the signal
trampolines first and __kernel_vsyscall after them.  The order does
not matter to userland, it just uses what AT_SYSINFO or e_entry
says.  Since the signal trampolines are the same size in both
versions of the vDSO, putting them first is the simplest way to get
the addresses to line up.  This makes it work to use a more compact
layout for the vDSO.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/vdso32/int80.S     | 21 ++++++++++++---------
 arch/x86/vdso/vdso32/sigreturn.S | 20 +++++++++-----------
 arch/x86/vdso/vdso32/syscall.S   | 22 +++++++++++++++-------
 arch/x86/vdso/vdso32/sysenter.S  | 21 +++++++++++----------
 4 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/vdso/vdso32/int80.S
index 3c8e4c62ace3..be4b7a9a7cdd 100644
--- a/arch/x86/vdso/vdso32/int80.S
+++ b/arch/x86/vdso/vdso32/int80.S
@@ -1,15 +1,15 @@
 /*
- * Code for the vsyscall page.  This version uses the old int $0x80 method.
+ * Code for the vDSO.  This version uses the old int $0x80 method.
  *
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- *    for details.
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
  */
+#include "sigreturn.S"
 
 	.text
 	.globl __kernel_vsyscall
 	.type __kernel_vsyscall,@function
+	ALIGN
 __kernel_vsyscall:
 .LSTART_vsyscall:
 	int $0x80
@@ -47,7 +47,10 @@ __kernel_vsyscall:
 .LENDFDEDLSI:
 	.previous
 
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "sigreturn.S"
+	/*
+	 * Pad out the segment to match the size of the sysenter.S version.
+	 */
+VDSO32_vsyscall_eh_frame_size = 0x44
+	.section .data,"aw",@progbits
+	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
+	.previous
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index 8d65a0a0eb70..cade2752928b 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -1,11 +1,12 @@
 /*
- * Common code for the sigreturn entry points on the vsyscall page.
+ * Common code for the sigreturn entry points in vDSO images.
  * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point.  The kernel assumes that the addresses of these
- * routines are constant for all vsyscall implementations.
+ * This file is #include'd by int80.S et al to define them first thing.
+ * The kernel assumes that the addresses of these routines are constant
+ * for all vDSO implementations.
  */
 
+#include <linux/linkage.h>
 #include <asm/unistd_32.h>
 #include <asm/asm-offsets.h>
 
@@ -13,32 +14,29 @@
 #define	SYSCALL_ENTER_KERNEL	int $0x80
 #endif
 
-/* XXX
-   Should these be named "_sigtramp" or something?
-*/
-
 	.text
-	.org __kernel_vsyscall+32,0x90
 	.globl __kernel_sigreturn
 	.type __kernel_sigreturn,@function
+	ALIGN
 __kernel_sigreturn:
 .LSTART_sigreturn:
 	popl %eax		/* XXX does this mean it needs unwind info? */
 	movl $__NR_sigreturn, %eax
 	SYSCALL_ENTER_KERNEL
 .LEND_sigreturn:
+	nop
 	.size __kernel_sigreturn,.-.LSTART_sigreturn
 
-	.balign 32
 	.globl __kernel_rt_sigreturn
 	.type __kernel_rt_sigreturn,@function
+	ALIGN
 __kernel_rt_sigreturn:
 .LSTART_rt_sigreturn:
 	movl $__NR_rt_sigreturn, %eax
 	SYSCALL_ENTER_KERNEL
 .LEND_rt_sigreturn:
+	nop
 	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
-	.balign 32
 	.previous
 
 	.section .eh_frame,"a",@progbits
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
index 333bfb552c88..fe88d34f822f 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -1,13 +1,18 @@
 /*
- * Code for the vsyscall page.  This version uses the syscall instruction.
+ * Code for the vDSO.  This version uses the syscall instruction.
+ *
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
  */
+#define SYSCALL_ENTER_KERNEL	syscall
+#include "sigreturn.S"
 
-#include <asm/asm-offsets.h>
 #include <asm/segment.h>
 
 	.text
 	.globl __kernel_vsyscall
 	.type __kernel_vsyscall,@function
+	ALIGN
 __kernel_vsyscall:
 .LSTART_vsyscall:
 	push	%ebp
@@ -61,9 +66,12 @@ __kernel_vsyscall:
 	.uleb128 4
 	.align 4
 .LENDFDE1:
+	.previous
 
-/*
- * Get the common code for the sigreturn entry points.
- */
-#define SYSCALL_ENTER_KERNEL	syscall
-#include "sigreturn.S"
+	/*
+	 * Pad out the segment to match the size of the sysenter.S version.
+	 */
+VDSO32_vsyscall_eh_frame_size = 0x44
+	.section .data,"aw",@progbits
+	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
+	.previous
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index 109bfa394eaa..902d5fc11f1b 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -1,11 +1,10 @@
 /*
- * Code for the vsyscall page.  This version uses the sysenter instruction.
+ * Code for the vDSO.  This version uses the sysenter instruction.
  *
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- *    for details.
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
  */
+#include "sigreturn.S"
 
 /*
  * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
@@ -23,11 +22,12 @@
  * arg6 from the stack.
  *
  * You can not use this vsyscall for the clone() syscall because the
- * three dwords on the parent stack do not get copied to the child.
+ * three words on the parent stack do not get copied to the child.
  */
 	.text
 	.globl __kernel_vsyscall
 	.type __kernel_vsyscall,@function
+	ALIGN
 __kernel_vsyscall:
 .LSTART_vsyscall:
 	push %ecx
@@ -115,7 +115,8 @@ VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
 .LENDFDEDLSI:
 	.previous
 
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "sigreturn.S"
+	/*
+	 * Emit a symbol with the size of this .eh_frame data,
+	 * to verify it matches the other versions.
+	 */
+VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)

From 16e48e7e7929d841273d87027079660bca5e37bd Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:44 +0100
Subject: [PATCH 163/890] x86 vDSO: makefile cleanup

This cleans up the arch/x86/vdso/Makefile rules for vdso.so to
share more code with the vdso32-*.so rules and remove old cruft.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 47bc2760e6a8..58708f6c1329 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -23,25 +23,15 @@ $(obj)/vdso.o: $(obj)/vdso.so
 
 targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
-      cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
-		          -Wl,-T,$(filter-out FORCE,$^) -o $@
-
 export CPPFLAGS_vdso.lds += -P -C
 
-vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
-		 $(call ld-option, -Wl$(comma)--hash-style=sysv) \
-		-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-SYSCFLAGS_vdso.so = $(vdso-flags)
-SYSCFLAGS_vdso.so.dbg = $(vdso-flags)
+VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+		      	-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
 
-$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
-
 $(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
-	$(call if_changed,syscall)
+	$(call if_changed,vdso)
 
 $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
@@ -49,8 +39,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 
 CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
 
-$(obj)/vclock_gettime.o: KBUILD_CFLAGS = $(CFL)
-$(obj)/vgetcpu.o: KBUILD_CFLAGS = $(CFL)
+$(vobjs): KBUILD_CFLAGS = $(CFL)
 
 targets += vdso-syms.lds
 obj-$(VDSO64-y)			+= vdso-syms.lds

From cadd516422d9382313144aa58f9e46ee7a2c0a28 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:44 +0100
Subject: [PATCH 164/890] x86 vDSO: canonicalize sysenter .eh_frame

Some assembler versions automagically optimize .eh_frame contents,
changing their size.  The CFI in sysenter.S was not using optimal
formatting, so it would be changed by newer/smarter assemblers.
This ran afoul of the wired constant for padding out the other vDSO
images to match its size.  This changes the original hand-coded
source to use the optimal format encoding for its operations.  That
leaves nothing more for a fancy assembler to do, so the sizes will
match the wired-in expected size regardless of the assembler version.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/vdso32/int80.S    |  2 +-
 arch/x86/vdso/vdso32/syscall.S  |  2 +-
 arch/x86/vdso/vdso32/sysenter.S | 18 ++++++------------
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/vdso/vdso32/int80.S
index be4b7a9a7cdd..b15b7c01aedb 100644
--- a/arch/x86/vdso/vdso32/int80.S
+++ b/arch/x86/vdso/vdso32/int80.S
@@ -50,7 +50,7 @@ __kernel_vsyscall:
 	/*
 	 * Pad out the segment to match the size of the sysenter.S version.
 	 */
-VDSO32_vsyscall_eh_frame_size = 0x44
+VDSO32_vsyscall_eh_frame_size = 0x40
 	.section .data,"aw",@progbits
 	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
 	.previous
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
index fe88d34f822f..5415b5613d55 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -71,7 +71,7 @@ __kernel_vsyscall:
 	/*
 	 * Pad out the segment to match the size of the sysenter.S version.
 	 */
-VDSO32_vsyscall_eh_frame_size = 0x44
+VDSO32_vsyscall_eh_frame_size = 0x40
 	.section .data,"aw",@progbits
 	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
 	.previous
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index 902d5fc11f1b..e2800affa754 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -84,31 +84,25 @@ VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
 	.uleb128 0
 	/* What follows are the instructions for the table generation.
 	   We have to record all changes of the stack pointer.  */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpush_ecx-.LSTART_vsyscall
+	.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpush_edx-.Lpush_ecx
+	.byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lenter_kernel-.Lpush_edx
+	.byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x10		/* RA at offset 16 now */
 	.byte 0x85, 0x04	/* DW_CFA_offset %ebp -16 */
 	/* Finally the epilogue.  */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_ebp-.Lenter_kernel
+	.byte 0x40 + (.Lpop_ebp-.Lenter_kernel)	/* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x0c		/* RA at offset 12 now */
 	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_edx-.Lpop_ebp
+	.byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x04		/* DW_CFA_advance_loc4 */
-	.long .Lpop_ecx-.Lpop_edx
+	.byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
 	.byte 0x0e		/* DW_CFA_def_cfa_offset */
 	.byte 0x04		/* RA at offset 4 now */
 	.align 4

From 859c0a5b9ca8eba2d65fa42f02505d76f8a712c7 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:44 +0100
Subject: [PATCH 165/890] x86: get_desc_base

This defines the get_desc_base function in asm-x86/desc_64.h to match the
one in desc_32.h.  If these two files ever get merged together, this
function could be the same in both.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_64.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 7d48df72aef2..230ac6e50a0f 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -215,6 +215,16 @@ static inline void load_LDT(mm_context_t *pc)
 
 extern struct desc_ptr idt_descr;
 
+static inline unsigned long get_desc_base(const void *ptr)
+{
+	const u32 *desc = ptr;
+	unsigned long base;
+	base = ((desc[0] >> 16)  & 0x0000ffff) |
+		((desc[1] << 16) & 0x00ff0000) |
+		(desc[1] & 0xff000000);
+	return base;
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif

From 91394eb0975b3771dde7071a0825c6df6c20ff8a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:45 +0100
Subject: [PATCH 166/890] x86: use get_desc_base

This changes a couple of places to use the get_desc_base function.
They were duplicating the same calculation with different equivalent code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/tls32.c        | 7 +------
 arch/x86/kernel/process_64.c | 6 +-----
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/x86/ia32/tls32.c b/arch/x86/ia32/tls32.c
index 1cc4340de3ca..cac4b26b5895 100644
--- a/arch/x86/ia32/tls32.c
+++ b/arch/x86/ia32/tls32.c
@@ -85,11 +85,6 @@ asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
  * Get the current Thread-Local Storage area:
  */
 
-#define GET_BASE(desc) ( \
-	(((desc)->a >> 16) & 0x0000ffff) | \
-	(((desc)->b << 16) & 0x00ff0000) | \
-	( (desc)->b        & 0xff000000)   )
-
 #define GET_LIMIT(desc) ( \
 	((desc)->a & 0x0ffff) | \
 	 ((desc)->b & 0xf0000) )
@@ -117,7 +112,7 @@ int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 
 	memset(&info, 0, sizeof(struct user_desc));
 	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
+	info.base_addr = get_desc_base(desc);
 	info.limit = GET_LIMIT(desc);
 	info.seg_32bit = GET_32BIT(desc);
 	info.contents = GET_CONTENTS(desc);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index aa9414ed74c7..9ea1d7546f80 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -464,11 +464,7 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 
 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 {
-	struct desc_struct *desc = (void *)t->thread.tls_array;
-	desc += tls;
-	return desc->base0 | 
-		(((u32)desc->base1) << 16) | 
-		(((u32)desc->base2) << 24);
+	return get_desc_base(&t->thread.tls_array[tls]);
 }
 
 /*

From df5d438e33d7fc914ba9b6e0d6b019a8966c5fcc Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:45 +0100
Subject: [PATCH 167/890] x86: ptrace fs/gs_base

The fs_base and gs_base fields are available in user_regs_struct.
But reading these via ptrace (PTRACE_GETREGS or PTRACE_PEEKUSR) does
not give a reliably useful value.  The thread_struct fields are 0
when do_arch_prctl decided to use a GDT slot instead of MSR_FS_BASE,
which it does for a value under 1<<32.

This changes ptrace access to fs_base and gs_base to work like
PTRACE_ARCH_PRCTL does.  That is, it reads the base address that
user-mode memory access using the fs/gs instruction prefixes will
use, regardless of how it's being implemented in the kernel.  The
MSR vs GDT is an implementation detail that is pretty much hidden
from userland in the actual using, and there is no reason that
ptrace should give the internal implementation picture rather than
the user-mode semantic picture.  In the case of setting the value,
this can implicitly change the fsindex/gsindex value (also
separately in user_regs_struct), which is what happens when the
thread calls arch_prctl itself.  In a PTRACE_SETREGS, the fs_base
change will come after the fsindex change due to the order of the
struct, and so a change the debugger made to fs_base will have the
effect intended, another part of the user_regs_struct will now
differ when read back from what the debugger wrote.

This makes PTRACE_ARCH_PRCTL obsolete.  We could consider declaring
it deprecated and removing it one day, though there is no hurry.
For the foreseeable future, debuggers have to assume an old kernel
that does not report reliable fs_base/gs_base values in user_regs_struct
and stick to PTRACE_ARCH_PRCTL anyway.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 607085f3f08a..1edece36044c 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -22,6 +22,7 @@
 #include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/processor.h>
+#include <asm/prctl.h>
 #include <asm/i387.h>
 #include <asm/debugreg.h>
 #include <asm/ldt.h>
@@ -260,12 +261,22 @@ static int putreg(struct task_struct *child,
 		case offsetof(struct user_regs_struct,fs_base):
 			if (value >= TASK_SIZE_OF(child))
 				return -EIO;
-			child->thread.fs = value;
+			/*
+			 * When changing the segment base, use do_arch_prctl
+			 * to set either thread.fs or thread.fsindex and the
+			 * corresponding GDT slot.
+			 */
+			if (child->thread.fs != value)
+				return do_arch_prctl(child, ARCH_SET_FS, value);
 			return 0;
 		case offsetof(struct user_regs_struct,gs_base):
+			/*
+			 * Exactly the same here as the %fs handling above.
+			 */
 			if (value >= TASK_SIZE_OF(child))
 				return -EIO;
-			child->thread.gs = value;
+			if (child->thread.gs != value)
+				return do_arch_prctl(child, ARCH_SET_GS, value);
 			return 0;
 		case offsetof(struct user_regs_struct, eflags):
 			value &= FLAG_MASK;
@@ -296,9 +307,25 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		case offsetof(struct user_regs_struct, es):
 			return child->thread.es; 
 		case offsetof(struct user_regs_struct, fs_base):
-			return child->thread.fs;
+			/*
+			 * do_arch_prctl may have used a GDT slot instead of
+			 * the MSR.  To userland, it appears the same either
+			 * way, except the %fs segment selector might not be 0.
+			 */
+			if (child->thread.fs != 0)
+				return child->thread.fs;
+			if (child->thread.fsindex != FS_TLS_SEL)
+				return 0;
+			return get_desc_base(&child->thread.tls_array[FS_TLS]);
 		case offsetof(struct user_regs_struct, gs_base):
-			return child->thread.gs;
+			/*
+			 * Exactly the same here as the %fs handling above.
+			 */
+			if (child->thread.gs != 0)
+				return child->thread.gs;
+			if (child->thread.gsindex != GS_TLS_SEL)
+				return 0;
+			return get_desc_base(&child->thread.tls_array[GS_TLS]);
 		default:
 			regno = regno - sizeof(struct pt_regs);
 			val = get_stack_long(child, regno);

From 12c3cbbb7e3218b1c3ed4c97b3496fc3f94ea967 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:45 +0100
Subject: [PATCH 168/890] x86: desc_empty

This replaces the desc_empty macro with an inline.  It now handles
easily any of the four different types used between 32/64 code to
refer to these 8 bytes.  It's identical in both asm-x86/processor_64.h
and asm-x86/processor_32.h, so if these files ever get merged this
function can be in the common code.

This also removes the desc_equal macro because nothing uses it.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor_32.h |  9 +++++----
 include/asm-x86/processor_64.h | 10 +++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 3b51a18f16d5..2540bf8d5724 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -28,11 +28,12 @@ struct desc_struct {
 	unsigned long a,b;
 };
 
-#define desc_empty(desc) \
-		(!((desc)->a | (desc)->b))
+static inline int desc_empty(const void *ptr)
+{
+	const u32 *desc = ptr;
+	return !(desc[0] | desc[1]);
+}
 
-#define desc_equal(desc1, desc2) \
-		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 2ae8ceb8a74e..20d8935d141a 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -30,11 +30,11 @@
 #define VIP_MASK	0x00100000	/* virtual interrupt pending */
 #define ID_MASK		0x00200000
 
-#define desc_empty(desc) \
-               (!((desc)->a | (desc)->b))
-
-#define desc_equal(desc1, desc2) \
-               (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
+static inline int desc_empty(const void *ptr)
+{
+	const u32 *desc = ptr;
+	return !(desc[0] | desc[1]);
+}
 
 /*
  * Default implementation of macro that returns current

From 13abd0e50433092c41551bc13c32268028b6d663 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:45 +0100
Subject: [PATCH 169/890] x86: tls32 moved

This renames arch/x86/ia32/tls32.c to arch/x86/kernel/tls.c, which does
nothing now but paves the way to consolidate this code for 32-bit too.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Zachary Amsden <zach@vmware.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/Makefile                  |  2 +-
 arch/x86/kernel/Makefile_64             |  1 +
 arch/x86/{ia32/tls32.c => kernel/tls.c} | 18 +++++++++---------
 3 files changed, 11 insertions(+), 10 deletions(-)
 rename arch/x86/{ia32/tls32.c => kernel/tls.c} (96%)

diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 8c19b763b2fb..ea6088640847 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the ia32 kernel emulation subsystem.
 #
 
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \
 	ia32_binfmt.o fpu32.o ptrace32.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index ae95d21ea885..e1ba82e582a8 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -13,6 +13,7 @@ obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
 		i8253.o io_delay.o rtc.o
 
+obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
diff --git a/arch/x86/ia32/tls32.c b/arch/x86/kernel/tls.c
similarity index 96%
rename from arch/x86/ia32/tls32.c
rename to arch/x86/kernel/tls.c
index cac4b26b5895..5291596f19b0 100644
--- a/arch/x86/ia32/tls32.c
+++ b/arch/x86/kernel/tls.c
@@ -26,7 +26,7 @@ static int get_free_idx(void)
 
 /*
  * Set a given TLS descriptor:
- * When you want addresses > 32bit use arch_prctl() 
+ * When you want addresses > 32bit use arch_prctl()
  */
 int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 {
@@ -76,9 +76,9 @@ int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 }
 
 asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
-{ 
-	return do_set_thread_area(&current->thread, u_info); 
-} 
+{
+	return do_set_thread_area(&current->thread, u_info);
+}
 
 
 /*
@@ -88,7 +88,7 @@ asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
 #define GET_LIMIT(desc) ( \
 	((desc)->a & 0x0ffff) | \
 	 ((desc)->b & 0xf0000) )
-	
+
 #define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
 #define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
 #define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
@@ -130,7 +130,7 @@ int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info)
 {
 	return do_get_thread_area(&current->thread, u_info);
-} 
+}
 
 
 int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
@@ -139,17 +139,17 @@ int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
 	struct user_desc info;
 	struct user_desc __user *cp;
 	int idx;
-	
+
 	cp = (void __user *)childregs->rsi;
 	if (copy_from_user(&info, cp, sizeof(info)))
 		return -EFAULT;
 	if (LDT_empty(&info))
 		return -EINVAL;
-	
+
 	idx = info.entry_number;
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
-	
+
 	desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
 	desc->a = LDT_entry_a(&info);
 	desc->b = LDT_entry_b(&info);

From efd1ca52d04d2f6df337a3332cee56cd60e6d4c4 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:46 +0100
Subject: [PATCH 170/890] x86: TLS cleanup

This consolidates the four different places that implemented the same
encoding magic for the GDT-slot 32-bit TLS support.  The old tls32.c was
renamed and is now only slightly modified to be the shared implementation.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Zachary Amsden <zach@vmware.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32entry.S    |   4 +-
 arch/x86/kernel/Makefile_32  |   1 +
 arch/x86/kernel/process_32.c | 141 ++---------------------------------
 arch/x86/kernel/process_64.c |   3 +-
 arch/x86/kernel/ptrace_32.c  |  91 ++--------------------
 arch/x86/kernel/ptrace_64.c  |  26 +++----
 arch/x86/kernel/tls.c        |  94 +++++++++--------------
 include/asm-x86/ia32.h       |   6 --
 include/asm-x86/ptrace.h     |  11 +++
 9 files changed, 76 insertions(+), 301 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 2499a324feaa..0db0a6291bbd 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -643,8 +643,8 @@ ia32_sys_call_table:
 	.quad compat_sys_futex		/* 240 */
 	.quad compat_sys_sched_setaffinity
 	.quad compat_sys_sched_getaffinity
-	.quad sys32_set_thread_area
-	.quad sys32_get_thread_area
+	.quad sys_set_thread_area
+	.quad sys_get_thread_area
 	.quad compat_sys_io_setup	/* 245 */
 	.quad sys_io_destroy
 	.quad compat_sys_io_getevents
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 2c9596b9349c..9a6577a746ba 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -10,6 +10,7 @@ obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
+obj-y				+= tls.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 631af167bc51..4d66a56280d3 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -501,32 +501,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
 	}
 
+	err = 0;
+
 	/*
 	 * Set a new TLS for the child thread?
 	 */
-	if (clone_flags & CLONE_SETTLS) {
-		struct desc_struct *desc;
-		struct user_desc info;
-		int idx;
+	if (clone_flags & CLONE_SETTLS)
+		err = do_set_thread_area(p, -1,
+			(struct user_desc __user *)childregs->esi, 0);
 
-		err = -EFAULT;
-		if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
-			goto out;
-		err = -EINVAL;
-		if (LDT_empty(&info))
-			goto out;
-
-		idx = info.entry_number;
-		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-			goto out;
-
-		desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-
-	err = 0;
- out:
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
@@ -872,120 +855,6 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-/*
- * sys_alloc_thread_area: get a yet unused TLS descriptor index.
- */
-static int get_free_idx(void)
-{
-	struct thread_struct *t = &current->thread;
-	int idx;
-
-	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
-		if (desc_empty(t->tls_array + idx))
-			return idx + GDT_ENTRY_TLS_MIN;
-	return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- */
-asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
-{
-	struct thread_struct *t = &current->thread;
-	struct user_desc info;
-	struct desc_struct *desc;
-	int cpu, idx;
-
-	if (copy_from_user(&info, u_info, sizeof(info)))
-		return -EFAULT;
-	idx = info.entry_number;
-
-	/*
-	 * index -1 means the kernel should try to find and
-	 * allocate an empty descriptor:
-	 */
-	if (idx == -1) {
-		idx = get_free_idx();
-		if (idx < 0)
-			return idx;
-		if (put_user(idx, &u_info->entry_number))
-			return -EFAULT;
-	}
-
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	/*
-	 * We must not get preempted while modifying the TLS.
-	 */
-	cpu = get_cpu();
-
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-	load_TLS(t, cpu);
-
-	put_cpu();
-
-	return 0;
-}
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
-	(((desc)->a >> 16) & 0x0000ffff) | \
-	(((desc)->b << 16) & 0x00ff0000) | \
-	( (desc)->b        & 0xff000000)   )
-
-#define GET_LIMIT(desc) ( \
-	((desc)->a & 0x0ffff) | \
-	 ((desc)->b & 0xf0000) )
-	
-#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
-#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
-
-asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
-{
-	struct user_desc info;
-	struct desc_struct *desc;
-	int idx;
-
-	if (get_user(idx, &u_info->entry_number))
-		return -EFAULT;
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	memset(&info, 0, sizeof(info));
-
-	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
-
-	if (copy_to_user(u_info, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
-}
-
 unsigned long arch_align_stack(unsigned long sp)
 {
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9ea1d7546f80..ccc9d68d5a58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,7 +524,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
 		if (test_thread_flag(TIF_IA32))
-			err = ia32_child_tls(p, childregs); 
+			err = do_set_thread_area(p, -1,
+				(struct user_desc __user *)childregs->rsi, 0);
 		else 			
 #endif	 
 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index ff5431cc03ee..09227cfb7c4c 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -276,85 +276,6 @@ void ptrace_disable(struct task_struct *child)
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 }
 
-/*
- * Perform get_thread_area on behalf of the traced child.
- */
-static int
-ptrace_get_thread_area(struct task_struct *child,
-		       int idx, struct user_desc __user *user_desc)
-{
-	struct user_desc info;
-	struct desc_struct *desc;
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
-	(((desc)->a >> 16) & 0x0000ffff) | \
-	(((desc)->b << 16) & 0x00ff0000) | \
-	( (desc)->b        & 0xff000000)   )
-
-#define GET_LIMIT(desc) ( \
-	((desc)->a & 0x0ffff) | \
-	 ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
-#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
-
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
-
-	if (copy_to_user(user_desc, &info, sizeof(info)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- * Perform set_thread_area on behalf of the traced child.
- */
-static int
-ptrace_set_thread_area(struct task_struct *child,
-		       int idx, struct user_desc __user *user_desc)
-{
-	struct user_desc info;
-	struct desc_struct *desc;
-
-	if (copy_from_user(&info, user_desc, sizeof(info)))
-		return -EFAULT;
-
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-
-	return 0;
-}
-
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	struct user * dummy = NULL;
@@ -601,13 +522,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	}
 
 	case PTRACE_GET_THREAD_AREA:
-		ret = ptrace_get_thread_area(child, addr,
-					(struct user_desc __user *) data);
+		if (addr < 0)
+			return -EIO;
+		ret = do_get_thread_area(child, addr,
+					 (struct user_desc __user *) data);
 		break;
 
 	case PTRACE_SET_THREAD_AREA:
-		ret = ptrace_set_thread_area(child, addr,
-					(struct user_desc __user *) data);
+		if (addr < 0)
+			return -EIO;
+		ret = do_set_thread_area(child, addr,
+					 (struct user_desc __user *) data, 0);
 		break;
 
 	default:
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 1edece36044c..375fadc23a25 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -474,23 +474,19 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		   64bit debugger to fully examine them too. Better
 		   don't use it against 64bit processes, use
 		   PTRACE_ARCH_PRCTL instead. */
-	case PTRACE_SET_THREAD_AREA: {
-		struct user_desc __user *p;
-		int old; 
-		p = (struct user_desc __user *)data;
-		get_user(old,  &p->entry_number); 
-		put_user(addr, &p->entry_number);
-		ret = do_set_thread_area(&child->thread, p);
-		put_user(old,  &p->entry_number); 
-		break;
 	case PTRACE_GET_THREAD_AREA:
-		p = (struct user_desc __user *)data;
-		get_user(old,  &p->entry_number); 
-		put_user(addr, &p->entry_number);
-		ret = do_get_thread_area(&child->thread, p);
-		put_user(old,  &p->entry_number); 
+		if (addr < 0)
+			return -EIO;
+		ret = do_get_thread_area(child, addr,
+					 (struct user_desc __user *) data);
+
+		break;
+	case PTRACE_SET_THREAD_AREA:
+		if (addr < 0)
+			return -EIO;
+		ret = do_set_thread_area(child, addr,
+					 (struct user_desc __user *) data, 0);
 		break;
-	} 
 #endif
 		/* normal 64bit interface to access TLS data. 
 		   Works just like arch_prctl, except that the arguments
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 5291596f19b0..67a377621b12 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -19,31 +19,34 @@ static int get_free_idx(void)
 	int idx;
 
 	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
-		if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx))
+		if (desc_empty(&t->tls_array[idx]))
 			return idx + GDT_ENTRY_TLS_MIN;
 	return -ESRCH;
 }
 
 /*
  * Set a given TLS descriptor:
- * When you want addresses > 32bit use arch_prctl()
  */
-int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
+int do_set_thread_area(struct task_struct *p, int idx,
+		       struct user_desc __user *u_info,
+		       int can_allocate)
 {
+	struct thread_struct *t = &p->thread;
 	struct user_desc info;
-	struct n_desc_struct *desc;
-	int cpu, idx;
+	u32 *desc;
+	int cpu;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
 
-	idx = info.entry_number;
+	if (idx == -1)
+		idx = info.entry_number;
 
 	/*
 	 * index -1 means the kernel should try to find and
 	 * allocate an empty descriptor:
 	 */
-	if (idx == -1) {
+	if (idx == -1 && can_allocate) {
 		idx = get_free_idx();
 		if (idx < 0)
 			return idx;
@@ -54,7 +57,7 @@ int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
+	desc = (u32 *) &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
 
 	/*
 	 * We must not get preempted while modifying the TLS.
@@ -62,11 +65,11 @@ int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 	cpu = get_cpu();
 
 	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
+		desc[0] = 0;
+		desc[1] = 0;
 	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
+		desc[0] = LDT_entry_a(&info);
+		desc[1] = LDT_entry_b(&info);
 	}
 	if (t == &current->thread)
 		load_TLS(t, cpu);
@@ -75,9 +78,9 @@ int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 	return 0;
 }
 
-asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
+asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
 {
-	return do_set_thread_area(&current->thread, u_info);
+	return do_set_thread_area(current, -1, u_info, 1);
 }
 
 
@@ -85,34 +88,32 @@ asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
  * Get the current Thread-Local Storage area:
  */
 
-#define GET_LIMIT(desc) ( \
-	((desc)->a & 0x0ffff) | \
-	 ((desc)->b & 0xf0000) )
+#define GET_LIMIT(desc)		(((desc)[0] & 0x0ffff) | ((desc)[1] & 0xf0000))
+#define GET_32BIT(desc)		(((desc)[1] >> 22) & 1)
+#define GET_CONTENTS(desc)	(((desc)[1] >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)[1] >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)[1] >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)[1] >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)[1] >> 20) & 1)
+#define GET_LONGMODE(desc)	(((desc)[1] >> 21) & 1)
 
-#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
-#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
-#define GET_LONGMODE(desc)	(((desc)->b >> 21) & 1)
-
-int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
+int do_get_thread_area(struct task_struct *p, int idx,
+		       struct user_desc __user *u_info)
 {
+	struct thread_struct *t = &p->thread;
 	struct user_desc info;
-	struct n_desc_struct *desc;
-	int idx;
+	u32 *desc;
 
-	if (get_user(idx, &u_info->entry_number))
+	if (idx == -1 && get_user(idx, &u_info->entry_number))
 		return -EFAULT;
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
+	desc = (u32 *) &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
 
 	memset(&info, 0, sizeof(struct user_desc));
 	info.entry_number = idx;
-	info.base_addr = get_desc_base(desc);
+	info.base_addr = get_desc_base((void *)desc);
 	info.limit = GET_LIMIT(desc);
 	info.seg_32bit = GET_32BIT(desc);
 	info.contents = GET_CONTENTS(desc);
@@ -120,39 +121,16 @@ int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
 	info.limit_in_pages = GET_LIMIT_PAGES(desc);
 	info.seg_not_present = !GET_PRESENT(desc);
 	info.useable = GET_USEABLE(desc);
+#ifdef CONFIG_X86_64
 	info.lm = GET_LONGMODE(desc);
+#endif
 
 	if (copy_to_user(u_info, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
 }
 
-asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info)
+asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
 {
-	return do_get_thread_area(&current->thread, u_info);
-}
-
-
-int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
-{
-	struct n_desc_struct *desc;
-	struct user_desc info;
-	struct user_desc __user *cp;
-	int idx;
-
-	cp = (void __user *)childregs->rsi;
-	if (copy_from_user(&info, cp, sizeof(info)))
-		return -EFAULT;
-	if (LDT_empty(&info))
-		return -EINVAL;
-
-	idx = info.entry_number;
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
-	desc->a = LDT_entry_a(&info);
-	desc->b = LDT_entry_b(&info);
-
-	return 0;
+	return do_get_thread_area(current, -1, u_info);
 }
diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h
index 0190b7c4e319..aa9733206e29 100644
--- a/include/asm-x86/ia32.h
+++ b/include/asm-x86/ia32.h
@@ -159,12 +159,6 @@ struct ustat32 {
 #define IA32_STACK_TOP IA32_PAGE_OFFSET
 
 #ifdef __KERNEL__
-struct user_desc;
-struct siginfo_t;
-int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
-int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info);
-int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs);
-
 struct linux_binprm;
 extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
 				unsigned long stack_top, int exec_stack);
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 51ddb2590870..105d1534eaf4 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -137,6 +137,17 @@ enum {
 };
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */
+
+#ifdef __KERNEL__
+
+struct user_desc;
+extern int do_get_thread_area(struct task_struct *p, int idx,
+			      struct user_desc __user *info);
+extern int do_set_thread_area(struct task_struct *p, int idx,
+			      struct user_desc __user *info, int can_allocate);
+
+#endif /* __KERNEL__ */
+
 #endif /* !__ASSEMBLY__ */
 
 #endif

From b3ca74a2bfc66262d21443b160815eb26d6699e6 Mon Sep 17 00:00:00 2001
From: Vladimir Berezniker <vmpn@hitechman.com>
Date: Wed, 30 Jan 2008 13:30:46 +0100
Subject: [PATCH 171/890] x86: sanitize user specified e820 memmap values

Sanitize user specified e820 memory ranges, using the same logic that is
applied to the values returned by the BIOS.  This ensures consistent
handling regardless of the source of the memory mappings.

Allows overriding portions of the memory map without specifying one in
it's entirety (memmap=exactmap).

E.g. marking a range of bad RAM as reserved with memmap=48M$528M

BIOS supplied range

BIOS-e820: 0000000000100000 - 000000007fe80000 (usable)

becomes

user: 0000000000100000 - 0000000021000000 (usable)
user: 0000000021000000 - 0000000024000000 (reserved)
user: 0000000024000000 - 000000007fe80000 (usable)

Previously this did not work, as the original BIOS range was left
untouched while the user defined range was appended to the end of the
memory map.

[ tglx: arch/x86 adaptation ]

Signed-off-by: Vladimir Berezniker <vmpn@hitechman.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 151236896243..8e7321101a0a 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -691,6 +691,8 @@ static int __init parse_memmap_opt(char *p)
 	mem_size = memparse(p, &p);
 	if (p == oldp)
 		return -EINVAL;
+
+	userdef = 1;
 	if (*p == '@') {
 		start_at = memparse(p+1, &p);
 		add_memory_region(start_at, mem_size, E820_RAM);
@@ -710,6 +712,12 @@ early_param("memmap", parse_memmap_opt);
 void __init finish_e820_parsing(void)
 {
 	if (userdef) {
+		char nr = e820.nr_map;
+
+		if (sanitize_e820_map(e820.map, &nr) < 0)
+			early_panic("Invalid user supplied memory map");
+		e820.nr_map = nr;
+
 		printk(KERN_INFO "user-defined physical RAM map:\n");
 		e820_print_map("user");
 	}

From 949ec325c79add68af13705b68a885a1efb84234 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:30:46 +0100
Subject: [PATCH 172/890] x86: set cpu_index to nr_cpus instead of 0

Some BIOSes that support two/four dualcore/quadcore systems, will get:

ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
Processor #0 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
Processor #1 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
Processor #2 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
Processor #3 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x84] disabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x85] disabled)
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x86] disabled)
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x87] disabled)
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x88] disabled)
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x89] disabled)
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x8a] disabled)
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x8b] disabled)
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x8c] disabled)
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x8d] disabled)
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x8e] disabled)
ACPI: LAPIC (acpi_id[0x10] lapic_id[0x8f] disabled)

SMP: Allowing 16 CPUs, 12 hotplug CPUs

the /proc/cpuinfo will show a bunch of NULL cpus with cpu_index=0

so assign impossible cpu_index value at first instead of 0.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot_64.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index ddefb38c53fb..8ac8eb620428 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -861,6 +861,18 @@ void __init smp_set_apicids(void)
 	x86_cpu_to_apicid_ptr = NULL;
 }
 
+static void __init smp_cpu_index_default(void)
+{
+	int i;
+	struct cpuinfo_x86 *c;
+
+	for_each_cpu_mask(i, cpu_possible_map) {
+		c = &cpu_data(i);
+		/* mark all to hotplug */
+		c->cpu_index = NR_CPUS;
+	}
+}
+
 /*
  * Prepare for SMP bootup.  The MP table or ACPI has been read
  * earlier.  Just do some sanity checking here and enable APIC mode.
@@ -868,6 +880,7 @@ void __init smp_set_apicids(void)
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	nmi_watchdog_default();
+	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
 	smp_set_apicids();

From 9de819fe72fc6979ddd18aa04ef9e2af5aa8bc5f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:30:46 +0100
Subject: [PATCH 173/890] x86: do not set boot cpu in cpu_online_map at
 x86_64_start_kernel()

In init/main.c boot_cpu_init() does that later.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6643f3f994fb..4a1c1356c41a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -69,8 +69,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	pda_init(0);
 	copy_bootdata(__va(real_mode_data));
-#ifdef CONFIG_SMP
-	cpu_set(0, cpu_online_map);
-#endif
+
 	start_kernel();
 }

From 4ebd1290ba12121d66285cc06987ca97bcdfc55b Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bbpetkov@yahoo.de>
Date: Wed, 30 Jan 2008 13:30:46 +0100
Subject: [PATCH 174/890] x86: vmlinux_32.lds.S: remove repeated comment from
 the x86-32 linker script

Remove repeated comment from the linker script for the x86-32 target.

Signed-off-by: Borislav Petkov <bbpetkov@yahoo.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmlinux_32.lds.S | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 84c913f38f98..ec072588ff01 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -8,12 +8,6 @@
  * put it inside the section definition.
  */
 
-/* Don't define absolute symbols until and unless you know that symbol
- * value is should remain constant even if kernel image is relocated
- * at run time. Absolute symbols are not relocated. If symbol value should
- * change if kernel is relocated, make the symbol section relative and
- * put it inside the section definition.
- */
 #define LOAD_OFFSET __PAGE_OFFSET
 
 #include <asm-generic/vmlinux.lds.h>

From b263295dbffd33b0fbff670720fa178c30e3392a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 30 Jan 2008 13:30:47 +0100
Subject: [PATCH 175/890] x86: 64-bit, make sparsemem vmemmap the only memory
 model

Use sparsemem as the only memory model for UP, SMP and NUMA.  Measurements
indicate that DISCONTIGMEM has a higher overhead than sparsemem.  And
FLATMEMs benefits are minimal.  So I think its best to simply standardize
on sparsemem.

Results of page allocator tests (test can be had via git from slab git
tree branch tests)

Measurements in cycle counts. 1000 allocations were performed and then the
average cycle count was calculated.

Order	FlatMem	Discontig	SparseMem
0	  639	  665		  641
1	  567	  647		  593
2	  679	  774		  692
3	  763	  967		  781
4	  961	 1501		  962
5	 1356	 2344		 1392
6	 2224	 3982		 2336
7	 4869	 7225		 5074
8	12500	14048		12732
9	27926	28223		28165
10	58578	58714		58682

(Note that FlatMem is an SMP config and the rest NUMA configurations)

Memory use:

SMP Sparsemem
-------------

Kernel size:

   text    data     bss     dec     hex filename
3849268  397739 1264856 5511863  541ab7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8242252      41164    8201088          0        352      11512
-/+ buffers/cache:      29300    8212952
Swap:      9775512          0    9775512

SMP Flatmem
-----------

Kernel size:

   text    data     bss     dec     hex filename
3844612  397739 1264536 5506887  540747 vmlinux

So 4.5k growth in text size vs. FLATMEM.

             total       used       free     shared    buffers     cached
Mem:       8244052      40544    8203508          0        352      11484
-/+ buffers/cache:      28708    8215344

2k growth in overall memory use after boot.

NUMA discontig:

   text    data     bss     dec     hex filename
3888124  470659 1276504 5635287  55fcd7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8256256      56908    8199348          0        352      11496
-/+ buffers/cache:      45060    8211196
Swap:      9775512          0    9775512

NUMA sparse:

   text    data     bss     dec     hex filename
3896428  470659 1276824 5643911  561e87 vmlinux

8k text growth. Given that we fully inline virt_to_page and friends now
that is rather good.

             total       used       free     shared    buffers     cached
Mem:       8264720      57240    8207480          0        352      11516
-/+ buffers/cache:      45372    8219348
Swap:      9775512          0    9775512

The total available memory is increased by 8k.

This patch makes sparsemem the default and removes discontig and
flatmem support from x86.

[ akpm@linux-foundation.org: allnoconfig build fix ]

Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                   | 22 +++++-------
 arch/x86/configs/x86_64_defconfig  |  9 -----
 arch/x86/kernel/machine_kexec_64.c |  5 ---
 arch/x86/mm/init_64.c              | 28 ---------------
 arch/x86/mm/ioremap_64.c           | 17 ---------
 arch/x86/mm/numa_64.c              | 21 -----------
 arch/x86/mm/srat_64.c              | 57 ------------------------------
 include/asm-x86/mmzone_64.h        |  6 ----
 include/asm-x86/page_64.h          |  3 --
 9 files changed, 9 insertions(+), 159 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2f4d88babd36..da98368f66af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -891,25 +891,29 @@ config HAVE_ARCH_ALLOC_REMAP
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA)
+	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
-	depends on NUMA
+	depends on NUMA && X86_32
 
 config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
-	depends on NUMA
+	depends on NUMA && X86_32
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64))
+	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
-	depends on X86_32 && ARCH_SPARSEMEM_ENABLE
+	depends on ARCH_SPARSEMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
 	def_bool X86_64
@@ -1207,18 +1211,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 	depends on X86_64 || (X86_32 && HIGHMEM)
 
-config MEMORY_HOTPLUG_RESERVE
-	def_bool X86_64
-	depends on (MEMORY_HOTPLUG && DISCONTIGMEM)
-
 config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool X86_64
 	depends on NUMA
 
-config OUT_OF_LINE_PFN_TO_PAGE
-	def_bool X86_64
-	depends on DISCONTIGMEM
-
 menu "Power management options"
 	depends on !X86_VOYAGER
 
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 38a83f9c966f..9e2b0ef851de 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y
 CONFIG_NODES_SHIFT=6
 CONFIG_X86_64_ACPI_NUMA=y
 CONFIG_NUMA_EMU=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM_MANUAL=y
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_NEED_MULTIPLE_NODES=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index aa3d2c8f7737..a1fef42f8cdb 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 void arch_crash_save_vmcoreinfo(void)
 {
 	VMCOREINFO_SYMBOL(init_level4_pgt);
-
-#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
-	VMCOREINFO_SYMBOL(node_data);
-	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
 }
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fbb657a8b19..251eeb325ae3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -486,34 +486,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
- * just online the pages.
- */
-int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
-{
-	int err = -EIO;
-	unsigned long pfn;
-	unsigned long total = 0, mem = 0;
-	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
-		if (pfn_valid(pfn)) {
-			online_page(pfn_to_page(pfn));
-			err = 0;
-			mem++;
-		}
-		total++;
-	}
-	if (!err) {
-		z->spanned_pages += total;
-		z->present_pages += mem;
-		z->zone_pgdat->node_spanned_pages += total;
-		z->zone_pgdat->node_present_pages += mem;
-	}
-	return err;
-}
-#endif
-
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
 			 kcore_vsyscall;
 
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 6cac90aa5032..b03db4ca9cad 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -86,23 +86,6 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
-#ifdef CONFIG_FLATMEM
-	/*
-	 * Don't allow anybody to remap normal RAM that we're using..
-	 */
-	if (last_addr < virt_to_phys(high_memory)) {
-		char *t_addr, *t_end;
- 		struct page *page;
-
-		t_addr = __va(phys_addr);
-		t_end = t_addr + (size - 1);
-	   
-		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
-			if(!PageReserved(page))
-				return NULL;
-	}
-#endif
-
 	pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
 			  | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
 	/*
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 29b69300aee2..46b4b5e1a02a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -153,12 +153,10 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 	return shift;
 }
 
-#ifdef CONFIG_SPARSEMEM
 int early_pfn_to_nid(unsigned long pfn)
 {
 	return phys_to_nid(pfn << PAGE_SHIFT);
 }
-#endif
 
 static void * __init early_node_mem(int nodeid, unsigned long start,
 				    unsigned long end, unsigned long size)
@@ -635,23 +633,4 @@ void __init init_cpu_to_node(void)
 	}
 }
 
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * Functions to convert PFNs from/to per node page addresses.
- * These are out of line because they are quite big.
- * They could be all tuned by pre caching more state.
- * Should do that.
- */
 
-int pfn_valid(unsigned long pfn)
-{
-	unsigned nid;
-	if (pfn >= num_physpages)
-		return 0;
-	nid = pfn_to_nid(pfn);
-	if (nid == 0xff)
-		return 0;
-	return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
-}
-EXPORT_SYMBOL(pfn_valid);
-#endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index ea85172fc0cc..9be14171144b 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -151,62 +151,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, pa->apic_id, node);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Protect against too large hotadd areas that would fill up memory.
- */
-static int hotadd_enough_memory(struct bootnode *nd)
-{
-	static unsigned long allocated;
-	static unsigned long last_area_end;
-	unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
-	long mem = pages * sizeof(struct page);
-	unsigned long addr;
-	unsigned long allowed;
-	unsigned long oldpages = pages;
-
-	if (mem < 0)
-		return 0;
-	allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
-	allowed = (allowed / 100) * hotadd_percent;
-	if (allocated + mem > allowed) {
-		unsigned long range;
-		/* Give them at least part of their hotadd memory upto hotadd_percent
-		   It would be better to spread the limit out
-		   over multiple hotplug areas, but that is too complicated
-		   right now */
-		if (allocated >= allowed)
-			return 0;
-		range = allowed - allocated;
-		pages = (range / PAGE_SIZE);
-		mem = pages * sizeof(struct page);
-		nd->end = nd->start + range;
-	}
-	/* Not completely fool proof, but a good sanity check */
-	addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
-	if (addr == -1UL)
-		return 0;
-	if (pages != oldpages)
-		printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
-			pages << PAGE_SHIFT);
-	last_area_end = addr + mem;
-	allocated += mem;
-	return 1;
-}
-
-static int update_end_of_memory(unsigned long end)
-{
-	found_add_area = 1;
-	if ((end >> PAGE_SHIFT) > end_pfn)
-		end_pfn = end >> PAGE_SHIFT;
-	return 1;
-}
-
-static inline int save_add_info(void)
-{
-	return hotadd_percent > 0;
-}
-#else
 int update_end_of_memory(unsigned long end) {return -1;}
 static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -214,7 +158,6 @@ static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
-#endif
 /*
  * Update nodes_add and decide if to include add are in the zone.
  * Both SPARSE and RESERVE need nodes_add infomation.
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 1e0ed34a6adc..b0c25ae111d9 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -43,12 +43,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 
 extern int early_pfn_to_nid(unsigned long pfn);
 
-#ifdef CONFIG_DISCONTIGMEM
-#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
-
-extern int pfn_valid(unsigned long pfn);
-#endif
-
 #ifdef CONFIG_NUMA_EMU
 #define FAKE_NODE_MIN_SIZE	(64*1024*1024)
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1uL))
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 6fdc904a5fa5..d400167c5509 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -122,9 +122,6 @@ extern unsigned long __phys_addr(unsigned long);
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define __boot_va(x)		__va(x)
 #define __boot_pa(x)		__pa(x)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)		((pfn) < end_pfn)
-#endif
 
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

From 5f627f8e122a163ce53908d55e088247db31f1d7 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 30 Jan 2008 13:30:47 +0100
Subject: [PATCH 176/890] mips, x86: optimize the i8259 code a bit

The timer code always calls the clock_event_device set_net_event and
set_mode methods with interrupts disabled, so no need to use
spin_lock_irqsave / spin_unlock_irqrestore for those.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by:Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/mips/kernel/i8253.c | 12 ++++--------
 arch/x86/kernel/i8253.c  | 12 ++++--------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index c2d497ceffdd..fc4aa07b6d35 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -24,9 +24,7 @@ DEFINE_SPINLOCK(i8253_lock);
 static void init_pit_timer(enum clock_event_mode mode,
 			   struct clock_event_device *evt)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8253_lock, flags);
+	spin_lock(&i8253_lock);
 
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -55,7 +53,7 @@ static void init_pit_timer(enum clock_event_mode mode,
 		/* Nothing to do here */
 		break;
 	}
-	spin_unlock_irqrestore(&i8253_lock, flags);
+	spin_unlock(&i8253_lock);
 }
 
 /*
@@ -65,12 +63,10 @@ static void init_pit_timer(enum clock_event_mode mode,
  */
 static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8253_lock, flags);
+	spin_lock(&i8253_lock);
 	outb_p(delta & 0xff , PIT_CH0);	/* LSB */
 	outb(delta >> 8 , PIT_CH0);	/* MSB */
-	spin_unlock_irqrestore(&i8253_lock, flags);
+	spin_unlock(&i8253_lock);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 377c3f8411f8..c76fef1ce355 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -38,9 +38,7 @@ struct clock_event_device *global_clock_event;
 static void init_pit_timer(enum clock_event_mode mode,
 			   struct clock_event_device *evt)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8253_lock, flags);
+	spin_lock(&i8253_lock);
 
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -71,7 +69,7 @@ static void init_pit_timer(enum clock_event_mode mode,
 		/* Nothing to do here */
 		break;
 	}
-	spin_unlock_irqrestore(&i8253_lock, flags);
+	spin_unlock(&i8253_lock);
 }
 
 /*
@@ -81,12 +79,10 @@ static void init_pit_timer(enum clock_event_mode mode,
  */
 static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8253_lock, flags);
+	spin_lock(&i8253_lock);
 	outb_p(delta & 0xff , PIT_CH0);	/* LSB */
 	outb(delta >> 8 , PIT_CH0);	/* MSB */
-	spin_unlock_irqrestore(&i8253_lock, flags);
+	spin_unlock(&i8253_lock);
 
 	return 0;
 }

From 2c0b8a7578f7653e1e5312a5232e8ead563cf477 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Wed, 30 Jan 2008 13:30:47 +0100
Subject: [PATCH 177/890] x86: fall back on interrupt disable in cmpxchg8b on
 80386 and 80486

Actually, on 386, cmpxchg and cmpxchg_local fall back on
cmpxchg_386_u8/16/32: it disables interruptions around non atomic
updates to mimic the cmpxchg behavior.

The comment:
/* Poor man's cmpxchg for 386. Unsuitable for SMP */

already present in cmpxchg_386_u32 tells much about how this cmpxchg
implementation should not be used in a SMP context. However, the cmpxchg_local
can perfectly use this fallback, since it only needs to be atomic wrt the local
cpu.

This patch adds a cmpxchg_486_u64 and uses it as a fallback for cmpxchg64
and cmpxchg64_local on 80386 and 80486.

Q:
but why is it called cmpxchg_486 when the other functions are called

A:
Because the standard cmpxchg is missing only on 386, but cmpxchg8b is
missing both on 386 and 486.

Citing Intel's Instruction set reference:

cmpxchg:
This instruction is not supported on Intel processors earlier than the
Intel486 processors.

cmpxchg8b:
This instruction encoding is not supported on Intel processors earlier
than the Pentium processors.

Q:
What's the reason to have cmpxchg64_local on 32 bit architectures?
Without that need all this would just be a few simple defines.

A:
cmpxchg64_local on 32 bits architectures takes unsigned long long
parameters, but cmpxchg_local only takes longs. Since we have cmpxchg8b
to execute a 8 byte cmpxchg atomically on pentium and +, it makes sense
to provide a flavor of cmpxchg and cmpxchg_local using this instruction.

Also, for 32 bits architectures lacking the 64 bits atomic cmpxchg, it
makes sense _not_ to define cmpxchg64 while cmpxchg could still be
available.

Moreover, the fallback for cmpxchg8b on i386 for 386 and 486 is a

However, cmpxchg64_local will be emulated by disabling interrupts on all
architectures where it is not supported atomically.

Therefore, we *could* turn cmpxchg64_local into a cmpxchg_local, but it
would make the 386/486 fallbacks ugly, make its design different from
cmpxchg/cmpxchg64 (which really depends on atomic operations and cannot
be emulated) and require the __cmpxchg_local to be expressed as a macro
rather than an inline function so the parameters would not be fixed to
unsigned long long in every case.

So I think cmpxchg64_local makes sense there, but I am open to
suggestions.

Q:
Are there any callers?

A:
I am actually using it in LTTng in my timestamping code. I use it to
work around CPUs with asynchronous TSCs. I need to update 64 bits
values atomically on this 32 bits architecture.

Changelog:
- Ran though checkpatch.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c  |  17 ++++
 include/asm-x86/cmpxchg_32.h | 168 +++++++++++++++++++++--------------
 2 files changed, 119 insertions(+), 66 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cc8c501b9f39..867ff94579be 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -342,5 +342,22 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
 EXPORT_SYMBOL(cmpxchg_386_u32);
 #endif
 
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+	u64 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u64 *)ptr;
+	if (prev == old)
+		*(u64 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+
 // arch_initcall(intel_cpu_init);
 
diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index f86ede28f6dc..cea1dae288a7 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h
@@ -105,15 +105,24 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-#define sync_cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-#define cmpxchg_local(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
+#define cmpxchg(ptr, o, n)						     \
+	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	     \
+					(unsigned long)(n), sizeof(*(ptr))))
+#define sync_cmpxchg(ptr, o, n)						     \
+	((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o),	     \
+					(unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg_local(ptr, o, n)					     \
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	     \
+					(unsigned long)(n), sizeof(*(ptr))))
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+#define cmpxchg64(ptr, o, n)						      \
+	((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o),      \
+					(unsigned long long)(n)))
+#define cmpxchg64_local(ptr, o, n)					      \
+	((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o),\
+					(unsigned long long)(n)))
 #endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -203,57 +212,8 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	return old;
 }
 
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * Building a kernel capable running on 80386. It may be necessary to
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
- * a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return cmpxchg_386_u8(ptr, old, new);
-	case 2:
-		return cmpxchg_386_u16(ptr, old, new);
-	case 4:
-		return cmpxchg_386_u32(ptr, old, new);
-	}
-	return old;
-}
-
-#define cmpxchg(ptr,o,n)						\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = __cmpxchg((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
-	else								\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
-	__ret;								\
-})
-#define cmpxchg_local(ptr,o,n)						\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = __cmpxchg_local((ptr), (unsigned long)(o),	\
-					(unsigned long)(n), sizeof(*(ptr))); \
-	else								\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
-	__ret;								\
-})
-#endif
-
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
-				      unsigned long long new)
+static inline unsigned long long __cmpxchg64(volatile void *ptr,
+			unsigned long long old, unsigned long long new)
 {
 	unsigned long long prev;
 	__asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
@@ -280,10 +240,86 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
 	return prev;
 }
 
-#define cmpxchg64(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
-					(unsigned long long)(n)))
-#define cmpxchg64_local(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\
-					(unsigned long long)(n)))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+		return cmpxchg_386_u8(ptr, old, new);
+	case 2:
+		return cmpxchg_386_u16(ptr, old, new);
+	case 4:
+		return cmpxchg_386_u32(ptr, old, new);
+	}
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	if (likely(boot_cpu_data.x86 > 3))				\
+		__ret = __cmpxchg((ptr), (unsigned long)(o),		\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	else								\
+		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	__ret;								\
+})
+#define cmpxchg_local(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	if (likely(boot_cpu_data.x86 > 3))				\
+		__ret = __cmpxchg_local((ptr), (unsigned long)(o),	\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	else								\
+		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	__ret;								\
+})
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG64
+/*
+ * Building a kernel capable running on 80386 and 80486. It may be necessary
+ * to simulate the cmpxchg8b on the 80386 and 80486 CPU.
+ */
+
+extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
+
+#define cmpxchg64(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	if (likely(boot_cpu_data.x86 > 4))				\
+		__ret = __cmpxchg64((ptr), (unsigned long long)(o),	\
+				(unsigned long long)(n));		\
+	else								\
+		__ret = cmpxchg_486_u64((ptr), (unsigned long long)(o),	\
+				(unsigned long long)(n));		\
+	__ret;								\
+})
+#define cmpxchg64_local(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	if (likely(boot_cpu_data.x86 > 4))				\
+		__ret = __cmpxchg64_local((ptr), (unsigned long long)(o), \
+				(unsigned long long)(n));		\
+	else								\
+		__ret = cmpxchg_486_u64((ptr), (unsigned long long)(o),	\
+				(unsigned long long)(n));		\
+	__ret;								\
+})
+
+#endif
+
 #endif

From 022eb43419f896a3647f769cdc3b5e13a8fb8ee7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 30 Jan 2008 13:30:47 +0100
Subject: [PATCH 178/890] x86: kmap_atomic() debugging

[ mingo@elte.hu: cleanups and made dependent on CONFIG_DEBUG_HIGHMEM.

  this caught a handful of bugs already, so lets apply it. If it gets
  things wrong we'll disable it. ]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/highmem_32.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 1c3bf95f7356..3d936f232704 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -18,6 +18,49 @@ void kunmap(struct page *page)
 	kunmap_high(page);
 }
 
+static void debug_kmap_atomic_prot(enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+	static unsigned warn_count = 10;
+
+	if (unlikely(warn_count == 0))
+		return;
+
+	if (unlikely(in_interrupt())) {
+		if (in_irq()) {
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		} else if (!irqs_disabled()) {	/* softirq */
+			if (type != KM_IRQ0 && type != KM_IRQ1 &&
+			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
+			    type != KM_SKB_SUNRPC_DATA &&
+			    type != KM_SKB_DATA_SOFTIRQ &&
+			    type != KM_BOUNCE_READ) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		}
+	}
+
+	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
+			type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
+		if (!irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
+		if (irq_count() == 0 && !irqs_disabled()) {
+			WARN_ON(1);
+			warn_count--;
+		}
+	}
+#endif
+}
+
 /*
  * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
  * no global lock is needed and because the kmap code must perform a global TLB
@@ -30,8 +73,10 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
-
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+
+	debug_kmap_atomic_prot(type);
+
 	pagefault_disable();
 
 	if (!PageHighMem(page))

From fb7fa8f1741c91f6c6e958762155abe9339476ca Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:47 +0100
Subject: [PATCH 179/890] ptrace: arch_has_single_step

This defines the new macro arch_has_single_step() in linux/ptrace.h, a
default for when asm/ptrace.h does not define it.  It declares the new
user_enable_single_step and user_disable_single_step functions.
This is not used yet, but paves the way to harmonize on this interface
for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ptrace.h | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 3ea5750a0f7e..8f06c6fb22a5 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -129,6 +129,52 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
 #define force_successful_syscall_return() do { } while (0)
 #endif
 
+/*
+ * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
+ *
+ * These do-nothing inlines are used when the arch does not
+ * implement single-step.  The kerneldoc comments are here
+ * to document the interface for all arch definitions.
+ */
+
+#ifndef arch_has_single_step
+/**
+ * arch_has_single_step - does this CPU support user-mode single-step?
+ *
+ * If this is defined, then there must be function declarations or
+ * inlines for user_enable_single_step() and user_disable_single_step().
+ * arch_has_single_step() should evaluate to nonzero iff the machine
+ * supports instruction single-step for user mode.
+ * It can be a constant or it can test a CPU feature bit.
+ */
+#define arch_has_single_step()		(0)
+
+/**
+ * user_enable_single_step - single-step in user-mode task
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * This can only be called when arch_has_single_step() has returned nonzero.
+ * Set @task so that when it returns to user mode, it will trap after the
+ * next single instruction executes.
+ */
+static inline void user_enable_single_step(struct task_struct *task)
+{
+	BUG();			/* This can never be called.  */
+}
+
+/**
+ * user_disable_single_step - cancel user-mode single-step
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * Clear @task of the effects of user_enable_single_step().  This can
+ * be called whether or not user_enable_single_step() was ever called
+ * on @task, and even if arch_has_single_step() returned zero.
+ */
+static inline void user_disable_single_step(struct task_struct *task)
+{
+}
+#endif	/* arch_has_single_step */
+
 #endif
 
 #endif

From 2608a6584a0a32d3251dfafad31d9f8b2c784466 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:48 +0100
Subject: [PATCH 180/890] x86: segment selector macros

This copies into asm-x86/segment_64.h some macros from asm-x86/segment_32.h
for dissecting segment selectors.  This lets other code use these macros
uniformly on 32/64-bit rather than duplicating the constants elsewhere.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/segment_64.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h
index 04b8ab21328f..dce742101348 100644
--- a/include/asm-x86/segment_64.h
+++ b/include/asm-x86/segment_64.h
@@ -50,4 +50,15 @@
 #define GDT_SIZE (GDT_ENTRIES * 8)
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
 
+/* Bottom two bits of selector give the ring privilege level */
+#define SEGMENT_RPL_MASK	0x3
+/* Bit 2 is table indicator (LDT/GDT) */
+#define SEGMENT_TI_MASK		0x4
+
+/* User mode is privilege level 3 */
+#define USER_RPL		0x3
+/* LDT segment has TI set, GDT has it cleared */
+#define SEGMENT_LDT		0x4
+#define SEGMENT_GDT		0x0
+
 #endif

From 77c03dcd448aa4c7be45f4edb97381ef463e3911 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:48 +0100
Subject: [PATCH 181/890] x86: remove TRAP_FLAG

This gets rid of the local constant macro TRAP_FLAG.
It's redundant with the public constant macro X86_EFLAGS_TF.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 9 +++------
 arch/x86/kernel/ptrace_64.c | 9 +++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 09227cfb7c4c..4619bda470b6 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -37,9 +37,6 @@
  */
 #define FLAG_MASK 0x00050dd5
 
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100
-
 /*
  * Offset of eflags on child stack..
  */
@@ -235,11 +232,11 @@ static void set_singlestep(struct task_struct *child)
 	/*
 	 * If TF was already set, don't do anything else
 	 */
-	if (regs->eflags & TRAP_FLAG)
+	if (regs->eflags & X86_EFLAGS_TF)
 		return;
 
 	/* Set TF on the kernel stack.. */
-	regs->eflags |= TRAP_FLAG;
+	regs->eflags |= X86_EFLAGS_TF;
 
 	/*
 	 * ..but if TF is changed by the instruction we will trace,
@@ -260,7 +257,7 @@ static void clear_singlestep(struct task_struct *child)
 	/* But touch TF only if it was set by us.. */
 	if (child->ptrace & PT_DTRACE) {
 		struct pt_regs *regs = get_child_regs(child);
-		regs->eflags &= ~TRAP_FLAG;
+		regs->eflags &= ~X86_EFLAGS_TF;
 		child->ptrace &= ~PT_DTRACE;
 	}
 }
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 375fadc23a25..8e433b3773d2 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -42,9 +42,6 @@
  */
 #define FLAG_MASK 0x54dd5UL
 
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100UL
-
 /*
  * eflags and offset of eflags on child stack..
  */
@@ -187,11 +184,11 @@ static void set_singlestep(struct task_struct *child)
 	/*
 	 * If TF was already set, don't do anything else
 	 */
-	if (regs->eflags & TRAP_FLAG)
+	if (regs->eflags & X86_EFLAGS_TF)
 		return;
 
 	/* Set TF on the kernel stack.. */
-	regs->eflags |= TRAP_FLAG;
+	regs->eflags |= X86_EFLAGS_TF;
 
 	/*
 	 * ..but if TF is changed by the instruction we will trace,
@@ -212,7 +209,7 @@ static void clear_singlestep(struct task_struct *child)
 	/* But touch TF only if it was set by us.. */
 	if (child->ptrace & PT_DTRACE) {
 		struct pt_regs *regs = task_pt_regs(child);
-		regs->eflags &= ~TRAP_FLAG;
+		regs->eflags &= ~X86_EFLAGS_TF;
 		child->ptrace &= ~PT_DTRACE;
 	}
 }

From 7f232343e0ea37ffc0a552cdbd4825482c949281 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:48 +0100
Subject: [PATCH 182/890] x86: arch_has_single_step

This defines the new standard arch_has_single_step macro.  It makes the
existing set_singlestep and clear_singlestep entry points global, and
renames them to the new standard names user_enable_single_step and
user_disable_single_step, respectively.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 12 ++++++------
 arch/x86/kernel/ptrace_64.c | 12 ++++++------
 include/asm-x86/ptrace.h    |  7 +++++++
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 4619bda470b6..1402a54ef61f 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -218,7 +218,7 @@ static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs
 	return 0;
 }
 
-static void set_singlestep(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
 {
 	struct pt_regs *regs = get_child_regs(child);
 
@@ -249,7 +249,7 @@ static void set_singlestep(struct task_struct *child)
 	child->ptrace |= PT_DTRACE;
 }
 
-static void clear_singlestep(struct task_struct *child)
+void user_disable_single_step(struct task_struct *child)
 {
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
@@ -269,7 +269,7 @@ static void clear_singlestep(struct task_struct *child)
  */
 void ptrace_disable(struct task_struct *child)
 { 
-	clear_singlestep(child);
+	user_disable_single_step(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 }
 
@@ -403,7 +403,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		child->exit_code = data;
 		/* make sure the single step bit is not set. */
-		clear_singlestep(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		ret = 0;
 		break;
@@ -419,7 +419,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 		child->exit_code = SIGKILL;
 		/* make sure the single step bit is not set. */
-		clear_singlestep(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		break;
 
@@ -435,7 +435,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		set_singlestep(child);
+		user_enable_single_step(child);
 		child->exit_code = data;
 		/* give it a chance to run. */
 		wake_up_process(child);
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 8e433b3773d2..7373a99facf3 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -170,7 +170,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 	return 0;
 }
 
-static void set_singlestep(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
 {
 	struct pt_regs *regs = task_pt_regs(child);
 
@@ -201,7 +201,7 @@ static void set_singlestep(struct task_struct *child)
 	child->ptrace |= PT_DTRACE;
 }
 
-static void clear_singlestep(struct task_struct *child)
+void user_disable_single_step(struct task_struct *child)
 {
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
@@ -221,7 +221,7 @@ static void clear_singlestep(struct task_struct *child)
  */
 void ptrace_disable(struct task_struct *child)
 { 
-	clear_singlestep(child);
+	user_disable_single_step(child);
 }
 
 static int putreg(struct task_struct *child,
@@ -461,7 +461,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 		child->exit_code = data;
 		/* make sure the single step bit is not set. */
-		clear_singlestep(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		ret = 0;
 		break;
@@ -504,7 +504,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 		child->exit_code = SIGKILL;
 		/* make sure the single step bit is not set. */
-		clear_singlestep(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		break;
 
@@ -513,7 +513,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
-		set_singlestep(child);
+		user_enable_single_step(child);
 		child->exit_code = data;
 		/* give it a chance to run. */
 		wake_up_process(child);
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 105d1534eaf4..fe75422f034b 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -140,6 +140,13 @@ enum {
 
 #ifdef __KERNEL__
 
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 struct user_desc;
 extern int do_get_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info);

From fa1e03eae2f38e7b38095301b043da9c274d2284 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:50 +0100
Subject: [PATCH 183/890] x86: single_step moved

This moves the single-step support code from ptrace_64.c into a new file
step.c, verbatim.  This paves the way for consolidating this code between
64-bit and 32-bit versions.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_64 |   2 +
 arch/x86/kernel/ptrace_64.c | 134 ----------------------------------
 arch/x86/kernel/step.c      | 140 ++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+), 134 deletions(-)
 create mode 100644 arch/x86/kernel/step.c

diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index e1ba82e582a8..d908f0175e76 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -13,6 +13,8 @@ obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
 		i8253.o io_delay.o rtc.o
 
+obj-y				+= step.o
+
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 7373a99facf3..4abfbced9b26 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -80,140 +80,6 @@ static inline long put_stack_long(struct task_struct *task, int offset,
 	return 0;
 }
 
-#define LDT_SEGMENT 4
-
-unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
-	unsigned long addr, seg;
-
-	addr = regs->rip;
-	seg = regs->cs & 0xffff;
-
-	/*
-	 * We'll assume that the code segments in the GDT
-	 * are all zero-based. That is largely true: the
-	 * TLS segments are used for data, and the PNPBIOS
-	 * and APM bios ones we just ignore here.
-	 */
-	if (seg & LDT_SEGMENT) {
-		u32 *desc;
-		unsigned long base;
-
-		seg &= ~7UL;
-
-		mutex_lock(&child->mm->context.lock);
-		if (unlikely((seg >> 3) >= child->mm->context.size))
-			addr = -1L; /* bogus selector, access would fault */
-		else {
-			desc = child->mm->context.ldt + seg;
-			base = ((desc[0] >> 16) |
-				((desc[1] & 0xff) << 16) |
-				(desc[1] & 0xff000000));
-
-			/* 16-bit code segment? */
-			if (!((desc[1] >> 22) & 1))
-				addr &= 0xffff;
-			addr += base;
-		}
-		mutex_unlock(&child->mm->context.lock);
-	}
-
-	return addr;
-}
-
-static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
-{
-	int i, copied;
-	unsigned char opcode[15];
-	unsigned long addr = convert_rip_to_linear(child, regs);
-
-	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
-	for (i = 0; i < copied; i++) {
-		switch (opcode[i]) {
-		/* popf and iret */
-		case 0x9d: case 0xcf:
-			return 1;
-
-			/* CHECKME: 64 65 */
-
-		/* opcode and address size prefixes */
-		case 0x66: case 0x67:
-			continue;
-		/* irrelevant prefixes (segment overrides and repeats) */
-		case 0x26: case 0x2e:
-		case 0x36: case 0x3e:
-		case 0x64: case 0x65:
-		case 0xf2: case 0xf3:
-			continue;
-
-		case 0x40 ... 0x4f:
-			if (regs->cs != __USER_CS)
-				/* 32-bit mode: register increment */
-				return 0;
-			/* 64-bit mode: REX prefix */
-			continue;
-
-			/* CHECKME: f2, f3 */
-
-		/*
-		 * pushf: NOTE! We should probably not let
-		 * the user see the TF bit being set. But
-		 * it's more pain than it's worth to avoid
-		 * it, and a debugger could emulate this
-		 * all in user space if it _really_ cares.
-		 */
-		case 0x9c:
-		default:
-			return 0;
-		}
-	}
-	return 0;
-}
-
-void user_enable_single_step(struct task_struct *child)
-{
-	struct pt_regs *regs = task_pt_regs(child);
-
-	/*
-	 * Always set TIF_SINGLESTEP - this guarantees that
-	 * we single-step system calls etc..  This will also
-	 * cause us to set TF when returning to user mode.
-	 */
-	set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
-	/*
-	 * If TF was already set, don't do anything else
-	 */
-	if (regs->eflags & X86_EFLAGS_TF)
-		return;
-
-	/* Set TF on the kernel stack.. */
-	regs->eflags |= X86_EFLAGS_TF;
-
-	/*
-	 * ..but if TF is changed by the instruction we will trace,
-	 * don't mark it as being "us" that set it, so that we
-	 * won't clear it by hand later.
-	 */
-	if (is_setting_trap_flag(child, regs))
-		return;
-
-	child->ptrace |= PT_DTRACE;
-}
-
-void user_disable_single_step(struct task_struct *child)
-{
-	/* Always clear TIF_SINGLESTEP... */
-	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
-	/* But touch TF only if it was set by us.. */
-	if (child->ptrace & PT_DTRACE) {
-		struct pt_regs *regs = task_pt_regs(child);
-		regs->eflags &= ~X86_EFLAGS_TF;
-		child->ptrace &= ~PT_DTRACE;
-	}
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
new file mode 100644
index 000000000000..cb3c8bc2939a
--- /dev/null
+++ b/arch/x86/kernel/step.c
@@ -0,0 +1,140 @@
+/*
+ * x86 single-step support code, common to 32-bit and 64-bit.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+
+#define LDT_SEGMENT 4
+
+unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
+{
+	unsigned long addr, seg;
+
+	addr = regs->rip;
+	seg = regs->cs & 0xffff;
+
+	/*
+	 * We'll assume that the code segments in the GDT
+	 * are all zero-based. That is largely true: the
+	 * TLS segments are used for data, and the PNPBIOS
+	 * and APM bios ones we just ignore here.
+	 */
+	if (seg & LDT_SEGMENT) {
+		u32 *desc;
+		unsigned long base;
+
+		seg &= ~7UL;
+
+		mutex_lock(&child->mm->context.lock);
+		if (unlikely((seg >> 3) >= child->mm->context.size))
+			addr = -1L; /* bogus selector, access would fault */
+		else {
+			desc = child->mm->context.ldt + seg;
+			base = ((desc[0] >> 16) |
+				((desc[1] & 0xff) << 16) |
+				(desc[1] & 0xff000000));
+
+			/* 16-bit code segment? */
+			if (!((desc[1] >> 22) & 1))
+				addr &= 0xffff;
+			addr += base;
+		}
+		mutex_unlock(&child->mm->context.lock);
+	}
+
+	return addr;
+}
+
+static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
+{
+	int i, copied;
+	unsigned char opcode[15];
+	unsigned long addr = convert_rip_to_linear(child, regs);
+
+	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+	for (i = 0; i < copied; i++) {
+		switch (opcode[i]) {
+		/* popf and iret */
+		case 0x9d: case 0xcf:
+			return 1;
+
+			/* CHECKME: 64 65 */
+
+		/* opcode and address size prefixes */
+		case 0x66: case 0x67:
+			continue;
+		/* irrelevant prefixes (segment overrides and repeats) */
+		case 0x26: case 0x2e:
+		case 0x36: case 0x3e:
+		case 0x64: case 0x65:
+		case 0xf2: case 0xf3:
+			continue;
+
+		case 0x40 ... 0x4f:
+			if (regs->cs != __USER_CS)
+				/* 32-bit mode: register increment */
+				return 0;
+			/* 64-bit mode: REX prefix */
+			continue;
+
+			/* CHECKME: f2, f3 */
+
+		/*
+		 * pushf: NOTE! We should probably not let
+		 * the user see the TF bit being set. But
+		 * it's more pain than it's worth to avoid
+		 * it, and a debugger could emulate this
+		 * all in user space if it _really_ cares.
+		 */
+		case 0x9c:
+		default:
+			return 0;
+		}
+	}
+	return 0;
+}
+
+void user_enable_single_step(struct task_struct *child)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	/*
+	 * Always set TIF_SINGLESTEP - this guarantees that
+	 * we single-step system calls etc..  This will also
+	 * cause us to set TF when returning to user mode.
+	 */
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+	/*
+	 * If TF was already set, don't do anything else
+	 */
+	if (regs->eflags & X86_EFLAGS_TF)
+		return;
+
+	/* Set TF on the kernel stack.. */
+	regs->eflags |= X86_EFLAGS_TF;
+
+	/*
+	 * ..but if TF is changed by the instruction we will trace,
+	 * don't mark it as being "us" that set it, so that we
+	 * won't clear it by hand later.
+	 */
+	if (is_setting_trap_flag(child, regs))
+		return;
+
+	child->ptrace |= PT_DTRACE;
+}
+
+void user_disable_single_step(struct task_struct *child)
+{
+	/* Always clear TIF_SINGLESTEP... */
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+	/* But touch TF only if it was set by us.. */
+	if (child->ptrace & PT_DTRACE) {
+		struct pt_regs *regs = task_pt_regs(child);
+		regs->eflags &= ~X86_EFLAGS_TF;
+		child->ptrace &= ~PT_DTRACE;
+	}
+}

From 3f80c1adc900769f2070432419da3b5ddbf066fc Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:50 +0100
Subject: [PATCH 184/890] x86: single_step segment macros

This cleans up the single-step code to use the asm/segment.h macros
for segment selector magic bits, rather than its own constant.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cb3c8bc2939a..3b70f20f21f9 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,8 +5,6 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 
-#define LDT_SEGMENT 4
-
 unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
 	unsigned long addr, seg;
@@ -20,7 +18,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r
 	 * TLS segments are used for data, and the PNPBIOS
 	 * and APM bios ones we just ignore here.
 	 */
-	if (seg & LDT_SEGMENT) {
+	if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
 		u32 *desc;
 		unsigned long base;
 

From 5f76cb1f6c42e7575256595f85b8b97d84ec669e Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:50 +0100
Subject: [PATCH 185/890] x86: single_step 0xf0

This fixes the 64-bit single-step handling code's instruction
decoder to grok the 0xf0 (lock) prefix, which the 32-bit code
already does correctly.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 3b70f20f21f9..6a93b93f91f1 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -66,7 +66,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 		case 0x26: case 0x2e:
 		case 0x36: case 0x3e:
 		case 0x64: case 0x65:
-		case 0xf2: case 0xf3:
+		case 0xf0: case 0xf2: case 0xf3:
 			continue;
 
 		case 0x40 ... 0x4f:

From 7122ec8158b0f88befd94f4da8feae2c8d08d1b4 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:50 +0100
Subject: [PATCH 186/890] x86: single_step: share code

This removes the single-step code from ptrace_32.c and uses the step.c code
shared with the 64-bit kernel.  The two versions of the code were nearly
identical already, so the shared code has only a couple of simple #ifdef's.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32 |   1 +
 arch/x86/kernel/ptrace_32.c | 125 ------------------------------------
 arch/x86/kernel/step.c      |  14 ++++
 3 files changed, 15 insertions(+), 125 deletions(-)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 9a6577a746ba..20e23c4c18b6 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -11,6 +11,7 @@ obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
 obj-y				+= tls.o
+obj-y				+= step.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 1402a54ef61f..b73960885c3f 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -137,131 +137,6 @@ static unsigned long getreg(struct task_struct *child,
 	return retval;
 }
 
-#define LDT_SEGMENT 4
-
-static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
-	unsigned long addr, seg;
-
-	addr = regs->eip;
-	seg = regs->xcs & 0xffff;
-	if (regs->eflags & VM_MASK) {
-		addr = (addr & 0xffff) + (seg << 4);
-		return addr;
-	}
-
-	/*
-	 * We'll assume that the code segments in the GDT
-	 * are all zero-based. That is largely true: the
-	 * TLS segments are used for data, and the PNPBIOS
-	 * and APM bios ones we just ignore here.
-	 */
-	if (seg & LDT_SEGMENT) {
-		u32 *desc;
-		unsigned long base;
-
-		seg &= ~7UL;
-
-		mutex_lock(&child->mm->context.lock);
-		if (unlikely((seg >> 3) >= child->mm->context.size))
-			addr = -1L; /* bogus selector, access would fault */
-		else {
-			desc = child->mm->context.ldt + seg;
-			base = ((desc[0] >> 16) |
-				((desc[1] & 0xff) << 16) |
-				(desc[1] & 0xff000000));
-
-			/* 16-bit code segment? */
-			if (!((desc[1] >> 22) & 1))
-				addr &= 0xffff;
-			addr += base;
-		}
-		mutex_unlock(&child->mm->context.lock);
-	}
-	return addr;
-}
-
-static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
-{
-	int i, copied;
-	unsigned char opcode[15];
-	unsigned long addr = convert_eip_to_linear(child, regs);
-
-	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
-	for (i = 0; i < copied; i++) {
-		switch (opcode[i]) {
-		/* popf and iret */
-		case 0x9d: case 0xcf:
-			return 1;
-		/* opcode and address size prefixes */
-		case 0x66: case 0x67:
-			continue;
-		/* irrelevant prefixes (segment overrides and repeats) */
-		case 0x26: case 0x2e:
-		case 0x36: case 0x3e:
-		case 0x64: case 0x65:
-		case 0xf0: case 0xf2: case 0xf3:
-			continue;
-
-		/*
-		 * pushf: NOTE! We should probably not let
-		 * the user see the TF bit being set. But
-		 * it's more pain than it's worth to avoid
-		 * it, and a debugger could emulate this
-		 * all in user space if it _really_ cares.
-		 */
-		case 0x9c:
-		default:
-			return 0;
-		}
-	}
-	return 0;
-}
-
-void user_enable_single_step(struct task_struct *child)
-{
-	struct pt_regs *regs = get_child_regs(child);
-
-	/*
-	 * Always set TIF_SINGLESTEP - this guarantees that 
-	 * we single-step system calls etc..  This will also
-	 * cause us to set TF when returning to user mode.
-	 */
-	set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
-	/*
-	 * If TF was already set, don't do anything else
-	 */
-	if (regs->eflags & X86_EFLAGS_TF)
-		return;
-
-	/* Set TF on the kernel stack.. */
-	regs->eflags |= X86_EFLAGS_TF;
-
-	/*
-	 * ..but if TF is changed by the instruction we will trace,
-	 * don't mark it as being "us" that set it, so that we
-	 * won't clear it by hand later.
-	 */
-	if (is_setting_trap_flag(child, regs))
-		return;
-	
-	child->ptrace |= PT_DTRACE;
-}
-
-void user_disable_single_step(struct task_struct *child)
-{
-	/* Always clear TIF_SINGLESTEP... */
-	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
-	/* But touch TF only if it was set by us.. */
-	if (child->ptrace & PT_DTRACE) {
-		struct pt_regs *regs = get_child_regs(child);
-		regs->eflags &= ~X86_EFLAGS_TF;
-		child->ptrace &= ~PT_DTRACE;
-	}
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 6a93b93f91f1..6732272e3479 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,12 +5,24 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 
+#ifdef CONFIG_X86_32
+static
+#endif
 unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
 	unsigned long addr, seg;
 
+#ifdef CONFIG_X86_64
 	addr = regs->rip;
 	seg = regs->cs & 0xffff;
+#else
+	addr = regs->eip;
+	seg = regs->xcs & 0xffff;
+	if (regs->eflags & X86_EFLAGS_VM) {
+		addr = (addr & 0xffff) + (seg << 4);
+		return addr;
+	}
+#endif
 
 	/*
 	 * We'll assume that the code segments in the GDT
@@ -69,12 +81,14 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 		case 0xf0: case 0xf2: case 0xf3:
 			continue;
 
+#ifdef CONFIG_X86_64
 		case 0x40 ... 0x4f:
 			if (regs->cs != __USER_CS)
 				/* 32-bit mode: register increment */
 				return 0;
 			/* 64-bit mode: REX prefix */
 			continue;
+#endif
 
 			/* CHECKME: f2, f3 */
 

From e1f287735c1e58c653b516931b5d3dd899edcb77 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:50 +0100
Subject: [PATCH 187/890] x86 single_step: TIF_FORCED_TF

This changes the single-step support to use a new thread_info flag
TIF_FORCED_TF instead of the PT_DTRACE flag in task_struct.ptrace.
This keeps arch implementation uses out of this non-arch field.

This changes the ptrace access to eflags to mask TF and maintain
the TIF_FORCED_TF flag directly if userland sets TF, instead of
relying on ptrace_signal_deliver.  The 64-bit and 32-bit kernels
are harmonized on this same behavior.  The ptrace_signal_deliver
approach works now, but this change makes the low-level register
access code reliable when called from different contexts than a
ptrace stop, which will be possible in the future.

The 64-bit do_debug exception handler is also changed not to clear TF
from user-mode registers.  This matches the 32-bit kernel's behavior.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c         | 20 ++++++++++++++++++--
 arch/x86/kernel/process_32.c     |  3 ---
 arch/x86/kernel/process_64.c     |  5 -----
 arch/x86/kernel/ptrace_32.c      | 17 +++++++++++++++++
 arch/x86/kernel/ptrace_64.c      | 20 ++++++++++++++++++++
 arch/x86/kernel/signal_32.c      | 12 +++++-------
 arch/x86/kernel/signal_64.c      | 14 +++++---------
 arch/x86/kernel/step.c           |  9 +++------
 arch/x86/kernel/traps_64.c       | 23 +++++------------------
 include/asm-x86/signal.h         | 11 ++---------
 include/asm-x86/thread_info_32.h |  2 ++
 include/asm-x86/thread_info_64.h |  2 ++
 12 files changed, 79 insertions(+), 59 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 9d754b640205..5dee33417313 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -89,6 +89,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 		__u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
 
 		val &= FLAG_MASK;
+		/*
+		 * If the user value contains TF, mark that
+		 * it was not "us" (the debugger) that set it.
+		 * If not, make sure it stays set if we had.
+		 */
+		if (val & X86_EFLAGS_TF)
+			clear_tsk_thread_flag(child, TIF_FORCED_TF);
+		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			val |= X86_EFLAGS_TF;
 		*flags = val | (*flags & ~FLAG_MASK);
 		break;
 	}
@@ -179,9 +188,17 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 	R32(eax, rax);
 	R32(orig_eax, orig_rax);
 	R32(eip, rip);
-	R32(eflags, eflags);
 	R32(esp, rsp);
 
+	case offsetof(struct user32, regs.eflags):
+		/*
+		 * If the debugger set TF, hide it from the readout.
+		 */
+		*val = stack[offsetof(struct pt_regs, eflags)/8];
+		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			*val &= ~X86_EFLAGS_TF;
+		break;
+
 	case offsetof(struct user32, u_debugreg[0]):
 		*val = child->thread.debugreg0;
 		break;
@@ -425,4 +442,3 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	put_task_struct(child);
 	return ret;
 }
-
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4d66a56280d3..d9905c9d0fd5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -817,9 +817,6 @@ asmlinkage int sys_execve(struct pt_regs regs)
 			(char __user * __user *) regs.edx,
 			&regs);
 	if (error == 0) {
-		task_lock(current);
-		current->ptrace &= ~PT_DTRACE;
-		task_unlock(current);
 		/* Make sure we don't return using sysenter.. */
 		set_thread_flag(TIF_IRET);
 	}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ccc9d68d5a58..f7356e5517f6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -709,11 +709,6 @@ long sys_execve(char __user *name, char __user * __user *argv,
 	if (IS_ERR(filename)) 
 		return error;
 	error = do_execve(filename, argv, envp, &regs); 
-	if (error == 0) {
-		task_lock(current);
-		current->ptrace &= ~PT_DTRACE;
-		task_unlock(current);
-	}
 	putname(filename);
 	return error;
 }
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index b73960885c3f..bc7fd802dcc7 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -104,6 +104,15 @@ static int putreg(struct task_struct *child,
 			break;
 		case EFL:
 			value &= FLAG_MASK;
+			/*
+			 * If the user value contains TF, mark that
+			 * it was not "us" (the debugger) that set it.
+			 * If not, make sure it stays set if we had.
+			 */
+			if (value & X86_EFLAGS_TF)
+				clear_tsk_thread_flag(child, TIF_FORCED_TF);
+			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+				value |= X86_EFLAGS_TF;
 			value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
 			break;
 	}
@@ -119,6 +128,14 @@ static unsigned long getreg(struct task_struct *child,
 	unsigned long retval = ~0UL;
 
 	switch (regno >> 2) {
+		case EFL:
+			/*
+			 * If the debugger set TF, hide it from the readout.
+			 */
+			retval = get_stack_long(child, EFL_OFFSET);
+			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+				retval &= ~X86_EFLAGS_TF;
+			break;
 		case GS:
 			retval = child->thread.gs;
 			break;
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 4abfbced9b26..035d53e99c57 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -143,6 +143,15 @@ static int putreg(struct task_struct *child,
 			return 0;
 		case offsetof(struct user_regs_struct, eflags):
 			value &= FLAG_MASK;
+			/*
+			 * If the user value contains TF, mark that
+			 * it was not "us" (the debugger) that set it.
+			 * If not, make sure it stays set if we had.
+			 */
+			if (value & X86_EFLAGS_TF)
+				clear_tsk_thread_flag(child, TIF_FORCED_TF);
+			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+				value |= X86_EFLAGS_TF;
 			tmp = get_stack_long(child, EFL_OFFSET); 
 			tmp &= ~FLAG_MASK; 
 			value |= tmp;
@@ -189,6 +198,17 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 			if (child->thread.gsindex != GS_TLS_SEL)
 				return 0;
 			return get_desc_base(&child->thread.tls_array[GS_TLS]);
+		case offsetof(struct user_regs_struct, eflags):
+			/*
+			 * If the debugger set TF, hide it from the readout.
+			 */
+			regno = regno - sizeof(struct pt_regs);
+			val = get_stack_long(child, regno);
+			if (test_tsk_thread_flag(child, TIF_IA32))
+				val &= 0xffffffff;
+			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+				val &= ~X86_EFLAGS_TF;
+			return val;
 		default:
 			regno = regno - sizeof(struct pt_regs);
 			val = get_stack_long(child, regno);
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 1ac53e9a0859..0a7c812212c9 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -545,14 +545,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	}
 
 	/*
-	 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
-	 * that register information in the sigcontext is correct.
+	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+	 * flag so that register information in the sigcontext is correct.
 	 */
-	if (unlikely(regs->eflags & TF_MASK)
-	    && likely(current->ptrace & PT_DTRACE)) {
-		current->ptrace &= ~PT_DTRACE;
-		regs->eflags &= ~TF_MASK;
-	}
+	if (unlikely(regs->eflags & X86_EFLAGS_TF) &&
+	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+		regs->eflags &= ~X86_EFLAGS_TF;
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 38d806467c0f..ab0178ebe00a 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -349,16 +349,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	}
 
 	/*
-	 * If TF is set due to a debugger (PT_DTRACE), clear the TF
-	 * flag so that register information in the sigcontext is
-	 * correct.
+	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+	 * flag so that register information in the sigcontext is correct.
 	 */
-	if (unlikely(regs->eflags & TF_MASK)) {
-		if (likely(current->ptrace & PT_DTRACE)) {
-			current->ptrace &= ~PT_DTRACE;
-			regs->eflags &= ~TF_MASK;
-		}
-	}
+	if (unlikely(regs->eflags & X86_EFLAGS_TF) &&
+	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+		regs->eflags &= ~X86_EFLAGS_TF;
 
 #ifdef CONFIG_IA32_EMULATION
 	if (test_thread_flag(TIF_IA32)) {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 6732272e3479..243bff650ca5 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -135,7 +135,7 @@ void user_enable_single_step(struct task_struct *child)
 	if (is_setting_trap_flag(child, regs))
 		return;
 
-	child->ptrace |= PT_DTRACE;
+	set_tsk_thread_flag(child, TIF_FORCED_TF);
 }
 
 void user_disable_single_step(struct task_struct *child)
@@ -144,9 +144,6 @@ void user_disable_single_step(struct task_struct *child)
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 
 	/* But touch TF only if it was set by us.. */
-	if (child->ptrace & PT_DTRACE) {
-		struct pt_regs *regs = task_pt_regs(child);
-		regs->eflags &= ~X86_EFLAGS_TF;
-		child->ptrace &= ~PT_DTRACE;
-	}
+	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
+		task_pt_regs(child)->eflags &= ~X86_EFLAGS_TF;
 }
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index aa248d754533..874aca397b02 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -865,27 +865,14 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 
 	tsk->thread.debugreg6 = condition;
 
-	/* Mask out spurious TF errors due to lazy TF clearing */
+
+	/*
+	 * Single-stepping through TF: make sure we ignore any events in
+	 * kernel space (but re-enable TF when returning to user mode).
+	 */
 	if (condition & DR_STEP) {
-		/*
-		 * The TF error should be masked out only if the current
-		 * process is not traced and if the TRAP flag has been set
-		 * previously by a tracing process (condition detected by
-		 * the PT_DTRACE flag); remember that the i386 TRAP flag
-		 * can be modified by the process itself in user mode,
-		 * allowing programs to debug themselves without the ptrace()
-		 * interface.
-		 */
                 if (!user_mode(regs))
                        goto clear_TF_reenable;
-		/*
-		 * Was the TF flag set by a debugger? If so, clear it now,
-		 * so that register information is correct.
-		 */
-		if (tsk->ptrace & PT_DTRACE) {
-			regs->eflags &= ~TF_MASK;
-			tsk->ptrace &= ~PT_DTRACE;
-		}
 	}
 
 	/* Ok, finally something we can handle */
diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index 987a422a2c78..aee7eca585ab 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h
@@ -245,21 +245,14 @@ static __inline__ int sigfindinword(unsigned long word)
 
 struct pt_regs;
 
-#define ptrace_signal_deliver(regs, cookie)		\
-	do {						\
-		if (current->ptrace & PT_DTRACE) {	\
-			current->ptrace &= ~PT_DTRACE;	\
-			(regs)->eflags &= ~TF_MASK;	\
-		}					\
-	} while (0)
-
 #else /* __i386__ */
 
 #undef __HAVE_ARCH_SIG_BITOPS
 
+#endif /* !__i386__ */
+
 #define ptrace_signal_deliver(regs, cookie) do { } while (0)
 
-#endif /* !__i386__ */
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index a516e9192f11..009ecc6ad38b 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -138,6 +138,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_IO_BITMAP		18	/* uses I/O bitmap */
 #define TIF_FREEZE		19	/* is freezing for suspend */
 #define TIF_NOTSC		20	/* TSC is not accessible in userland */
+#define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -153,6 +154,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IO_BITMAP		(1<<TIF_IO_BITMAP)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_NOTSC		(1<<TIF_NOTSC)
+#define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index c8e7736fc792..e0f41b3deced 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -121,6 +121,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FREEZE		23	/* is freezing for suspend */
+#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -138,6 +139,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_DEBUG		(1<<TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1<<TIF_IO_BITMAP)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
+#define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \

From 36df29d7994180cf7f0ccc5d46495855d56d2721 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: [PATCH 188/890] ptrace: generic resume

This makes ptrace_request handle all the ptrace requests that wake
up the traced task.  These do low-level ptrace implementation magic
that is not arch-specific and should be kept out of arch code.  The
implementations on each arch usually do the same thing.  The new
generic code makes use of the arch_has_single_step macro and generic
entry points to handle PTRACE_SINGLESTEP.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index c719bb9d79ab..fad5f1e8511f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -366,6 +366,50 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
 	return error;
 }
 
+
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request)		0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request)	((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request)	0
+#endif
+
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+	if (!valid_signal(data))
+		return -EIO;
+
+	if (request == PTRACE_SYSCALL)
+		set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+	else
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+	if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
+		set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+	else
+		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+	if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+		if (unlikely(!arch_has_single_step()))
+			return -EIO;
+		user_enable_single_step(child);
+	}
+	else
+		user_disable_single_step(child);
+
+	child->exit_code = data;
+	wake_up_process(child);
+
+	return 0;
+}
+
 int ptrace_request(struct task_struct *child, long request,
 		   long addr, long data)
 {
@@ -390,6 +434,23 @@ int ptrace_request(struct task_struct *child, long request,
 	case PTRACE_DETACH:	 /* detach a process that was attached. */
 		ret = ptrace_detach(child, data);
 		break;
+
+#ifdef PTRACE_SINGLESTEP
+	case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SYSEMU
+	case PTRACE_SYSEMU:
+	case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+	case PTRACE_SYSCALL:
+	case PTRACE_CONT:
+		return ptrace_resume(child, request, data);
+
+	case PTRACE_KILL:
+		if (child->exit_state)	/* already dead */
+			return 0;
+		return ptrace_resume(child, request, SIGKILL);
+
 	default:
 		break;
 	}

From 18982c158f9c255d437713e63a93270d07408674 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: [PATCH 189/890] x86-64: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 64-bit
ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 45 -------------------------------------
 1 file changed, 45 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 035d53e99c57..b129b1fbb5d9 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -334,23 +334,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		break;
 	}
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:    /* restart after signal. */
-
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		child->exit_code = data;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
 
 #ifdef CONFIG_IA32_EMULATION
 		/* This makes only sense with 32bit programs. Allow a
@@ -378,34 +361,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = do_arch_prctl(child, data, addr);
 		break;
 
-/*
- * make the child exit.  Best I can do is send it a sigkill. 
- * perhaps it should be put in the status that it wants to 
- * exit.
- */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		child->exit_code = SIGKILL;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		break;
-
-	case PTRACE_SINGLESTEP:    /* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
-		user_enable_single_step(child);
-		child->exit_code = data;
-		/* give it a chance to run. */
-		wake_up_process(child);
-		ret = 0;
-		break;
-
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 	  	if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
 			       sizeof(struct user_regs_struct))) {

From 227195d4a6185e81855f56ed9bc815cad9a39398 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: [PATCH 190/890] x86-32: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 32-bit
ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 57 -------------------------------------
 1 file changed, 57 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index bc7fd802dcc7..bd3668c2421a 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -277,63 +277,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		  }
 		  break;
 
-	case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
-	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:	/* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSEMU) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		} else if (request == PTRACE_SYSCALL) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-		} else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}
-		child->exit_code = data;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
-
-/*
- * make the child exit.  Best I can do is send it a sigkill. 
- * perhaps it should be put in the status that it wants to 
- * exit.
- */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		break;
-
-	case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
-	case PTRACE_SINGLESTEP:	/* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-
-		if (request == PTRACE_SYSEMU_SINGLESTEP)
-			set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		user_enable_single_step(child);
-		child->exit_code = data;
-		/* give it a chance to run. */
-		wake_up_process(child);
-		ret = 0;
-		break;
-
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 	  	if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
 			ret = -EIO;

From 2a84b0d71973692b291f03a870c4d0d13f722d79 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: [PATCH 191/890] powerpc: arch_has_single_step

This defines the new standard arch_has_single_step macro.  It makes the
existing set_single_step and clear_single_step entry points global, and
renames them to the new standard names user_enable_single_step and
user_disable_single_step, respectively.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/kernel/ptrace.c | 12 ++++++------
 include/asm-powerpc/ptrace.h |  7 +++++++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 3e17d154d0d4..b970d7971779 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -256,7 +256,7 @@ static int set_evrregs(struct task_struct *task, unsigned long *data)
 #endif /* CONFIG_SPE */
 
 
-static void set_single_step(struct task_struct *task)
+void user_enable_single_step(struct task_struct *task)
 {
 	struct pt_regs *regs = task->thread.regs;
 
@@ -271,7 +271,7 @@ static void set_single_step(struct task_struct *task)
 	set_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
-static void clear_single_step(struct task_struct *task)
+void user_disable_single_step(struct task_struct *task)
 {
 	struct pt_regs *regs = task->thread.regs;
 
@@ -313,7 +313,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 void ptrace_disable(struct task_struct *child)
 {
 	/* make sure the single step bit is not set. */
-	clear_single_step(child);
+	user_disable_single_step(child);
 }
 
 /*
@@ -456,7 +456,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->exit_code = data;
 		/* make sure the single step bit is not set. */
-		clear_single_step(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		ret = 0;
 		break;
@@ -473,7 +473,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 		child->exit_code = SIGKILL;
 		/* make sure the single step bit is not set. */
-		clear_single_step(child);
+		user_disable_single_step(child);
 		wake_up_process(child);
 		break;
 	}
@@ -483,7 +483,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		set_single_step(child);
+		user_enable_single_step(child);
 		child->exit_code = data;
 		/* give it a chance to run. */
 		wake_up_process(child);
diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index 13fccc5a4119..3063363f6799 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -119,6 +119,13 @@ do {									      \
 } while (0)
 #endif /* __powerpc64__ */
 
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */

From d6f4fb7558ee8138fe86e2778bc16e3794d0baa2 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: [PATCH 192/890] powerpc: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the powerpc
ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/kernel/ptrace.c | 46 ------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index b970d7971779..8b056d2295cc 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -445,52 +445,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT: { /* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
-	}
-
-/*
- * make the child exit.  Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
-	case PTRACE_KILL: {
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		/* make sure the single step bit is not set. */
-		user_disable_single_step(child);
-		wake_up_process(child);
-		break;
-	}
-
-	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		user_enable_single_step(child);
-		child->exit_code = data;
-		/* give it a chance to run. */
-		wake_up_process(child);
-		ret = 0;
-		break;
-	}
-
 	case PTRACE_GET_DEBUGREG: {
 		ret = -EINVAL;
 		/* We only support one DABR and no IABRS at the moment */

From 62a97d447b511bf4f0f0aa8cdccfb9ed1c934c8b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:52 +0100
Subject: [PATCH 193/890] x86-32 ptrace: use task_pt_regs

This cleans up the 32-bit ptrace code to use task_pt_regs instead of its
own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 68 +++++++++----------------------------
 1 file changed, 16 insertions(+), 52 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index bd3668c2421a..a1425e9ad028 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -37,53 +37,20 @@
  */
 #define FLAG_MASK 0x00050dd5
 
-/*
- * Offset of eflags on child stack..
- */
-#define EFL_OFFSET offsetof(struct pt_regs, eflags)
-
-static inline struct pt_regs *get_child_regs(struct task_struct *task)
+static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
-	void *stack_top = (void *)task->thread.esp0;
-	return stack_top - sizeof(struct pt_regs);
-}
-
-/*
- * This routine will get a word off of the processes privileged stack.
- * the offset is bytes into the pt_regs structure on the stack.
- * This routine assumes that all the privileged stacks are in our
- * data space.
- */   
-static inline int get_stack_long(struct task_struct *task, int offset)
-{
-	unsigned char *stack;
-
-	stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
-	stack += offset;
-	return (*((int *)stack));
-}
-
-/*
- * This routine will put a word on the processes privileged stack.
- * the offset is bytes into the pt_regs structure on the stack.
- * This routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int put_stack_long(struct task_struct *task, int offset,
-	unsigned long data)
-{
-	unsigned char * stack;
-
-	stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
-	stack += offset;
-	*(unsigned long *) stack = data;
-	return 0;
+	BUILD_BUG_ON(offsetof(struct pt_regs, ebx) != 0);
+	if (regno > FS)
+		--regno;
+	return &regs->ebx + regno;
 }
 
 static int putreg(struct task_struct *child,
 	unsigned long regno, unsigned long value)
 {
-	switch (regno >> 2) {
+	struct pt_regs *regs = task_pt_regs(child);
+	regno >>= 2;
+	switch (regno) {
 		case GS:
 			if (value && (value & 3) != 3)
 				return -EIO;
@@ -113,26 +80,25 @@ static int putreg(struct task_struct *child,
 				clear_tsk_thread_flag(child, TIF_FORCED_TF);
 			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				value |= X86_EFLAGS_TF;
-			value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
+			value |= regs->eflags & ~FLAG_MASK;
 			break;
 	}
-	if (regno > FS*4)
-		regno -= 1*4;
-	put_stack_long(child, regno, value);
+	*pt_regs_access(regs, regno) = value;
 	return 0;
 }
 
-static unsigned long getreg(struct task_struct *child,
-	unsigned long regno)
+static unsigned long getreg(struct task_struct *child, unsigned long regno)
 {
+	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long retval = ~0UL;
 
-	switch (regno >> 2) {
+	regno >>= 2;
+	switch (regno) {
 		case EFL:
 			/*
 			 * If the debugger set TF, hide it from the readout.
 			 */
-			retval = get_stack_long(child, EFL_OFFSET);
+			retval = regs->eflags;
 			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				retval &= ~X86_EFLAGS_TF;
 			break;
@@ -147,9 +113,7 @@ static unsigned long getreg(struct task_struct *child,
 			retval = 0xffff;
 			/* fall through */
 		default:
-			if (regno > FS*4)
-				regno -= 1*4;
-			retval &= get_stack_long(child, regno);
+			retval &= *pt_regs_access(regs, regno);
 	}
 	return retval;
 }

From e4aed6cc45f06acd35e3dfbbaf632c5d5aa897c0 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:52 +0100
Subject: [PATCH 194/890] x86-64 ptrace: use task_pt_regs

This cleans up the 64-bit ptrace code to use task_pt_regs instead of its
own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 60 ++++++++-----------------------------
 1 file changed, 12 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index b129b1fbb5d9..d0a0aeaaa0c2 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -42,44 +42,6 @@
  */
 #define FLAG_MASK 0x54dd5UL
 
-/*
- * eflags and offset of eflags on child stack..
- */
-#define EFLAGS offsetof(struct pt_regs, eflags)
-#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs)))
-
-/*
- * this routine will get a word off of the processes privileged stack. 
- * the offset is how far from the base addr as stored in the TSS.  
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */   
-static inline unsigned long get_stack_long(struct task_struct *task, int offset)
-{
-	unsigned char *stack;
-
-	stack = (unsigned char *)task->thread.rsp0;
-	stack += offset;
-	return (*((unsigned long *)stack));
-}
-
-/*
- * this routine will put a word on the processes privileged stack. 
- * the offset is how far from the base addr as stored in the TSS.  
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline long put_stack_long(struct task_struct *task, int offset,
-	unsigned long data)
-{
-	unsigned char * stack;
-
-	stack = (unsigned char *) task->thread.rsp0;
-	stack += offset;
-	*(unsigned long *) stack = data;
-	return 0;
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -90,11 +52,16 @@ void ptrace_disable(struct task_struct *child)
 	user_disable_single_step(child);
 }
 
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
+{
+	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
+	return &regs->r15 + (offset / sizeof(regs->r15));
+}
+
 static int putreg(struct task_struct *child,
 	unsigned long regno, unsigned long value)
 {
-	unsigned long tmp; 
-	
+	struct pt_regs *regs = task_pt_regs(child);
 	switch (regno) {
 		case offsetof(struct user_regs_struct,fs):
 			if (value && (value & 3) != 3)
@@ -152,9 +119,7 @@ static int putreg(struct task_struct *child,
 				clear_tsk_thread_flag(child, TIF_FORCED_TF);
 			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				value |= X86_EFLAGS_TF;
-			tmp = get_stack_long(child, EFL_OFFSET); 
-			tmp &= ~FLAG_MASK; 
-			value |= tmp;
+			value |= regs->eflags & ~FLAG_MASK;
 			break;
 		case offsetof(struct user_regs_struct,cs): 
 			if ((value & 3) != 3)
@@ -162,12 +127,13 @@ static int putreg(struct task_struct *child,
 			value &= 0xffff;
 			break;
 	}
-	put_stack_long(child, regno - sizeof(struct pt_regs), value);
+	*pt_regs_access(regs, regno) = value;
 	return 0;
 }
 
 static unsigned long getreg(struct task_struct *child, unsigned long regno)
 {
+	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long val;
 	switch (regno) {
 		case offsetof(struct user_regs_struct, fs):
@@ -202,16 +168,14 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 			/*
 			 * If the debugger set TF, hide it from the readout.
 			 */
-			regno = regno - sizeof(struct pt_regs);
-			val = get_stack_long(child, regno);
+			val = regs->eflags;
 			if (test_tsk_thread_flag(child, TIF_IA32))
 				val &= 0xffffffff;
 			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				val &= ~X86_EFLAGS_TF;
 			return val;
 		default:
-			regno = regno - sizeof(struct pt_regs);
-			val = get_stack_long(child, regno);
+			val = *pt_regs_access(regs, regno);
 			if (test_tsk_thread_flag(child, TIF_IA32))
 				val &= 0xffffffff;
 			return val;

From 962ff3804d31a4d090bbcbd3d06a4b63e3a5b5fd Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:52 +0100
Subject: [PATCH 195/890] x86: x86-64 ptrace debugreg cleanup

This cleans up the 64-bit ptrace code to separate the guts of the
debug register access from the implementation of PTRACE_PEEKUSR and
PTRACE_POKEUSR.  The new functions ptrace_[gs]et_debugreg are made
global so that the ia32 code can later be changed to call them too.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 140 +++++++++++++++++-------------------
 include/asm-x86/ptrace.h    |   3 +
 2 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index d0a0aeaaa0c2..4ba66d8af717 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -183,9 +183,63 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 
 }
 
+unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+{
+	switch (n) {
+	case 0:		return child->thread.debugreg0;
+	case 1:		return child->thread.debugreg1;
+	case 2:		return child->thread.debugreg2;
+	case 3:		return child->thread.debugreg3;
+	case 6:		return child->thread.debugreg6;
+	case 7:		return child->thread.debugreg7;
+	}
+	return 0;
+}
+
+int ptrace_set_debugreg(struct task_struct *child, int n, unsigned long data)
+{
+	int i;
+
+	if (n < 4) {
+		int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
+		if (unlikely(data >= TASK_SIZE_OF(child) - dsize))
+			return -EIO;
+	}
+
+	switch (n) {
+	case 0:		child->thread.debugreg0 = data; break;
+	case 1:		child->thread.debugreg1 = data; break;
+	case 2:		child->thread.debugreg2 = data; break;
+	case 3:		child->thread.debugreg3 = data; break;
+
+	case 6:
+		if (data >> 32)
+			return -EIO;
+		child->thread.debugreg6 = data;
+		break;
+
+	case 7:
+		/*
+		 * See ptrace_32.c for an explanation of this awkward check.
+		 */
+		data &= ~DR_CONTROL_RESERVED;
+		for (i = 0; i < 4; i++)
+			if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+				return -EIO;
+		child->thread.debugreg7 = data;
+		if (data)
+			set_tsk_thread_flag(child, TIF_DEBUG);
+		else
+			clear_tsk_thread_flag(child, TIF_DEBUG);
+		break;
+	}
+
+	return 0;
+}
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	long i, ret;
+	long ret;
 	unsigned ui;
 
 	switch (request) {
@@ -204,32 +258,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		    addr > sizeof(struct user) - 7)
 			break;
 
-		switch (addr) { 
-		case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
+		tmp = 0;
+		if (addr < sizeof(struct user_regs_struct))
 			tmp = getreg(child, addr);
-			break;
-		case offsetof(struct user, u_debugreg[0]):
-			tmp = child->thread.debugreg0;
-			break;
-		case offsetof(struct user, u_debugreg[1]):
-			tmp = child->thread.debugreg1;
-			break;
-		case offsetof(struct user, u_debugreg[2]):
-			tmp = child->thread.debugreg2;
-			break;
-		case offsetof(struct user, u_debugreg[3]):
-			tmp = child->thread.debugreg3;
-			break;
-		case offsetof(struct user, u_debugreg[6]):
-			tmp = child->thread.debugreg6;
-			break;
-		case offsetof(struct user, u_debugreg[7]):
-			tmp = child->thread.debugreg7;
-			break;
-		default:
-			tmp = 0;
-			break;
+		else if (addr >= offsetof(struct user, u_debugreg[0])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			tmp = ptrace_get_debugreg(child, addr / sizeof(long));
 		}
+
 		ret = put_user(tmp,(unsigned long __user *) data);
 		break;
 	}
@@ -241,63 +277,19 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
-	{
-		int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
 		ret = -EIO;
 		if ((addr & 7) ||
 		    addr > sizeof(struct user) - 7)
 			break;
 
-		switch (addr) { 
-		case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
+		if (addr < sizeof(struct user_regs_struct))
 			ret = putreg(child, addr, data);
-			break;
-		/* Disallows to set a breakpoint into the vsyscall */
-		case offsetof(struct user, u_debugreg[0]):
-			if (data >= TASK_SIZE_OF(child) - dsize) break;
-			child->thread.debugreg0 = data;
-			ret = 0;
-			break;
-		case offsetof(struct user, u_debugreg[1]):
-			if (data >= TASK_SIZE_OF(child) - dsize) break;
-			child->thread.debugreg1 = data;
-			ret = 0;
-			break;
-		case offsetof(struct user, u_debugreg[2]):
-			if (data >= TASK_SIZE_OF(child) - dsize) break;
-			child->thread.debugreg2 = data;
-			ret = 0;
-			break;
-		case offsetof(struct user, u_debugreg[3]):
-			if (data >= TASK_SIZE_OF(child) - dsize) break;
-			child->thread.debugreg3 = data;
-			ret = 0;
-			break;
-		case offsetof(struct user, u_debugreg[6]):
-				  if (data >> 32)
-				break; 
-			child->thread.debugreg6 = data;
-			ret = 0;
-			break;
-		case offsetof(struct user, u_debugreg[7]):
-			/* See arch/i386/kernel/ptrace.c for an explanation of
-			 * this awkward check.*/
-			data &= ~DR_CONTROL_RESERVED;
-			for(i=0; i<4; i++)
-				if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-					break;
-			if (i == 4) {
-			  child->thread.debugreg7 = data;
-			  if (data)
-			  	set_tsk_thread_flag(child, TIF_DEBUG);
-			  else
-			  	clear_tsk_thread_flag(child, TIF_DEBUG);
-			  ret = 0;
-		  	}
-		  break;
+		else if (addr >= offsetof(struct user, u_debugreg[0])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			ret = ptrace_set_debugreg(child,
+						  addr / sizeof(long), data);
 		}
 		break;
-	}
 
 #ifdef CONFIG_IA32_EMULATION
 		/* This makes only sense with 32bit programs. Allow a
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index fe75422f034b..d223decd7b01 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -110,6 +110,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
 struct task_struct;
 
+extern unsigned long ptrace_get_debugreg(struct task_struct *child, int n);
+extern int ptrace_set_debugreg(struct task_struct *child, int n, unsigned long);
+
 extern unsigned long
 convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
 

From d0f081758260e9221729cabbc3aba63d89b8c8d4 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:52 +0100
Subject: [PATCH 196/890] x86: x86-64 ia32 ptrace debugreg cleanup

This cleans up the ia32 compat ptrace code to use shared code from
native ptrace for the implementation guts of debug register access.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c | 63 +++++-----------------------------------
 1 file changed, 8 insertions(+), 55 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 5dee33417313..5ababea1307a 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -41,7 +41,6 @@
 
 static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 {
-	int i;
 	__u64 *stack = (__u64 *)task_pt_regs(child);
 
 	switch (regno) {
@@ -102,43 +101,10 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 		break;
 	}
 
-	case offsetof(struct user32, u_debugreg[4]):
-	case offsetof(struct user32, u_debugreg[5]):
-		return -EIO;
-
-	case offsetof(struct user32, u_debugreg[0]):
-		child->thread.debugreg0 = val;
-		break;
-
-	case offsetof(struct user32, u_debugreg[1]):
-		child->thread.debugreg1 = val;
-		break;
-
-	case offsetof(struct user32, u_debugreg[2]):
-		child->thread.debugreg2 = val;
-		break;
-
-	case offsetof(struct user32, u_debugreg[3]):
-		child->thread.debugreg3 = val;
-		break;
-
-	case offsetof(struct user32, u_debugreg[6]):
-		child->thread.debugreg6 = val;
-		break;
-
-	case offsetof(struct user32, u_debugreg[7]):
-		val &= ~DR_CONTROL_RESERVED;
-		/* See arch/i386/kernel/ptrace.c for an explanation of
-		 * this awkward check.*/
-		for (i = 0; i < 4; i++)
-			if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
-			       return -EIO;
-		child->thread.debugreg7 = val;
-		if (val)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
+	case offsetof(struct user32, u_debugreg[0]) ...
+		offsetof(struct user32, u_debugreg[7]):
+		regno -= offsetof(struct user32, u_debugreg[0]);
+		return ptrace_set_debugreg(child, regno / 4, val);
 
 	default:
 		if (regno > sizeof(struct user32) || (regno & 3))
@@ -199,23 +165,10 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 			*val &= ~X86_EFLAGS_TF;
 		break;
 
-	case offsetof(struct user32, u_debugreg[0]):
-		*val = child->thread.debugreg0;
-		break;
-	case offsetof(struct user32, u_debugreg[1]):
-		*val = child->thread.debugreg1;
-		break;
-	case offsetof(struct user32, u_debugreg[2]):
-		*val = child->thread.debugreg2;
-		break;
-	case offsetof(struct user32, u_debugreg[3]):
-		*val = child->thread.debugreg3;
-		break;
-	case offsetof(struct user32, u_debugreg[6]):
-		*val = child->thread.debugreg6;
-		break;
-	case offsetof(struct user32, u_debugreg[7]):
-		*val = child->thread.debugreg7;
+	case offsetof(struct user32, u_debugreg[0]) ...
+		offsetof(struct user32, u_debugreg[7]):
+		regno -= offsetof(struct user32, u_debugreg[0]);
+		*val = ptrace_get_debugreg(child, regno / 4);
 		break;
 
 	default:

From d9771e8c50020bb1b4ca9eca9c188874ff126aa4 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:52 +0100
Subject: [PATCH 197/890] x86: x86-32 ptrace debugreg cleanup

This cleans up the 32-bit ptrace code to separate the guts of the
debug register access from the implementation of PTRACE_PEEKUSR and
PTRACE_POKEUSR.  The new functions ptrace_[gs]et_debugreg match the
new 64-bit entry points for parity, but they don't need to be global.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 119 +++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index a1425e9ad028..512f8412b799 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -118,6 +118,72 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 	return retval;
 }
 
+/*
+ * This function is trivial and will be inlined by the compiler.
+ * Having it separates the implementation details of debug
+ * registers from the interface details of ptrace.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+{
+	return child->thread.debugreg[n];
+}
+
+static int ptrace_set_debugreg(struct task_struct *child,
+			       int n, unsigned long data)
+{
+	if (unlikely(n == 4 || n == 5))
+		return -EIO;
+
+	if (n < 4 && unlikely(data >= TASK_SIZE - 3))
+		return -EIO;
+
+	if (n == 7) {
+		/*
+		 * Sanity-check data. Take one half-byte at once with
+		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
+		 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
+		 * 2 and 3 are LENi. Given a list of invalid values,
+		 * we do mask |= 1 << invalid_value, so that
+		 * (mask >> check) & 1 is a correct test for invalid
+		 * values.
+		 *
+		 * R/Wi contains the type of the breakpoint /
+		 * watchpoint, LENi contains the length of the watched
+		 * data in the watchpoint case.
+		 *
+		 * The invalid values are:
+		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.
+		 * - R/Wi == 0x10 (break on I/O reads or writes), so
+		 *   mask |= 0x4444.
+		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
+		 *   0x1110.
+		 *
+		 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
+		 *
+		 * See the Intel Manual "System Programming Guide",
+		 * 15.2.4
+		 *
+		 * Note that LENi == 0x10 is defined on x86_64 in long
+		 * mode (i.e. even for 32-bit userspace software, but
+		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
+		 * See the AMD manual no. 24593 (AMD64 System Programming)
+		 */
+		int i;
+		data &= ~DR_CONTROL_RESERVED;
+		for (i = 0; i < 4; i++)
+			if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+				return -EIO;
+		if (data)
+			set_tsk_thread_flag(child, TIF_DEBUG);
+		else
+			clear_tsk_thread_flag(child, TIF_DEBUG);
+	}
+
+	child->thread.debugreg[n] = data;
+
+	return 0;
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -158,7 +224,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		   addr <= (long) &dummy->u_debugreg[7]){
 			addr -= (long) &dummy->u_debugreg[0];
 			addr = addr >> 2;
-			tmp = child->thread.debugreg[addr];
+			tmp = ptrace_get_debugreg(child, addr);
 		}
 		ret = put_user(tmp, datap);
 		break;
@@ -188,56 +254,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		  ret = -EIO;
 		  if(addr >= (long) &dummy->u_debugreg[0] &&
 		     addr <= (long) &dummy->u_debugreg[7]){
-
-			  if(addr == (long) &dummy->u_debugreg[4]) break;
-			  if(addr == (long) &dummy->u_debugreg[5]) break;
-			  if(addr < (long) &dummy->u_debugreg[4] &&
-			     ((unsigned long) data) >= TASK_SIZE-3) break;
-			  
-			  /* Sanity-check data. Take one half-byte at once with
-			   * check = (val >> (16 + 4*i)) & 0xf. It contains the
-			   * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-			   * 2 and 3 are LENi. Given a list of invalid values,
-			   * we do mask |= 1 << invalid_value, so that
-			   * (mask >> check) & 1 is a correct test for invalid
-			   * values.
-			   *
-			   * R/Wi contains the type of the breakpoint /
-			   * watchpoint, LENi contains the length of the watched
-			   * data in the watchpoint case.
-			   *
-			   * The invalid values are:
-			   * - LENi == 0x10 (undefined), so mask |= 0x0f00.
-			   * - R/Wi == 0x10 (break on I/O reads or writes), so
-			   *   mask |= 0x4444.
-			   * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-			   *   0x1110.
-			   *
-			   * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-			   *
-			   * See the Intel Manual "System Programming Guide",
-			   * 15.2.4
-			   *
-			   * Note that LENi == 0x10 is defined on x86_64 in long
-			   * mode (i.e. even for 32-bit userspace software, but
-			   * 64-bit kernel), so the x86_64 mask value is 0x5454.
-			   * See the AMD manual no. 24593 (AMD64 System
-			   * Programming)*/
-
-			  if(addr == (long) &dummy->u_debugreg[7]) {
-				  data &= ~DR_CONTROL_RESERVED;
-				  for(i=0; i<4; i++)
-					  if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-						  goto out_tsk;
-				  if (data)
-					  set_tsk_thread_flag(child, TIF_DEBUG);
-				  else
-					  clear_tsk_thread_flag(child, TIF_DEBUG);
-			  }
 			  addr -= (long) &dummy->u_debugreg;
 			  addr = addr >> 2;
-			  child->thread.debugreg[addr] = data;
-			  ret = 0;
+			  ret = ptrace_set_debugreg(child, addr, data);
 		  }
 		  break;
 
@@ -335,7 +354,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
- out_tsk:
+
 	return ret;
 }
 

From dc802c2d2e66e2d1544e023bfd4be6cdee48d57b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:53 +0100
Subject: [PATCH 198/890] ptrace: arch_has_block_step

This defines the new macro arch_has_block_step() in linux/ptrace.h, a
default for when asm/ptrace.h does not define it.  This is the analog
of arch_has_single_step() for step-until-branch on machines that have
it.  It declares the new user_enable_block_step function, which goes
with the existing user_enable_single_step and user_disable_single_step.
This is not used yet, but paves the way to harmonize on this interface
for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ptrace.h | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 8f06c6fb22a5..1febc541dda5 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -155,7 +155,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
  *
  * This can only be called when arch_has_single_step() has returned nonzero.
  * Set @task so that when it returns to user mode, it will trap after the
- * next single instruction executes.
+ * next single instruction executes.  If arch_has_block_step() is defined,
+ * this must clear the effects of user_enable_block_step() too.
  */
 static inline void user_enable_single_step(struct task_struct *task)
 {
@@ -166,15 +167,43 @@ static inline void user_enable_single_step(struct task_struct *task)
  * user_disable_single_step - cancel user-mode single-step
  * @task: either current or a task stopped in %TASK_TRACED
  *
- * Clear @task of the effects of user_enable_single_step().  This can
- * be called whether or not user_enable_single_step() was ever called
- * on @task, and even if arch_has_single_step() returned zero.
+ * Clear @task of the effects of user_enable_single_step() and
+ * user_enable_block_step().  This can be called whether or not either
+ * of those was ever called on @task, and even if arch_has_single_step()
+ * returned zero.
  */
 static inline void user_disable_single_step(struct task_struct *task)
 {
 }
 #endif	/* arch_has_single_step */
 
+#ifndef arch_has_block_step
+/**
+ * arch_has_block_step - does this CPU support user-mode block-step?
+ *
+ * If this is defined, then there must be a function declaration or inline
+ * for user_enable_block_step(), and arch_has_single_step() must be defined
+ * too.  arch_has_block_step() should evaluate to nonzero iff the machine
+ * supports step-until-branch for user mode.  It can be a constant or it
+ * can test a CPU feature bit.
+ */
+#define arch_has_single_step()		(0)
+
+/**
+ * user_enable_block_step - step until branch in user-mode task
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * This can only be called when arch_has_block_step() has returned nonzero,
+ * and will never be called when single-instruction stepping is being used.
+ * Set @task so that when it returns to user mode, it will trap after the
+ * next branch or trap taken.
+ */
+static inline void user_enable_block_step(struct task_struct *task)
+{
+	BUG();			/* This can never be called.  */
+}
+#endif	/* arch_has_block_step */
+
 #endif
 
 #endif

From 5b88abbf770a0e1975c668743100f42934f385e8 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:53 +0100
Subject: [PATCH 199/890] ptrace: generic PTRACE_SINGLEBLOCK

This makes ptrace_request handle PTRACE_SINGLEBLOCK along with
PTRACE_CONT et al.  The new generic code makes use of the
arch_has_block_step macro and generic entry points on machines
that define them.

[ mingo@elte.hu: bugfix ]

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ptrace.h |  2 +-
 kernel/ptrace.c        | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1febc541dda5..515bff053de8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -187,7 +187,7 @@ static inline void user_disable_single_step(struct task_struct *task)
  * supports step-until-branch for user mode.  It can be a constant or it
  * can test a CPU feature bit.
  */
-#define arch_has_single_step()		(0)
+#define arch_has_block_step()		(0)
 
 /**
  * user_enable_block_step - step until branch in user-mode task
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index fad5f1e8511f..973d727f5e84 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -373,6 +373,12 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
 #define is_singlestep(request)		0
 #endif
 
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request)		((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request)		0
+#endif
+
 #ifdef PTRACE_SYSEMU
 #define is_sysemu_singlestep(request)	((request) == PTRACE_SYSEMU_SINGLESTEP)
 #else
@@ -396,7 +402,11 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
 		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
 
-	if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+	if (is_singleblock(request)) {
+		if (unlikely(!arch_has_block_step()))
+			return -EIO;
+		user_enable_block_step(child);
+	} else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
 		if (unlikely(!arch_has_single_step()))
 			return -EIO;
 		user_enable_single_step(child);
@@ -438,6 +448,9 @@ int ptrace_request(struct task_struct *child, long request,
 #ifdef PTRACE_SINGLESTEP
 	case PTRACE_SINGLESTEP:
 #endif
+#ifdef PTRACE_SINGLEBLOCK
+	case PTRACE_SINGLEBLOCK:
+#endif
 #ifdef PTRACE_SYSEMU
 	case PTRACE_SYSEMU:
 	case PTRACE_SYSEMU_SINGLESTEP:

From d2499d8b6c94df02629631807dd96deae7aa7637 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:54 +0100
Subject: [PATCH 200/890] x86: debugctlmsr constants

This adds constant macros for a few of the bits in MSR_IA32_DEBUGCTLMSR.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr-index.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index a4944732be04..4045bbe47379 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -63,6 +63,13 @@
 #define MSR_IA32_LASTINTFROMIP		0x000001dd
 #define MSR_IA32_LASTINTTOIP		0x000001de
 
+/* DEBUGCTLMSR bits (others vary by model): */
+#define _DEBUGCTLMSR_LBR	0 /* last branch recording */
+#define _DEBUGCTLMSR_BTF	1 /* single-step on branches */
+
+#define DEBUGCTLMSR_LBR		(1UL << _DEBUGCTLMSR_LBR)
+#define DEBUGCTLMSR_BTF		(1UL << _DEBUGCTLMSR_BTF)
+
 #define MSR_IA32_MC0_CTL		0x00000400
 #define MSR_IA32_MC0_STATUS		0x00000401
 #define MSR_IA32_MC0_ADDR		0x00000402

From 0a049bb0ab807b4a95dce9cd0b603c01c199a287 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:54 +0100
Subject: [PATCH 201/890] x86: debugctlmsr kconfig

This adds the (internal) Kconfig macro CONFIG_X86_DEBUGCTLMSR,
to be defined when configuring to support only hardware that
definitely supports MSR_IA32_DEBUGCTLMSR with the BTF flag.

The Intel documentation says "P6 family" and later processors all have it.
I think the Kconfig dependencies are right to have it set for those and
unset for others (i.e., when 586 and earlier are supported).

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.cpu | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index c30162202dc4..69e2ee44fdff 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -399,3 +399,7 @@ config X86_MINIMUM_CPU_FAMILY
 	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
 	default "3"
 
+config X86_DEBUGCTLMSR
+	bool
+	depends on !(M586MMX || M586TSC || M586 || M486 || M386)
+	default y

From 7e9916040b3020d0f36d68bb7512e3b80b623097 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:54 +0100
Subject: [PATCH 202/890] x86: debugctlmsr context switch

This adds low-level support for a per-thread value of MSR_IA32_DEBUGCTLMSR.
The per-thread value is switched in when TIF_DEBUGCTLMSR is set.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c     | 6 +++++-
 arch/x86/kernel/process_64.c     | 3 +++
 include/asm-x86/processor_32.h   | 2 ++
 include/asm-x86/processor_64.h   | 2 ++
 include/asm-x86/thread_info_32.h | 6 ++++--
 include/asm-x86/thread_info_64.h | 4 +++-
 6 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d9905c9d0fd5..d5462f228daf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -602,10 +602,14 @@ static noinline void
 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 struct tss_struct *tss)
 {
-	struct thread_struct *next;
+	struct thread_struct *prev, *next;
 
+	prev = &prev_p->thread;
 	next = &next_p->thread;
 
+	if (next->debugctlmsr != prev->debugctlmsr)
+		wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
+
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 		set_debugreg(next->debugreg[0], 0);
 		set_debugreg(next->debugreg[1], 1);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f7356e5517f6..ae5eca17aa3c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -555,6 +555,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 	prev = &prev_p->thread,
 	next = &next_p->thread;
 
+	if (next->debugctlmsr != prev->debugctlmsr)
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
+
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
 		loaddebug(next, 0);
 		loaddebug(next, 1);
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 2540bf8d5724..3c67eacb3168 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -368,6 +368,8 @@ struct thread_struct {
  	unsigned long	iopl;
 /* max allowed port in the bitmap, in bytes: */
 	unsigned long	io_bitmap_max;
+/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
+	unsigned long	debugctlmsr;
 };
 
 #define INIT_THREAD  {							\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 20d8935d141a..e7bea4fed642 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -238,6 +238,8 @@ struct thread_struct {
 	int		ioperm;
 	unsigned long	*io_bitmap_ptr;
 	unsigned io_bitmap_max;
+/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
+	unsigned long	debugctlmsr;
 /* cached TLS descriptors. */
 	u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
 } __attribute__((aligned(16)));
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index 009ecc6ad38b..306fc80800e1 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -139,6 +139,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_FREEZE		19	/* is freezing for suspend */
 #define TIF_NOTSC		20	/* TSC is not accessible in userland */
 #define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR		22	/* uses thread_struct.debugctlmsr */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -155,6 +156,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_NOTSC		(1<<TIF_NOTSC)
 #define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR	(1<<TIF_DEBUGCTLMSR)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
@@ -164,8 +166,8 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG)
-#define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC)
+#define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG | _TIF_DEBUGCTLMSR)
+#define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR)
 
 /*
  * Thread-synchronous status.
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index e0f41b3deced..ee35fd12b541 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -122,6 +122,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FREEZE		23	/* is freezing for suspend */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -140,6 +141,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_IO_BITMAP		(1<<TIF_IO_BITMAP)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR	(1<<TIF_DEBUGCTLMSR)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
@@ -151,7 +153,7 @@ static inline struct thread_info *stack_thread_info(void)
 	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
+#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR)
 
 #define PREEMPT_ACTIVE     0x10000000
 

From 10faa81e102e2b7695f386812055cd2ef9e44b4c Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:54 +0100
Subject: [PATCH 203/890] x86: debugctlmsr arch_has_block_step

This implements user-mode step-until-branch on x86 using the BTF bit
in MSR_IA32_DEBUGCTLMSR.  It's just like single-step, only less so.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c     | 64 ++++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/traps_32.c |  6 ++++
 arch/x86/kernel/traps_64.c |  6 ++++
 include/asm-x86/ptrace.h   |  7 +++++
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 243bff650ca5..cf4b9dac4a05 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -107,7 +107,10 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 	return 0;
 }
 
-void user_enable_single_step(struct task_struct *child)
+/*
+ * Enable single-stepping.  Return nonzero if user mode is not using TF itself.
+ */
+static int enable_single_step(struct task_struct *child)
 {
 	struct pt_regs *regs = task_pt_regs(child);
 
@@ -122,7 +125,7 @@ void user_enable_single_step(struct task_struct *child)
 	 * If TF was already set, don't do anything else
 	 */
 	if (regs->eflags & X86_EFLAGS_TF)
-		return;
+		return 0;
 
 	/* Set TF on the kernel stack.. */
 	regs->eflags |= X86_EFLAGS_TF;
@@ -133,13 +136,68 @@ void user_enable_single_step(struct task_struct *child)
 	 * won't clear it by hand later.
 	 */
 	if (is_setting_trap_flag(child, regs))
-		return;
+		return 0;
 
 	set_tsk_thread_flag(child, TIF_FORCED_TF);
+
+	return 1;
+}
+
+/*
+ * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
+ */
+static void write_debugctlmsr(struct task_struct *child, unsigned long val)
+{
+	child->thread.debugctlmsr = val;
+
+	if (child != current)
+		return;
+
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+#else
+	wrmsr(MSR_IA32_DEBUGCTLMSR, val, 0);
+#endif
+}
+
+/*
+ * Enable single or block step.
+ */
+static void enable_step(struct task_struct *child, bool block)
+{
+	/*
+	 * Make sure block stepping (BTF) is not enabled unless it should be.
+	 * Note that we don't try to worry about any is_setting_trap_flag()
+	 * instructions after the first when using block stepping.
+	 * So noone should try to use debugger block stepping in a program
+	 * that uses user-mode single stepping itself.
+	 */
+	if (enable_single_step(child) && block) {
+		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+		write_debugctlmsr(child, DEBUGCTLMSR_BTF);
+	} else if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) {
+		write_debugctlmsr(child, 0);
+	}
+}
+
+void user_enable_single_step(struct task_struct *child)
+{
+	enable_step(child, 0);
+}
+
+void user_enable_block_step(struct task_struct *child)
+{
+	enable_step(child, 1);
 }
 
 void user_disable_single_step(struct task_struct *child)
 {
+	/*
+	 * Make sure block stepping (BTF) is disabled.
+	 */
+	if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR))
+		write_debugctlmsr(child, 0);
+
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 02d1e1e58e81..9b0bbd508cd5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -837,6 +837,12 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
 
 	get_debugreg(condition, 6);
 
+	/*
+	 * The processor cleared BTF, so don't mark that we need it set.
+	 */
+	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
+	tsk->thread.debugctlmsr = 0;
+
 	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 					SIGTRAP) == NOTIFY_STOP)
 		return;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 874aca397b02..610a64d6bdf0 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -850,6 +850,12 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 
 	get_debugreg(condition, 6);
 
+	/*
+	 * The processor cleared BTF, so don't mark that we need it set.
+	 */
+	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
+	tsk->thread.debugctlmsr = 0;
+
 	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 						SIGTRAP) == NOTIFY_STOP)
 		return;
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index d223decd7b01..04204f359298 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -150,6 +150,13 @@ enum {
 extern void user_enable_single_step(struct task_struct *);
 extern void user_disable_single_step(struct task_struct *);
 
+extern void user_enable_block_step(struct task_struct *);
+#ifdef CONFIG_X86_DEBUGCTLMSR
+#define arch_has_block_step()	(1)
+#else
+#define arch_has_block_step()	(boot_cpu_data.x86 >= 6)
+#endif
+
 struct user_desc;
 extern int do_get_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info);

From 1ecc798c67645e0ee6eb028cb988b71b3edf4b40 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:54 +0100
Subject: [PATCH 204/890] x86: debugctlmsr kprobes

This adjusts the x86 kprobes implementation to cope with per-thread
MSR_IA32_DEBUGCTLMSR being set for user mode.  I haven't delved deep
enough into the kprobes code to be really sure this covers all the
cases where the user-mode BTF setting needs to be cleared or restored.
It looks about right to me.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes_32.c | 15 +++++++++++++++
 arch/x86/kernel/kprobes_64.c | 14 ++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index 3a020f79f82b..bc4a68367cd0 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -217,8 +217,21 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 		kcb->kprobe_saved_eflags &= ~IF_MASK;
 }
 
+static __always_inline void clear_btf(void)
+{
+	if (test_thread_flag(TIF_DEBUGCTLMSR))
+		wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0);
+}
+
+static __always_inline void restore_btf(void)
+{
+	if (test_thread_flag(TIF_DEBUGCTLMSR))
+		wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0);
+}
+
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
+	clear_btf();
 	regs->eflags |= TF_MASK;
 	regs->eflags &= ~IF_MASK;
 	/*single step inline if the instruction is an int3*/
@@ -542,6 +555,8 @@ static void __kprobes resume_execution(struct kprobe *p,
 	regs->eip = orig_eip + (regs->eip - copy_eip);
 
 no_change:
+	restore_btf();
+
 	return;
 }
 
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 5df19a9f9239..10d66e323c7d 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -256,8 +256,21 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 		kcb->kprobe_saved_rflags &= ~IF_MASK;
 }
 
+static __always_inline void clear_btf(void)
+{
+	if (test_thread_flag(TIF_DEBUGCTLMSR))
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+}
+
+static __always_inline void restore_btf(void)
+{
+	if (test_thread_flag(TIF_DEBUGCTLMSR))
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
+}
+
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
+	clear_btf();
 	regs->eflags |= TF_MASK;
 	regs->eflags &= ~IF_MASK;
 	/*single step inline if the instruction is an int3*/
@@ -527,6 +540,7 @@ static void __kprobes resume_execution(struct kprobe *p,
 
 	regs->rip = orig_rip + (regs->rip - copy_rip);
 no_change:
+	restore_btf();
 
 	return;
 }

From 0fa376e027f0bd71368e1485f863f0dff8f7c897 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:55 +0100
Subject: [PATCH 205/890] x86: PTRACE_SINGLEBLOCK

This adds the PTRACE_SINGLEBLOCK request on x86, matching the ia64 feature.
The implementation comes from the generic ptrace code and relies on the
low-level machine support provided by arch_has_block_step() et al.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c     | 1 +
 include/asm-x86/ptrace-abi.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 5ababea1307a..8c6fb9d8352b 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -227,6 +227,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_KILL:
 	case PTRACE_CONT:
 	case PTRACE_SINGLESTEP:
+	case PTRACE_SINGLEBLOCK:
 	case PTRACE_DETACH:
 	case PTRACE_SYSCALL:
 	case PTRACE_OLDSETOPTIONS:
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 7524e1233833..adce6b51df2e 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -78,4 +78,6 @@
 # define PTRACE_SYSEMU_SINGLESTEP 32
 #endif
 
+#define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
+
 #endif

From 7bf0c23ed24b0d95a2a717f86dce1f210e16f8a5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:30:55 +0100
Subject: [PATCH 206/890] x86: prevent dereferencing non-allocated per_cpu
 variables

'for_each_possible_cpu(i)' when there's a _remote possibility_ of
dereferencing a non-allocated per_cpu variable involved.

All files except mm/vmstat.c are x86 arch.

Thanks to pageexec@freemail.hu for pointing this out.

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: <pageexec@freemail.hu>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smp_32.c     | 4 ++--
 arch/x86/kernel/smpboot_32.c | 4 ++--
 arch/x86/xen/smp.c           | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index fcaa026eb807..d4c01a4aca60 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -223,7 +223,7 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 */ 
 
 	local_irq_save(flags);
-	for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+	for_each_possible_cpu(query_cpu) {
 		if (cpu_isset(query_cpu, mask)) {
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
@@ -675,7 +675,7 @@ static int convert_apicid_to_cpu(int apic_id)
 {
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_possible_cpu(i) {
 		if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
 			return i;
 	}
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 239ada1c499c..0bf7f20baba0 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -1090,7 +1090,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	 * Allow the user to impress friends.
 	 */
 	Dprintk("Before bogomips.\n");
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for_each_possible_cpu(cpu)
 		if (cpu_isset(cpu, cpu_callout_map))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
 	printk(KERN_INFO
@@ -1121,7 +1121,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	 * construct cpu_sibling_map, so that we can tell sibling CPUs
 	 * efficiently.
 	 */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_cpu(cpu) {
 		cpus_clear(per_cpu(cpu_sibling_map, cpu));
 		cpus_clear(per_cpu(cpu_core_map, cpu));
 	}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c1b131bcdcbe..8e1234e14559 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -146,7 +146,7 @@ void __init xen_smp_prepare_boot_cpu(void)
 	   old memory can be recycled */
 	make_lowmem_page_readwrite(&per_cpu__gdt_page);
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_cpu(cpu) {
 		cpus_clear(per_cpu(cpu_sibling_map, cpu));
 		/*
 		 * cpu_core_map lives in a per cpu area that is cleared
@@ -163,7 +163,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_cpu(cpu) {
 		cpus_clear(per_cpu(cpu_sibling_map, cpu));
 		/*
 		 * cpu_core_ map will be zeroed when the per

From 1c54d77078056cde0f195b1a982cb681850efc08 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:30:55 +0100
Subject: [PATCH 207/890] x86: partial unification of asm-x86/bitops.h

This unifies the set/clear/test bit functions of asm/bitops.h.

I have not attempted to merge the bit-finding functions, since they
rely on the machine word size and can't be easily restructured to work
generically without a lot of #ifdefs.  In particular, the 64-bit code
can assume the presence of conditional move instructions, whereas
32-bit needs to be more careful.

The inline assembly for the bit operations has been changed to remove
explicit sizing hints on the instructions, so the assembler will pick
the appropriate instruction forms depending on the architecture and
the context.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/bitops.h    | 315 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/bitops_32.h | 308 -----------------------------------
 include/asm-x86/bitops_64.h | 297 ----------------------------------
 3 files changed, 315 insertions(+), 605 deletions(-)

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 07e3f6d4fe47..c6dd7e259b46 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -1,5 +1,320 @@
+#ifndef _ASM_X86_BITOPS_H
+#define _ASM_X86_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/alternative.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+   versions. */
+#define ADDR "=m" (*(volatile long *) addr)
+#else
+#define ADDR "+m" (*(volatile long *) addr)
+#endif
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile(LOCK_PREFIX "bts %1,%0"
+		     : ADDR
+		     : "Ir" (nr) : "memory");
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile("bts %1,%0"
+		     : ADDR
+		     : "Ir" (nr) : "memory");
+}
+
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile(LOCK_PREFIX "btr %1,%0"
+		     : ADDR
+		     : "Ir" (nr));
+}
+
+/*
+ * clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock.
+ */
+static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+{
+	barrier();
+	clear_bit(nr, addr);
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+}
+
+/*
+ * __clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * __clear_bit() is non-atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock if no other CPUs can concurrently
+ * modify other bits in the word.
+ *
+ * No memory barrier is required here, because x86 cannot reorder stores past
+ * older loads. Same principle as spin_unlock.
+ */
+static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+{
+	barrier();
+	__clear_bit(nr, addr);
+}
+
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+	asm volatile(LOCK_PREFIX "btc %1,%0"
+		     : ADDR : "Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit), ADDR
+		     : "Ir" (nr) : "memory");
+
+	return oldbit;
+}
+
+/**
+ * test_and_set_bit_lock - Set a bit and return its old value for lock
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This is the same as test_and_set_bit on x86.
+ */
+static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+{
+	return test_and_set_bit(nr, addr);
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm("bts %2,%1\n\t"
+	    "sbb %0,%0"
+	    : "=r" (oldbit), ADDR
+	    : "Ir" (nr));
+	return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit), ADDR
+		     : "Ir" (nr) : "memory");
+
+	return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile("btr %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit), ADDR
+		     : "Ir" (nr));
+	return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile("btc %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit), ADDR
+		     : "Ir" (nr) : "memory");
+
+	return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit), ADDR
+		     : "Ir" (nr) : "memory");
+
+	return oldbit;
+}
+
+static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
+{
+	return ((1UL << (nr % BITS_PER_LONG)) & (addr[nr / BITS_PER_LONG])) != 0;
+}
+
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+	int oldbit;
+
+	asm volatile("bt %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit)
+		     : "m" (*addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static int test_bit(int nr, const volatile unsigned long *addr);
+#endif
+
+#define test_bit(nr,addr)			\
+	(__builtin_constant_p(nr) ?		\
+	 constant_test_bit((nr),(addr)) :	\
+	 variable_test_bit((nr),(addr)))
+
+#undef ADDR
+
 #ifdef CONFIG_X86_32
 # include "bitops_32.h"
 #else
 # include "bitops_64.h"
 #endif
+
+#endif	/* _ASM_X86_BITOPS_H */
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 5a29cce6a91d..e4d75fcf9c03 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,314 +5,6 @@
  * Copyright 1992, Linus Torvalds.
  */
 
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <linux/compiler.h>
-#include <asm/alternative.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btsl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__(
-		"btsl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btrl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
-	barrier();
-	clear_bit(nr, addr);
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__ __volatile__(
-		"btrl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
- */
-static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
-	barrier();
-	__clear_bit(nr, addr);
-}
-
-#define smp_mb__before_clear_bit()	barrier()
-#define smp_mb__after_clear_bit()	barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__ __volatile__(
-		"btcl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile unsigned long *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btcl %1,%0"
-		:"+m" (ADDR)
-		:"Ir" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It may be reordered on other architectures than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btsl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
-static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
-{
-	return test_and_set_bit(nr, addr);
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.  
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__(
-		"btsl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr));
-	return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It can be reorderdered on other architectures other than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btrl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.  
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__(
-		"btrl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr));
-	return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__(
-		"btcl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btcl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),"+m" (ADDR)
-		:"Ir" (nr) : "memory");
-	return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void *addr);
-#endif
-
-static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr)
-{
-	return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, const volatile unsigned long *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__(
-		"btl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit)
-		:"m" (ADDR),"Ir" (nr));
-	return oldbit;
-}
-
-#define test_bit(nr, addr) \
-	(__builtin_constant_p(nr) ? \
-		constant_test_bit((nr), (addr)) : \
-		variable_test_bit((nr), (addr)))
-
-#undef ADDR
-
 /**
  * find_first_zero_bit - find the first zero bit in a memory region
  * @addr: The address to start the search at
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 766bcc0470a6..48adbf56ca60 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,303 +5,6 @@
  * Copyright 1992, Linus Torvalds.
  */
 
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <asm/alternative.h>
-
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
-/* Technically wrong, but this avoids compilation errors on some gcc
-   versions. */
-#define ADDR "=m" (*(volatile long *) addr)
-#else
-#define ADDR "+m" (*(volatile long *) addr)
-#endif
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile void *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btsl %1,%0"
-		:ADDR
-		:"dIr" (nr) : "memory");
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile void *addr)
-{
-	__asm__ volatile(
-		"btsl %1,%0"
-		:ADDR
-		:"dIr" (nr) : "memory");
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile void *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btrl %1,%0"
-		:ADDR
-		:"dIr" (nr));
-}
-
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
-	barrier();
-	clear_bit(nr, addr);
-}
-
-static inline void __clear_bit(int nr, volatile void *addr)
-{
-	__asm__ __volatile__(
-		"btrl %1,%0"
-		:ADDR
-		:"dIr" (nr));
-}
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
- */
-static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
-	barrier();
-	__clear_bit(nr, addr);
-}
-
-#define smp_mb__before_clear_bit()	barrier()
-#define smp_mb__after_clear_bit()	barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile void *addr)
-{
-	__asm__ __volatile__(
-		"btcl %1,%0"
-		:ADDR
-		:"dIr" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile void *addr)
-{
-	__asm__ __volatile__( LOCK_PREFIX
-		"btcl %1,%0"
-		:ADDR
-		:"dIr" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btsl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
-static inline int test_and_set_bit_lock(int nr, volatile void *addr)
-{
-	return test_and_set_bit(nr, addr);
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.  
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__(
-		"btsl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr));
-	return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btrl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.  
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__(
-		"btrl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr));
-	return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__(
-		"btcl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr) : "memory");
-	return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile void *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__( LOCK_PREFIX
-		"btcl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit),ADDR
-		:"dIr" (nr) : "memory");
-	return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void *addr);
-#endif
-
-static inline int constant_test_bit(int nr, const volatile void *addr)
-{
-	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, volatile const void *addr)
-{
-	int oldbit;
-
-	__asm__ __volatile__(
-		"btl %2,%1\n\tsbbl %0,%0"
-		:"=r" (oldbit)
-		:"m" (*(volatile long *)addr),"dIr" (nr));
-	return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-#undef ADDR
-
 extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
 extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
 extern long find_first_bit(const unsigned long *addr, unsigned long size);

From 5548fecdff5617ba3a2f09f0e585e1ac6e1bd25c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:30:55 +0100
Subject: [PATCH 208/890] x86: clean up bitops-related warnings

Add casts to appropriate places to silence spurious bitops warnings.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c   | 31 ++++++++++++++++---------------
 arch/x86/kernel/smpboot_64.c |  4 ++--
 arch/x86/mm/numa_64.c        |  2 +-
 include/asm-x86/cpufeature.h |  2 +-
 include/asm-x86/numa_64.h    |  2 +-
 include/linux/thread_info.h  | 10 +++++-----
 6 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 84f66b7b4d2e..63dd39b843b5 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -661,19 +661,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
 	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, &c->x86_capability);
+	clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
 
 	/* On C+ stepping K8 rep microcode works well for copy/memset */
 	level = cpuid_eax(1);
 	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
 			     level >= 0x0f58))
-		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
 	if (c->x86 == 0x10 || c->x86 == 0x11)
-		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
 
 	/* Enable workaround for FXSAVE leak */
 	if (c->x86 >= 6)
-		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+		set_bit(X86_FEATURE_FXSAVE_LEAK, (unsigned long *)&c->x86_capability);
 
 	level = get_model_name(c);
 	if (!level) {
@@ -689,7 +689,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
 	if (c->x86_power & (1<<8))
-		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+		set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
 
 	/* Multi core CPU? */
 	if (c->extended_cpuid_level >= 0x80000008)
@@ -702,14 +702,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		num_cache_leaves = 3;
 
 	if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
-		set_bit(X86_FEATURE_K8, &c->x86_capability);
+		set_bit(X86_FEATURE_K8, (unsigned long *)&c->x86_capability);
 
 	/* RDTSC can be speculated around */
-	clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
 
 	/* Family 10 doesn't support C states in MWAIT so don't use it */
 	if (c->x86 == 0x10 && !force_mwait)
-		clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
+		clear_bit(X86_FEATURE_MWAIT, (unsigned long *)&c->x86_capability);
 
 	if (amd_apic_timer_broken())
 		disable_apic_timer = 1;
@@ -811,16 +811,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-			set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
+			set_bit(X86_FEATURE_ARCH_PERFMON,
+				(unsigned long *)&c->x86_capability);
 	}
 
 	if (cpu_has_ds) {
 		unsigned int l1, l2;
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
 		if (!(l1 & (1<<11)))
-			set_bit(X86_FEATURE_BTS, c->x86_capability);
+			set_bit(X86_FEATURE_BTS, (unsigned long *)c->x86_capability);
 		if (!(l1 & (1<<12)))
-			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+			set_bit(X86_FEATURE_PEBS, (unsigned long *)c->x86_capability);
 	}
 
 	n = c->extended_cpuid_level;
@@ -839,13 +840,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
-		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+		set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
 	if (c->x86 == 6)
-		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
 	if (c->x86 == 15)
-		set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+		set_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
 	else
-		clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+		clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
 	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 8ac8eb620428..ac1089f2b917 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -691,7 +691,7 @@ do_rest:
 	}
 	if (boot_error) {
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
-		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+		clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
 		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
 		cpu_clear(cpu, cpu_present_map);
 		cpu_clear(cpu, cpu_possible_map);
@@ -1036,7 +1036,7 @@ void remove_cpu_from_maps(void)
 
 	cpu_clear(cpu, cpu_callout_map);
 	cpu_clear(cpu, cpu_callin_map);
-	clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+	clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
 	clear_node_cpumask(cpu);
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 46b4b5e1a02a..848231481619 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -547,7 +547,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-	set_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]);
+	set_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_set_node(int cpu, int node)
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index acbf6681740d..761922972f6f 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -124,7 +124,7 @@
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) )	\
 	  ? 1 :								\
-	  test_bit(bit, (c)->x86_capability))
+	 test_bit(bit, (unsigned long *)(c)->x86_capability))
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index c3c20db1fba3..4449889bb0c0 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -32,7 +32,7 @@ extern void __init init_cpu_to_node(void);
 
 static inline void clear_node_cpumask(int cpu)
 {
-	clear_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]);
+	clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
 }
 
 #else
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9c4ad755d7e5..dfbdfb9836f4 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -42,27 +42,27 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 
 static inline void set_ti_thread_flag(struct thread_info *ti, int flag)
 {
-	set_bit(flag,&ti->flags);
+	set_bit(flag, (unsigned long *)&ti->flags);
 }
 
 static inline void clear_ti_thread_flag(struct thread_info *ti, int flag)
 {
-	clear_bit(flag,&ti->flags);
+	clear_bit(flag, (unsigned long *)&ti->flags);
 }
 
 static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag)
 {
-	return test_and_set_bit(flag,&ti->flags);
+	return test_and_set_bit(flag, (unsigned long *)&ti->flags);
 }
 
 static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag)
 {
-	return test_and_clear_bit(flag,&ti->flags);
+	return test_and_clear_bit(flag, (unsigned long *)&ti->flags);
 }
 
 static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 {
-	return test_bit(flag,&ti->flags);
+	return test_bit(flag, (unsigned long *)&ti->flags);
 }
 
 #define set_thread_flag(flag) \

From 53756d3722172815f52272b28c6d5d5e9639adde Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:30:55 +0100
Subject: [PATCH 209/890] x86: add set/clear_cpu_cap operations

The patch to suppress bitops-related warnings added a pile of ugly
casts.  Many of these were related to the management of x86 CPU
capabilities.  Clean these up by adding specific set/clear_cpu_cap
macros, and use them consistently.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/alternative.c              | 13 +++++-----
 arch/x86/kernel/apic_32.c                  |  8 +++---
 arch/x86/kernel/apic_64.c                  |  2 +-
 arch/x86/kernel/cpu/addon_cpuid_features.c |  2 +-
 arch/x86/kernel/cpu/mcheck/mce_64.c        |  2 +-
 arch/x86/kernel/setup_32.c                 |  2 +-
 arch/x86/kernel/setup_64.c                 | 29 +++++++++++-----------
 arch/x86/kernel/vmi_32.c                   | 10 ++++----
 include/asm-x86/cpufeature.h               |  5 +++-
 9 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d6405e0842b5..cdc43242da92 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -356,15 +356,15 @@ void alternatives_smp_switch(int smp)
 	spin_lock_irqsave(&smp_alt, flags);
 	if (smp) {
 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
-		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_unlock(mod->locks, mod->locks_end,
 						mod->text, mod->text_end);
@@ -431,8 +431,9 @@ void __init alternative_instructions(void)
 	if (smp_alt_once) {
 		if (1 == num_possible_cpus()) {
 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-			set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
+
 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
 						_text, _etext);
 		}
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 56352c11a896..1ee443a8e61b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1078,7 +1078,7 @@ static int __init detect_init_APIC (void)
 		printk(KERN_WARNING "Could not enable APIC!\n");
 		return -1;
 	}
-	set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
 
 	/* The BIOS may have set up the APIC at some other address */
@@ -1168,7 +1168,7 @@ fake_ioapic_page:
 int __init APIC_init_uniprocessor (void)
 {
 	if (enable_local_apic < 0)
-		clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
@@ -1180,7 +1180,7 @@ int __init APIC_init_uniprocessor (void)
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
 		       boot_cpu_physical_apicid);
-		clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
 
@@ -1536,7 +1536,7 @@ early_param("lapic", parse_lapic);
 static int __init parse_nolapic(char *arg)
 {
 	enable_local_apic = -1;
-	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	return 0;
 }
 early_param("nolapic", parse_nolapic);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 027004262105..ab4ae50399fd 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1211,7 +1211,7 @@ early_param("apic", apic_set_verbosity);
 static __init int setup_disableapic(char *str)
 {
 	disable_apic = 1;
-	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	return 0;
 }
 early_param("disableapic", setup_disableapic);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 3e91d3ee26ec..238468ae1993 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 			&regs[CR_ECX], &regs[CR_EDX]);
 
 		if (regs[cb->reg] & (1 << cb->bit))
-			set_bit(cb->feature, c->x86_capability);
+			set_cpu_cap(c, cb->feature);
 	}
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 8ef6a6bfd112..3c7672c40cf4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -80,7 +80,7 @@ void mce_log(struct mce *mce)
 			/* When the buffer fills up discard new entries. Assume
 			   that the earlier errors are the more interesting. */
 			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW, &mcelog.flags);
+				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
 				return;
 			}
 			/* Old left over entry. Skip. */
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 32edf70d6b0d..e9ede0fc585a 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -238,7 +238,7 @@ static int __init parse_mem(char *arg)
 		return -EINVAL;
 
 	if (strcmp(arg, "nopentium") == 0) {
-		clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
 		disable_pse = 1;
 	} else {
 		/* If the user specifies memory size, we
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 63dd39b843b5..ce4d6b52ce36 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -667,13 +667,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	level = cpuid_eax(1);
 	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
 			     level >= 0x0f58))
-		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	if (c->x86 == 0x10 || c->x86 == 0x11)
-		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 
 	/* Enable workaround for FXSAVE leak */
 	if (c->x86 >= 6)
-		set_bit(X86_FEATURE_FXSAVE_LEAK, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
 
 	level = get_model_name(c);
 	if (!level) {
@@ -689,7 +689,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
 	if (c->x86_power & (1<<8))
-		set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
 	/* Multi core CPU? */
 	if (c->extended_cpuid_level >= 0x80000008)
@@ -702,14 +702,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		num_cache_leaves = 3;
 
 	if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
-		set_bit(X86_FEATURE_K8, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_K8);
 
 	/* RDTSC can be speculated around */
-	clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
+	clear_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
 
 	/* Family 10 doesn't support C states in MWAIT so don't use it */
 	if (c->x86 == 0x10 && !force_mwait)
-		clear_bit(X86_FEATURE_MWAIT, (unsigned long *)&c->x86_capability);
+		clear_cpu_cap(c, X86_FEATURE_MWAIT);
 
 	if (amd_apic_timer_broken())
 		disable_apic_timer = 1;
@@ -811,17 +811,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-			set_bit(X86_FEATURE_ARCH_PERFMON,
-				(unsigned long *)&c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
 	if (cpu_has_ds) {
 		unsigned int l1, l2;
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
 		if (!(l1 & (1<<11)))
-			set_bit(X86_FEATURE_BTS, (unsigned long *)c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
-			set_bit(X86_FEATURE_PEBS, (unsigned long *)c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
 	n = c->extended_cpuid_level;
@@ -840,13 +839,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
-		set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 	if (c->x86 == 6)
-		set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	if (c->x86 == 15)
-		set_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
 	else
-		clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability);
+		clear_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
 	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index aacce426cbd0..87e5633805a9 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -963,19 +963,19 @@ static int __init parse_vmi(char *arg)
 		return -EINVAL;
 
 	if (!strcmp(arg, "disable_pge")) {
-		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
 		disable_pge = 1;
 	} else if (!strcmp(arg, "disable_pse")) {
-		clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
 		disable_pse = 1;
 	} else if (!strcmp(arg, "disable_sep")) {
-		clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 		disable_sep = 1;
 	} else if (!strcmp(arg, "disable_tsc")) {
-		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
 		disable_tsc = 1;
 	} else if (!strcmp(arg, "disable_mtrr")) {
-		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
 		disable_mtrr = 1;
 	} else if (!strcmp(arg, "disable_timer")) {
 		disable_vmi_timer = 1;
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 761922972f6f..87dd900154d1 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -124,9 +124,12 @@
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) )	\
 	  ? 1 :								\
-	 test_bit(bit, (unsigned long *)(c)->x86_capability))
+	 test_bit(bit, (unsigned long *)((c)->x86_capability)))
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
+#define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
+#define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
+
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
 #define cpu_has_vme		boot_cpu_has(X86_FEATURE_VME)
 #define cpu_has_de		boot_cpu_has(X86_FEATURE_DE)

From 65ea5b0349903585bfed9720fa06f5edb4f1cd25 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:56 +0100
Subject: [PATCH 210/890] x86: rename the struct pt_regs members for 32/64-bit
 consistency

We have a lot of code which differs only by the naming of specific
members of structures that contain registers.  In order to enable
additional unifications, this patch drops the e- or r- size prefix
from the register names in struct pt_regs, and drops the x- prefixes
for segment registers on the 32-bit side.

This patch also performs the equivalent renames in some additional
places that might be candidates for unification in the future.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32_aout.c           |  30 ++---
 arch/x86/ia32/ia32_binfmt.c         |  42 +++---
 arch/x86/ia32/ia32_signal.c         | 100 +++++++--------
 arch/x86/ia32/ptrace32.c            |  44 +++----
 arch/x86/ia32/sys_ia32.c            |   6 +-
 arch/x86/kernel/acpi/wakeup_64.S    |  32 ++---
 arch/x86/kernel/asm-offsets_32.c    |  32 ++---
 arch/x86/kernel/asm-offsets_64.c    |  18 +--
 arch/x86/kernel/cpu/common.c        |   2 +-
 arch/x86/kernel/cpu/mcheck/mce_64.c |  14 +-
 arch/x86/kernel/i8259_32.c          |   2 +-
 arch/x86/kernel/io_apic_64.c        |   2 +-
 arch/x86/kernel/ioport_32.c         |   8 +-
 arch/x86/kernel/ioport_64.c         |   6 +-
 arch/x86/kernel/irq_32.c            |  20 +--
 arch/x86/kernel/irq_64.c            |  10 +-
 arch/x86/kernel/kprobes_32.c        |  92 ++++++-------
 arch/x86/kernel/kprobes_64.c        |  69 +++++-----
 arch/x86/kernel/process_32.c        | 140 ++++++++++----------
 arch/x86/kernel/process_64.c        |  44 +++----
 arch/x86/kernel/ptrace_32.c         |  26 ++--
 arch/x86/kernel/ptrace_64.c         |  24 ++--
 arch/x86/kernel/signal_32.c         | 192 ++++++++++++++--------------
 arch/x86/kernel/signal_64.c         | 115 ++++++++---------
 arch/x86/kernel/smp_64.c            |   2 +-
 arch/x86/kernel/smpboot_32.c        |  10 +-
 arch/x86/kernel/step.c              |  15 +--
 arch/x86/kernel/suspend_asm_64.S    |  32 ++---
 arch/x86/kernel/time_32.c           |   8 +-
 arch/x86/kernel/time_64.c           |   4 +-
 arch/x86/kernel/traps_32.c          | 114 ++++++++---------
 arch/x86/kernel/traps_64.c          |  84 ++++++------
 arch/x86/kernel/vm86_32.c           |  94 +++++++-------
 arch/x86/kernel/vmi_32.c            |  50 ++++----
 arch/x86/kernel/vsyscall_64.c       |   2 +-
 arch/x86/lguest/boot.c              |  22 ++--
 arch/x86/mach-voyager/voyager_smp.c |  12 +-
 arch/x86/mm/extable_32.c            |   6 +-
 arch/x86/mm/fault_32.c              |  38 +++---
 arch/x86/mm/fault_64.c              |  22 ++--
 arch/x86/oprofile/backtrace.c       |   6 +-
 arch/x86/xen/enlighten.c            |  18 +--
 arch/x86/xen/events.c               |   2 +-
 include/asm-x86/compat.h            |   2 +-
 include/asm-x86/elf.h               |  66 +++++-----
 include/asm-x86/kexec_32.h          |  36 +++---
 include/asm-x86/kexec_64.h          |  20 +--
 include/asm-x86/kprobes_32.h        |   2 +-
 include/asm-x86/kprobes_64.h        |   2 +-
 include/asm-x86/mce.h               |   4 +-
 include/asm-x86/processor_32.h      |  20 +--
 include/asm-x86/processor_64.h      |   8 +-
 include/asm-x86/ptrace.h            |  80 ++++++++++--
 kernel/signal.c                     |   4 +-
 54 files changed, 953 insertions(+), 902 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a764e4e95314..f1a0f83676dc 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -53,7 +53,7 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 /* changed the size calculations - should hopefully work better. lbt */
 	dump->magic = CMAGIC;
 	dump->start_code = 0;
-	dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
+	dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
 	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
 	dump->u_dsize = ((unsigned long)
 			 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
@@ -75,22 +75,22 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 		dump->u_ssize = tmp >> PAGE_SHIFT;
 	}
 
-	dump->regs.ebx = regs->rbx;
-	dump->regs.ecx = regs->rcx;
-	dump->regs.edx = regs->rdx;
-	dump->regs.esi = regs->rsi;
-	dump->regs.edi = regs->rdi;
-	dump->regs.ebp = regs->rbp;
-	dump->regs.eax = regs->rax;
+	dump->regs.bx = regs->bx;
+	dump->regs.cx = regs->cx;
+	dump->regs.dx = regs->dx;
+	dump->regs.si = regs->si;
+	dump->regs.di = regs->di;
+	dump->regs.bp = regs->bp;
+	dump->regs.ax = regs->ax;
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
 	asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
 	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
-	dump->regs.orig_eax = regs->orig_rax;
-	dump->regs.eip = regs->rip;
+	dump->regs.orig_ax = regs->orig_ax;
+	dump->regs.ip = regs->ip;
 	dump->regs.cs = regs->cs;
-	dump->regs.eflags = regs->eflags;
-	dump->regs.esp = regs->rsp;
+	dump->regs.flags = regs->flags;
+	dump->regs.sp = regs->sp;
 	dump->regs.ss = regs->ss;
 
 #if 1 /* FIXME */
@@ -432,9 +432,9 @@ beyond_if:
 	asm volatile("movl %0,%%fs" :: "r" (0)); \
 	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
 	load_gs_index(0);
-	(regs)->rip = ex.a_entry;
-	(regs)->rsp = current->mm->start_stack;
-	(regs)->eflags = 0x200;
+	(regs)->ip = ex.a_entry;
+	(regs)->sp = current->mm->start_stack;
+	(regs)->flags = 0x200;
 	(regs)->cs = __USER32_CS;
 	(regs)->ss = __USER32_DS;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
index e32974c3dd3b..806135c3f975 100644
--- a/arch/x86/ia32/ia32_binfmt.c
+++ b/arch/x86/ia32/ia32_binfmt.c
@@ -73,22 +73,22 @@ struct file;
 /* Assumes current==process to be dumped */
 #undef	ELF_CORE_COPY_REGS
 #define ELF_CORE_COPY_REGS(pr_reg, regs)       		\
-	pr_reg[0] = regs->rbx;				\
-	pr_reg[1] = regs->rcx;				\
-	pr_reg[2] = regs->rdx;				\
-	pr_reg[3] = regs->rsi;				\
-	pr_reg[4] = regs->rdi;				\
-	pr_reg[5] = regs->rbp;				\
-	pr_reg[6] = regs->rax;				\
+	pr_reg[0] = regs->bx;				\
+	pr_reg[1] = regs->cx;				\
+	pr_reg[2] = regs->dx;				\
+	pr_reg[3] = regs->si;				\
+	pr_reg[4] = regs->di;				\
+	pr_reg[5] = regs->bp;				\
+	pr_reg[6] = regs->ax;				\
 	pr_reg[7] = _GET_SEG(ds);   			\
 	pr_reg[8] = _GET_SEG(es);			\
 	pr_reg[9] = _GET_SEG(fs);			\
 	pr_reg[10] = _GET_SEG(gs);			\
-	pr_reg[11] = regs->orig_rax;			\
-	pr_reg[12] = regs->rip;				\
+	pr_reg[11] = regs->orig_ax;			\
+	pr_reg[12] = regs->ip;				\
 	pr_reg[13] = regs->cs;				\
-	pr_reg[14] = regs->eflags;			\
-	pr_reg[15] = regs->rsp;				\
+	pr_reg[14] = regs->flags;			\
+	pr_reg[15] = regs->sp;				\
 	pr_reg[16] = regs->ss;
 
 
@@ -205,9 +205,9 @@ do {							\
 	asm volatile("movl %0,%%fs" :: "r" (0)); \
 	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
 	load_gs_index(0); \
-	(regs)->rip = (new_rip); \
-	(regs)->rsp = (new_rsp); \
-	(regs)->eflags = 0x200; \
+	(regs)->ip = (new_rip); \
+	(regs)->sp = (new_rsp); \
+	(regs)->flags = X86_EFLAGS_IF; \
 	(regs)->cs = __USER32_CS; \
 	(regs)->ss = __USER32_DS; \
 	set_fs(USER_DS); \
@@ -233,13 +233,13 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 static void elf32_init(struct pt_regs *regs)
 {
 	struct task_struct *me = current; 
-	regs->rdi = 0;
-	regs->rsi = 0;
-	regs->rdx = 0;
-	regs->rcx = 0;
-	regs->rax = 0;
-	regs->rbx = 0; 
-	regs->rbp = 0; 
+	regs->di = 0;
+	regs->si = 0;
+	regs->dx = 0;
+	regs->cx = 0;
+	regs->ax = 0;
+	regs->bx = 0;
+	regs->bp = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
 		regs->r13 = regs->r14 = regs->r15 = 0; 
     me->thread.fs = 0; 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 39356a756b28..f2da443f8c7b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -154,7 +154,7 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
 	}
 	seg = get_fs();
 	set_fs(KERNEL_DS);
-	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp);
+	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
 	set_fs(seg);
 	if (ret >= 0 && uoss_ptr)  {
 		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) ||
@@ -195,7 +195,7 @@ struct rt_sigframe
 #define COPY(x)		{ \
 	unsigned int reg;			\
 	err |= __get_user(reg, &sc->e ##x);	\
-	regs->r ## x = reg;			\
+	regs->x = reg;			\
 }
 
 #define RELOAD_SEG(seg,mask)						\
@@ -220,7 +220,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 #if DEBUG_SIG
 	printk(KERN_DEBUG "SIG restore_sigcontext: "
 	       "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
-	       sc, sc->err, sc->eip, sc->cs, sc->eflags);
+	       sc, sc->err, sc->ip, sc->cs, sc->flags);
 #endif
 
 	/*
@@ -249,9 +249,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	regs->ss |= 3;
 
 	err |= __get_user(tmpflags, &sc->eflags);
-	regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+	regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
 	/* disable syscall checks */
-	regs->orig_rax = -1;
+	regs->orig_ax = -1;
 
 	err |= __get_user(tmp, &sc->fpstate);
 	buf = compat_ptr(tmp);
@@ -279,9 +279,9 @@ badframe:
 
 asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 {
-	struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8);
+	struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8);
 	sigset_t set;
-	unsigned int eax;
+	unsigned int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -298,9 +298,9 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if (ia32_restore_sigcontext(regs, &frame->sc, &eax))
+	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
 		goto badframe;
-	return eax;
+	return ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit sigreturn");
@@ -311,10 +311,10 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	sigset_t set;
-	unsigned int eax;
+	unsigned int ax;
 	struct pt_regs tregs;
 
-	frame = (struct rt_sigframe __user *)(regs->rsp - 4);
+	frame = (struct rt_sigframe __user *)(regs->sp - 4);
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -327,14 +327,14 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
 	tregs = *regs;
 	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
 		goto badframe;
 
-	return eax;
+	return ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit rt sigreturn");
@@ -361,21 +361,21 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
-	err |= __put_user((u32)regs->rdi, &sc->edi);
-	err |= __put_user((u32)regs->rsi, &sc->esi);
-	err |= __put_user((u32)regs->rbp, &sc->ebp);
-	err |= __put_user((u32)regs->rsp, &sc->esp);
-	err |= __put_user((u32)regs->rbx, &sc->ebx);
-	err |= __put_user((u32)regs->rdx, &sc->edx);
-	err |= __put_user((u32)regs->rcx, &sc->ecx);
-	err |= __put_user((u32)regs->rax, &sc->eax);
+	err |= __put_user((u32)regs->di, &sc->edi);
+	err |= __put_user((u32)regs->si, &sc->esi);
+	err |= __put_user((u32)regs->bp, &sc->ebp);
+	err |= __put_user((u32)regs->sp, &sc->esp);
+	err |= __put_user((u32)regs->bx, &sc->ebx);
+	err |= __put_user((u32)regs->dx, &sc->edx);
+	err |= __put_user((u32)regs->cx, &sc->ecx);
+	err |= __put_user((u32)regs->ax, &sc->eax);
 	err |= __put_user((u32)regs->cs, &sc->cs);
 	err |= __put_user((u32)regs->ss, &sc->ss);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user((u32)regs->rip, &sc->eip);
-	err |= __put_user((u32)regs->eflags, &sc->eflags);
-	err |= __put_user((u32)regs->rsp, &sc->esp_at_signal);
+	err |= __put_user((u32)regs->ip, &sc->eip);
+	err |= __put_user((u32)regs->flags, &sc->eflags);
+	err |= __put_user((u32)regs->sp, &sc->esp_at_signal);
 
 	tmp = save_i387_ia32(current, fpstate, regs, 0);
 	if (tmp < 0)
@@ -400,28 +400,28 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 				 size_t frame_size)
 {
-	unsigned long rsp;
+	unsigned long sp;
 
 	/* Default to using normal stack */
-	rsp = regs->rsp;
+	sp = regs->sp;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(rsp) == 0)
-			rsp = current->sas_ss_sp + current->sas_ss_size;
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
 	/* This is the legacy signal stack switching. */
 	else if ((regs->ss & 0xffff) != __USER_DS &&
 		!(ka->sa.sa_flags & SA_RESTORER) &&
 		 ka->sa.sa_restorer)
-		rsp = (unsigned long) ka->sa.sa_restorer;
+		sp = (unsigned long) ka->sa.sa_restorer;
 
-	rsp -= frame_size;
+	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
-	rsp = ((rsp + 4) & -16ul) - 4;
-	return (void __user *) rsp;
+	sp = ((sp + 4) & -16ul) - 4;
+	return (void __user *) sp;
 }
 
 int ia32_setup_frame(int sig, struct k_sigaction *ka,
@@ -486,13 +486,13 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->rsp = (unsigned long) frame;
-	regs->rip = (unsigned long) ka->sa.sa_handler;
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
 
 	/* Make -mregparm=3 work */
-	regs->rax = sig;
-	regs->rdx = 0;
-	regs->rcx = 0;
+	regs->ax = sig;
+	regs->dx = 0;
+	regs->cx = 0;
 
 	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
 	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
@@ -501,13 +501,13 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
 	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
-	       current->comm, current->pid, frame, regs->rip, frame->pretcode);
+	       current->comm, current->pid, frame, regs->ip, frame->pretcode);
 #endif
 
 	return 0;
@@ -556,7 +556,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->rsp),
+	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
@@ -581,18 +581,18 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->rsp = (unsigned long) frame;
-	regs->rip = (unsigned long) ka->sa.sa_handler;
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
 
 	/* Make -mregparm=3 work */
-	regs->rax = sig;
-	regs->rdx = (unsigned long) &frame->info;
-	regs->rcx = (unsigned long) &frame->uc;
+	regs->ax = sig;
+	regs->dx = (unsigned long) &frame->info;
+	regs->cx = (unsigned long) &frame->uc;
 
 	/* Make -mregparm=3 work */
-	regs->rax = sig;
-	regs->rdx = (unsigned long) &frame->info;
-	regs->rcx = (unsigned long) &frame->uc;
+	regs->ax = sig;
+	regs->dx = (unsigned long) &frame->info;
+	regs->cx = (unsigned long) &frame->uc;
 
 	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
 	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
@@ -601,13 +601,13 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
 	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
-	       current->comm, current->pid, frame, regs->rip, frame->pretcode);
+	       current->comm, current->pid, frame, regs->ip, frame->pretcode);
 #endif
 
 	return 0;
diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 8c6fb9d8352b..1e382e3bd882 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -73,19 +73,19 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 		stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
 		break;
 
-	R32(ebx, rbx);
-	R32(ecx, rcx);
-	R32(edx, rdx);
-	R32(edi, rdi);
-	R32(esi, rsi);
-	R32(ebp, rbp);
-	R32(eax, rax);
-	R32(orig_eax, orig_rax);
-	R32(eip, rip);
-	R32(esp, rsp);
+	R32(ebx, bx);
+	R32(ecx, cx);
+	R32(edx, dx);
+	R32(edi, di);
+	R32(esi, si);
+	R32(ebp, bp);
+	R32(eax, ax);
+	R32(orig_eax, orig_ax);
+	R32(eip, ip);
+	R32(esp, sp);
 
 	case offsetof(struct user32, regs.eflags): {
-		__u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
+		__u64 *flags = &stack[offsetof(struct pt_regs, flags)/8];
 
 		val &= FLAG_MASK;
 		/*
@@ -145,22 +145,22 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 	R32(cs, cs);
 	R32(ss, ss);
-	R32(ebx, rbx);
-	R32(ecx, rcx);
-	R32(edx, rdx);
-	R32(edi, rdi);
-	R32(esi, rsi);
-	R32(ebp, rbp);
-	R32(eax, rax);
-	R32(orig_eax, orig_rax);
-	R32(eip, rip);
-	R32(esp, rsp);
+	R32(ebx, bx);
+	R32(ecx, cx);
+	R32(edx, dx);
+	R32(edi, di);
+	R32(esi, si);
+	R32(ebp, bp);
+	R32(eax, ax);
+	R32(orig_eax, orig_ax);
+	R32(eip, ip);
+	R32(esp, sp);
 
 	case offsetof(struct user32, regs.eflags):
 		/*
 		 * If the debugger set TF, hide it from the readout.
 		 */
-		*val = stack[offsetof(struct pt_regs, eflags)/8];
+		*val = stack[offsetof(struct pt_regs, flags)/8];
 		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 			*val &= ~X86_EFLAGS_TF;
 		break;
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 58991abc5b59..abf71d26fc2a 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -816,11 +816,11 @@ asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
 			    struct pt_regs *regs)
 {
-	void __user *parent_tid = (void __user *)regs->rdx;
-	void __user *child_tid = (void __user *)regs->rdi;
+	void __user *parent_tid = (void __user *)regs->dx;
+	void __user *child_tid = (void __user *)regs->di;
 
 	if (!newsp)
-		newsp = regs->rsp;
+		newsp = regs->sp;
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 5ed3bc5c61d7..2e1b9e0d0767 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -344,13 +344,13 @@ do_suspend_lowlevel:
 	call	save_processor_state
 
 	movq	$saved_context, %rax
-	movq	%rsp, pt_regs_rsp(%rax)
-	movq	%rbp, pt_regs_rbp(%rax)
-	movq	%rsi, pt_regs_rsi(%rax)
-	movq	%rdi, pt_regs_rdi(%rax)
-	movq	%rbx, pt_regs_rbx(%rax)
-	movq	%rcx, pt_regs_rcx(%rax)
-	movq	%rdx, pt_regs_rdx(%rax)
+	movq	%rsp, pt_regs_sp(%rax)
+	movq	%rbp, pt_regs_bp(%rax)
+	movq	%rsi, pt_regs_si(%rax)
+	movq	%rdi, pt_regs_di(%rax)
+	movq	%rbx, pt_regs_bx(%rax)
+	movq	%rcx, pt_regs_cx(%rax)
+	movq	%rdx, pt_regs_dx(%rax)
 	movq	%r8, pt_regs_r8(%rax)
 	movq	%r9, pt_regs_r9(%rax)
 	movq	%r10, pt_regs_r10(%rax)
@@ -360,7 +360,7 @@ do_suspend_lowlevel:
 	movq	%r14, pt_regs_r14(%rax)
 	movq	%r15, pt_regs_r15(%rax)
 	pushfq
-	popq	pt_regs_eflags(%rax)
+	popq	pt_regs_flags(%rax)
 
 	movq	$.L97, saved_rip(%rip)
 
@@ -391,15 +391,15 @@ do_suspend_lowlevel:
 	movq	%rbx, %cr2
 	movq	saved_context_cr0(%rax), %rbx
 	movq	%rbx, %cr0
-	pushq	pt_regs_eflags(%rax)
+	pushq	pt_regs_flags(%rax)
 	popfq
-	movq	pt_regs_rsp(%rax), %rsp
-	movq	pt_regs_rbp(%rax), %rbp
-	movq	pt_regs_rsi(%rax), %rsi
-	movq	pt_regs_rdi(%rax), %rdi
-	movq	pt_regs_rbx(%rax), %rbx
-	movq	pt_regs_rcx(%rax), %rcx
-	movq	pt_regs_rdx(%rax), %rdx
+	movq	pt_regs_sp(%rax), %rsp
+	movq	pt_regs_bp(%rax), %rbp
+	movq	pt_regs_si(%rax), %rsi
+	movq	pt_regs_di(%rax), %rdi
+	movq	pt_regs_bx(%rax), %rbx
+	movq	pt_regs_cx(%rax), %rcx
+	movq	pt_regs_dx(%rax), %rdx
 	movq	pt_regs_r8(%rax), %r8
 	movq	pt_regs_r9(%rax), %r9
 	movq	pt_regs_r10(%rax), %r10
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index fd7464d23339..a3a8be7618d1 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,22 +75,22 @@ void foo(void)
 	OFFSET(GDS_pad, Xgt_desc_struct, pad);
 	BLANK();
 
-	OFFSET(PT_EBX, pt_regs, ebx);
-	OFFSET(PT_ECX, pt_regs, ecx);
-	OFFSET(PT_EDX, pt_regs, edx);
-	OFFSET(PT_ESI, pt_regs, esi);
-	OFFSET(PT_EDI, pt_regs, edi);
-	OFFSET(PT_EBP, pt_regs, ebp);
-	OFFSET(PT_EAX, pt_regs, eax);
-	OFFSET(PT_DS,  pt_regs, xds);
-	OFFSET(PT_ES,  pt_regs, xes);
-	OFFSET(PT_FS,  pt_regs, xfs);
-	OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
-	OFFSET(PT_EIP, pt_regs, eip);
-	OFFSET(PT_CS,  pt_regs, xcs);
-	OFFSET(PT_EFLAGS, pt_regs, eflags);
-	OFFSET(PT_OLDESP, pt_regs, esp);
-	OFFSET(PT_OLDSS,  pt_regs, xss);
+	OFFSET(PT_EBX, pt_regs, bx);
+	OFFSET(PT_ECX, pt_regs, cx);
+	OFFSET(PT_EDX, pt_regs, dx);
+	OFFSET(PT_ESI, pt_regs, si);
+	OFFSET(PT_EDI, pt_regs, di);
+	OFFSET(PT_EBP, pt_regs, bp);
+	OFFSET(PT_EAX, pt_regs, ax);
+	OFFSET(PT_DS,  pt_regs, ds);
+	OFFSET(PT_ES,  pt_regs, es);
+	OFFSET(PT_FS,  pt_regs, fs);
+	OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
+	OFFSET(PT_EIP, pt_regs, ip);
+	OFFSET(PT_CS,  pt_regs, cs);
+	OFFSET(PT_EFLAGS, pt_regs, flags);
+	OFFSET(PT_OLDESP, pt_regs, sp);
+	OFFSET(PT_OLDSS,  pt_regs, ss);
 	BLANK();
 
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index c27c646214f4..2e918ebf21d3 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -83,14 +83,14 @@ int main(void)
 	DEFINE(pbe_next, offsetof(struct pbe, next));
 	BLANK();
 #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
-	ENTRY(rbx);
-	ENTRY(rbx);
-	ENTRY(rcx);
-	ENTRY(rdx);
-	ENTRY(rsp);
-	ENTRY(rbp);
-	ENTRY(rsi);
-	ENTRY(rdi);
+	ENTRY(bx);
+	ENTRY(bx);
+	ENTRY(cx);
+	ENTRY(dx);
+	ENTRY(sp);
+	ENTRY(bp);
+	ENTRY(si);
+	ENTRY(di);
 	ENTRY(r8);
 	ENTRY(r9);
 	ENTRY(r10);
@@ -99,7 +99,7 @@ int main(void)
 	ENTRY(r13);
 	ENTRY(r14);
 	ENTRY(r15);
-	ENTRY(eflags);
+	ENTRY(flags);
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2fcf2051bdb..5db2a163bf4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -634,7 +634,7 @@ void __init early_cpu_init(void)
 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
-	regs->xfs = __KERNEL_PERCPU;
+	regs->fs = __KERNEL_PERCPU;
 	return regs;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 3c7672c40cf4..0adad772d0da 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -110,12 +110,12 @@ static void print_mce(struct mce *m)
 	       KERN_EMERG
 	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->cpu, m->mcgstatus, m->bank, m->status);
-	if (m->rip) {
+	if (m->ip) {
 		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
 		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
-		       m->cs, m->rip);
+		       m->cs, m->ip);
 		if (m->cs == __KERNEL_CS)
-			print_symbol("{%s}", m->rip);
+			print_symbol("{%s}", m->ip);
 		printk("\n");
 	}
 	printk(KERN_EMERG "TSC %Lx ", m->tsc);
@@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_x86 *c)
 static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 {
 	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
-		m->rip = regs->rip;
+		m->ip = regs->ip;
 		m->cs = regs->cs;
 	} else {
-		m->rip = 0;
+		m->ip = 0;
 		m->cs = 0;
 	}
 	if (rip_msr) {
 		/* Assume the RIP in the MSR is exact. Is this true? */
 		m->mcgstatus |= MCG_STATUS_EIPV;
-		rdmsrl(rip_msr, m->rip);
+		rdmsrl(rip_msr, m->ip);
 		m->cs = 0;
 	}
 }
@@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 		 * instruction which caused the MCE.
 		 */
 		if (m.mcgstatus & MCG_STATUS_EIPV)
-			user_space = panicm.rip && (panicm.cs & 3);
+			user_space = panicm.ip && (panicm.cs & 3);
 
 		/*
 		 * If we know that the error was in user space, send a
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 3321ce669295..f201e7da1bbc 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -339,7 +339,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
 	outb(0,0xF0);
 	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
 		return IRQ_NONE;
-	math_error((void __user *)get_irq_regs()->eip);
+	math_error((void __user *)get_irq_regs()->ip);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 4ef85a3b3f9f..fa70005be5e8 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1412,7 +1412,7 @@ static void irq_complete_move(unsigned int irq)
 	if (likely(!cfg->move_in_progress))
 		return;
 
-	vector = ~get_irq_regs()->orig_rax;
+	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport_32.c
index c281ffa18259..9295e01ff49c 100644
--- a/arch/x86/kernel/ioport_32.c
+++ b/arch/x86/kernel/ioport_32.c
@@ -100,7 +100,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
  * beyond the 0x3ff range: to get the full 65536 ports bitmapped
  * you'd need 8kB of bitmaps/process, which is a bit excessive.
  *
- * Here we just change the eflags value on the stack: we allow
+ * Here we just change the flags value on the stack: we allow
  * only the super-user to do it. This depends on the stack-layout
  * on system-call entry - see also fork() and the signal handling
  * code.
@@ -109,8 +109,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 asmlinkage long sys_iopl(unsigned long regsp)
 {
 	volatile struct pt_regs *regs = (struct pt_regs *)&regsp;
-	unsigned int level = regs->ebx;
-	unsigned int old = (regs->eflags >> 12) & 3;
+	unsigned int level = regs->bx;
+	unsigned int old = (regs->flags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
 
 	if (level > 3)
@@ -122,7 +122,7 @@ asmlinkage long sys_iopl(unsigned long regsp)
 	}
 
 	t->iopl = level << 12;
-	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | t->iopl;
 	set_iopl_mask(t->iopl);
 
 	return 0;
diff --git a/arch/x86/kernel/ioport_64.c b/arch/x86/kernel/ioport_64.c
index 5f62fad64dab..ff7514b757e5 100644
--- a/arch/x86/kernel/ioport_64.c
+++ b/arch/x86/kernel/ioport_64.c
@@ -95,7 +95,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
  * beyond the 0x3ff range: to get the full 65536 ports bitmapped
  * you'd need 8kB of bitmaps/process, which is a bit excessive.
  *
- * Here we just change the eflags value on the stack: we allow
+ * Here we just change the flags value on the stack: we allow
  * only the super-user to do it. This depends on the stack-layout
  * on system-call entry - see also fork() and the signal handling
  * code.
@@ -103,7 +103,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
 {
-	unsigned int old = (regs->eflags >> 12) & 3;
+	unsigned int old = (regs->flags >> 12) & 3;
 
 	if (level > 3)
 		return -EINVAL;
@@ -112,6 +112,6 @@ asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
+	regs->flags = (regs->flags &~ X86_EFLAGS_IOPL) | (level << 12);
 	return 0;
 }
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index d3fde94f7345..b49616bcc16b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -70,7 +70,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {	
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
-	int irq = ~regs->orig_eax;
+	int irq = ~regs->orig_ax;
 	struct irq_desc *desc = irq_desc + irq;
 #ifdef CONFIG_4KSTACKS
 	union irq_ctx *curctx, *irqctx;
@@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
 	{
-		long esp;
+		long sp;
 
 		__asm__ __volatile__("andl %%esp,%0" :
-					"=r" (esp) : "0" (THREAD_SIZE - 1));
-		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+					"=r" (sp) : "0" (THREAD_SIZE - 1));
+		if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
 			printk("do_IRQ: stack overflow: %ld\n",
-				esp - sizeof(struct thread_info));
+				sp - sizeof(struct thread_info));
 			dump_stack();
 		}
 	}
@@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
 	 * current stack (which is the irq stack already after all)
 	 */
 	if (curctx != irqctx) {
-		int arg1, arg2, ebx;
+		int arg1, arg2, bx;
 
 		/* build the stack frame on the IRQ stack */
 		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
@@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
 			(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
 
 		asm volatile(
-			"       xchgl  %%ebx,%%esp      \n"
-			"       call   *%%edi           \n"
-			"       movl   %%ebx,%%esp      \n"
-			: "=a" (arg1), "=d" (arg2), "=b" (ebx)
+			"       xchgl  %%ebx,%%esp    \n"
+			"       call   *%%edi         \n"
+			"       movl   %%ebx,%%esp    \n"
+			: "=a" (arg1), "=d" (arg2), "=b" (bx)
 			:  "0" (irq),   "1" (desc),  "2" (isp),
 			   "D" (desc->handle_irq)
 			: "memory", "cc"
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 6c3a3b6e5cf4..3aac15466a91 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -53,11 +53,11 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 	u64 curbase = (u64)task_stack_page(current);
 	static unsigned long warned = -60*HZ;
 
-	if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
-	    regs->rsp <  curbase + sizeof(struct thread_info) + 128 &&
+	if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
+	    regs->sp <  curbase + sizeof(struct thread_info) + 128 &&
 	    time_after(jiffies, warned + 60*HZ)) {
-		printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
-		       current->comm, curbase, regs->rsp);
+		printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+		       current->comm, curbase, regs->sp);
 		show_stack(NULL,NULL);
 		warned = jiffies;
 	}
@@ -162,7 +162,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	/* high bit used in ret_from_ code  */
-	unsigned vector = ~regs->orig_rax;
+	unsigned vector = ~regs->orig_ax;
 	unsigned irq;
 
 	exit_idle();
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index bc4a68367cd0..d708cd4f956f 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -212,7 +212,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
 	__get_cpu_var(current_kprobe) = p;
 	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
-		= (regs->eflags & (TF_MASK | IF_MASK));
+		= (regs->flags & (TF_MASK | IF_MASK));
 	if (is_IF_modifier(p->opcode))
 		kcb->kprobe_saved_eflags &= ~IF_MASK;
 }
@@ -232,20 +232,20 @@ static __always_inline void restore_btf(void)
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
 	clear_btf();
-	regs->eflags |= TF_MASK;
-	regs->eflags &= ~IF_MASK;
+	regs->flags |= TF_MASK;
+	regs->flags &= ~IF_MASK;
 	/*single step inline if the instruction is an int3*/
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->eip = (unsigned long)p->addr;
+		regs->ip = (unsigned long)p->addr;
 	else
-		regs->eip = (unsigned long)p->ainsn.insn;
+		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
 /* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	unsigned long *sara = (unsigned long *)&regs->esp;
+	unsigned long *sara = (unsigned long *)&regs->sp;
 
 	ri->ret_addr = (kprobe_opcode_t *) *sara;
 
@@ -264,7 +264,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	kprobe_opcode_t *addr;
 	struct kprobe_ctlblk *kcb;
 
-	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
+	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 
 	/*
 	 * We don't want to be preempted for the entire
@@ -279,8 +279,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		if (p) {
 			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-				regs->eflags &= ~TF_MASK;
-				regs->eflags |= kcb->kprobe_saved_eflags;
+				regs->flags &= ~TF_MASK;
+				regs->flags |= kcb->kprobe_saved_eflags;
 				goto no_kprobe;
 			}
 			/* We have reentered the kprobe_handler(), since
@@ -301,7 +301,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * another cpu right after we hit, no further
 			 * handling of this interrupt is appropriate
 			 */
-				regs->eip -= sizeof(kprobe_opcode_t);
+				regs->ip -= sizeof(kprobe_opcode_t);
 				ret = 1;
 				goto no_kprobe;
 			}
@@ -325,7 +325,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * Back up over the (now missing) int3 and run
 			 * the original instruction.
 			 */
-			regs->eip -= sizeof(kprobe_opcode_t);
+			regs->ip -= sizeof(kprobe_opcode_t);
 			ret = 1;
 		}
 		/* Not one of ours: let kernel handle it */
@@ -344,7 +344,7 @@ ss_probe:
 	if (p->ainsn.boostable == 1 && !p->post_handler){
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
-		regs->eip = (unsigned long)p->ainsn.insn;
+		regs->ip = (unsigned long)p->ainsn.insn;
 		preempt_enable_no_resched();
 		return 1;
 	}
@@ -368,7 +368,7 @@ no_kprobe:
 	asm volatile ( ".global kretprobe_trampoline\n"
 			"kretprobe_trampoline: \n"
 			"	pushf\n"
-			/* skip cs, eip, orig_eax */
+			/* skip cs, ip, orig_ax */
 			"	subl $12, %esp\n"
 			"	pushl %fs\n"
 			"	pushl %ds\n"
@@ -382,10 +382,10 @@ no_kprobe:
 			"	pushl %ebx\n"
 			"	movl %esp, %eax\n"
 			"	call trampoline_handler\n"
-			/* move eflags to cs */
+			/* move flags to cs */
 			"	movl 52(%esp), %edx\n"
 			"	movl %edx, 48(%esp)\n"
-			/* save true return address on eflags */
+			/* save true return address on flags */
 			"	movl %eax, 52(%esp)\n"
 			"	popl %ebx\n"
 			"	popl %ecx\n"
@@ -394,7 +394,7 @@ no_kprobe:
 			"	popl %edi\n"
 			"	popl %ebp\n"
 			"	popl %eax\n"
-			/* skip eip, orig_eax, es, ds, fs */
+			/* skip ip, orig_ax, es, ds, fs */
 			"	addl $20, %esp\n"
 			"	popf\n"
 			"	ret\n");
@@ -415,9 +415,9 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
 	spin_lock_irqsave(&kretprobe_lock, flags);
 	head = kretprobe_inst_table_head(current);
 	/* fixup registers */
-	regs->xcs = __KERNEL_CS | get_kernel_rpl();
-	regs->eip = trampoline_address;
-	regs->orig_eax = 0xffffffff;
+	regs->cs = __KERNEL_CS | get_kernel_rpl();
+	regs->ip = trampoline_address;
+	regs->orig_ax = 0xffffffff;
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -478,11 +478,11 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
  * interrupt.  We have to fix up the stack as follows:
  *
  * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new eip is relative to the copied instruction.  We need to make
+ * the new ip is relative to the copied instruction.  We need to make
  * it relative to the original instruction.
  *
  * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
+ * flags are set in the just-pushed flags, and may need to be cleared.
  *
  * 2) If the single-stepped instruction was a call, the return address
  * that is atop the stack is the address following the copied instruction.
@@ -493,11 +493,11 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
 static void __kprobes resume_execution(struct kprobe *p,
 		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
-	unsigned long *tos = (unsigned long *)&regs->esp;
+	unsigned long *tos = (unsigned long *)&regs->sp;
 	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
 	unsigned long orig_eip = (unsigned long)p->addr;
 
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	switch (p->ainsn.insn[0]) {
 	case 0x9c:		/* pushfl */
 		*tos &= ~(TF_MASK | IF_MASK);
@@ -508,8 +508,8 @@ static void __kprobes resume_execution(struct kprobe *p,
 	case 0xca:
 	case 0xcb:
 	case 0xcf:
-	case 0xea:		/* jmp absolute -- eip is correct */
-		/* eip is already adjusted, no more changes required */
+	case 0xea:		/* jmp absolute -- ip is correct */
+		/* ip is already adjusted, no more changes required */
 		p->ainsn.boostable = 1;
 		goto no_change;
 	case 0xe8:		/* call relative - Fix return addr */
@@ -522,14 +522,14 @@ static void __kprobes resume_execution(struct kprobe *p,
 		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
 			/*
 			 * call absolute, indirect
-			 * Fix return addr; eip is correct.
+			 * Fix return addr; ip is correct.
 			 * But this is not boostable
 			 */
 			*tos = orig_eip + (*tos - copy_eip);
 			goto no_change;
 		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
 			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
-			/* eip is correct. And this is boostable */
+			/* ip is correct. And this is boostable */
 			p->ainsn.boostable = 1;
 			goto no_change;
 		}
@@ -538,21 +538,21 @@ static void __kprobes resume_execution(struct kprobe *p,
 	}
 
 	if (p->ainsn.boostable == 0) {
-		if ((regs->eip > copy_eip) &&
-		    (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
+		if ((regs->ip > copy_eip) &&
+		    (regs->ip - copy_eip) + 5 < MAX_INSN_SIZE) {
 			/*
 			 * These instructions can be executed directly if it
 			 * jumps back to correct address.
 			 */
-			set_jmp_op((void *)regs->eip,
-				   (void *)orig_eip + (regs->eip - copy_eip));
+			set_jmp_op((void *)regs->ip,
+				   (void *)orig_eip + (regs->ip - copy_eip));
 			p->ainsn.boostable = 1;
 		} else {
 			p->ainsn.boostable = -1;
 		}
 	}
 
-	regs->eip = orig_eip + (regs->eip - copy_eip);
+	regs->ip = orig_eip + (regs->ip - copy_eip);
 
 no_change:
 	restore_btf();
@@ -578,8 +578,8 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	}
 
 	resume_execution(cur, regs, kcb);
-	regs->eflags |= kcb->kprobe_saved_eflags;
-	trace_hardirqs_fixup_flags(regs->eflags);
+	regs->flags |= kcb->kprobe_saved_eflags;
+	trace_hardirqs_fixup_flags(regs->flags);
 
 	/*Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -591,11 +591,11 @@ out:
 	preempt_enable_no_resched();
 
 	/*
-	 * if somebody else is singlestepping across a probe point, eflags
+	 * if somebody else is singlestepping across a probe point, flags
 	 * will have TF set, in which case, continue the remaining processing
 	 * of do_debug, as if this is not a probe hit.
 	 */
-	if (regs->eflags & TF_MASK)
+	if (regs->flags & TF_MASK)
 		return 0;
 
 	return 1;
@@ -612,12 +612,12 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		/*
 		 * We are here because the instruction being single
 		 * stepped caused a page fault. We reset the current
-		 * kprobe and the eip points back to the probe address
+		 * kprobe and the ip points back to the probe address
 		 * and allow the page fault handler to continue as a
 		 * normal page fault.
 		 */
-		regs->eip = (unsigned long)cur->addr;
-		regs->eflags |= kcb->kprobe_old_eflags;
+		regs->ip = (unsigned long)cur->addr;
+		regs->flags |= kcb->kprobe_old_eflags;
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
@@ -703,7 +703,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	kcb->jprobe_saved_regs = *regs;
-	kcb->jprobe_saved_esp = &regs->esp;
+	kcb->jprobe_saved_esp = &regs->sp;
 	addr = (unsigned long)(kcb->jprobe_saved_esp);
 
 	/*
@@ -715,9 +715,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 */
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
-	regs->eflags &= ~IF_MASK;
+	regs->flags &= ~IF_MASK;
 	trace_hardirqs_off();
-	regs->eip = (unsigned long)(jp->entry);
+	regs->ip = (unsigned long)(jp->entry);
 	return 1;
 }
 
@@ -736,15 +736,15 @@ void __kprobes jprobe_return(void)
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	u8 *addr = (u8 *) (regs->eip - 1);
+	u8 *addr = (u8 *) (regs->ip - 1);
 	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
 	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if (&regs->esp != kcb->jprobe_saved_esp) {
+		if (&regs->sp != kcb->jprobe_saved_esp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
-			printk("current esp %p does not match saved esp %p\n",
-			       &regs->esp, kcb->jprobe_saved_esp);
+			printk("current sp %p does not match saved sp %p\n",
+			       &regs->sp, kcb->jprobe_saved_esp);
 			printk("Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
 			printk("Current registers\n");
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 10d66e323c7d..f6837cd3bed5 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -251,7 +251,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
 	__get_cpu_var(current_kprobe) = p;
 	kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
-		= (regs->eflags & (TF_MASK | IF_MASK));
+		= (regs->flags & (TF_MASK | IF_MASK));
 	if (is_IF_modifier(p->ainsn.insn))
 		kcb->kprobe_saved_rflags &= ~IF_MASK;
 }
@@ -271,20 +271,20 @@ static __always_inline void restore_btf(void)
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
 	clear_btf();
-	regs->eflags |= TF_MASK;
-	regs->eflags &= ~IF_MASK;
+	regs->flags |= TF_MASK;
+	regs->flags &= ~IF_MASK;
 	/*single step inline if the instruction is an int3*/
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->rip = (unsigned long)p->addr;
+		regs->ip = (unsigned long)p->addr;
 	else
-		regs->rip = (unsigned long)p->ainsn.insn;
+		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
 /* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	unsigned long *sara = (unsigned long *)regs->rsp;
+	unsigned long *sara = (unsigned long *)regs->sp;
 
 	ri->ret_addr = (kprobe_opcode_t *) *sara;
 	/* Replace the return addr with trampoline addr */
@@ -295,7 +295,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p;
 	int ret = 0;
-	kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 	struct kprobe_ctlblk *kcb;
 
 	/*
@@ -311,8 +311,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 		if (p) {
 			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-				regs->eflags &= ~TF_MASK;
-				regs->eflags |= kcb->kprobe_saved_rflags;
+				regs->flags &= ~TF_MASK;
+				regs->flags |= kcb->kprobe_saved_rflags;
 				goto no_kprobe;
 			} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
 				/* TODO: Provide re-entrancy from
@@ -321,7 +321,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 				 * the instruction of the new probe.
 				 */
 				arch_disarm_kprobe(p);
-				regs->rip = (unsigned long)p->addr;
+				regs->ip = (unsigned long)p->addr;
 				reset_current_kprobe();
 				ret = 1;
 			} else {
@@ -345,7 +345,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * another cpu right after we hit, no further
 			 * handling of this interrupt is appropriate
 			 */
-				regs->rip = (unsigned long)addr;
+				regs->ip = (unsigned long)addr;
 				ret = 1;
 				goto no_kprobe;
 			}
@@ -369,7 +369,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * Back up over the (now missing) int3 and run
 			 * the original instruction.
 			 */
-			regs->rip = (unsigned long)addr;
+			regs->ip = (unsigned long)addr;
 			ret = 1;
 		}
 		/* Not one of ours: let kernel handle it */
@@ -454,7 +454,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-	regs->rip = orig_ret_address;
+	regs->ip = orig_ret_address;
 
 	reset_current_kprobe();
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
@@ -484,11 +484,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
  * interrupt.  We have to fix up the stack as follows:
  *
  * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new rip is relative to the copied instruction.  We need to make
+ * the new ip is relative to the copied instruction.  We need to make
  * it relative to the original instruction.
  *
  * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
+ * flags are set in the just-pushed flags, and may need to be cleared.
  *
  * 2) If the single-stepped instruction was a call, the return address
  * that is atop the stack is the address following the copied instruction.
@@ -497,7 +497,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 static void __kprobes resume_execution(struct kprobe *p,
 		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
-	unsigned long *tos = (unsigned long *)regs->rsp;
+	unsigned long *tos = (unsigned long *)regs->sp;
 	unsigned long copy_rip = (unsigned long)p->ainsn.insn;
 	unsigned long orig_rip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
@@ -506,7 +506,7 @@ static void __kprobes resume_execution(struct kprobe *p,
 	if (*insn >= 0x40 && *insn <= 0x4f)
 		insn++;
 
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	switch (*insn) {
 	case 0x9c:	/* pushfl */
 		*tos &= ~(TF_MASK | IF_MASK);
@@ -538,7 +538,8 @@ static void __kprobes resume_execution(struct kprobe *p,
 		break;
 	}
 
-	regs->rip = orig_rip + (regs->rip - copy_rip);
+	regs->ip = orig_rip + (regs->ip - copy_rip);
+
 no_change:
 	restore_btf();
 
@@ -559,8 +560,8 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	}
 
 	resume_execution(cur, regs, kcb);
-	regs->eflags |= kcb->kprobe_saved_rflags;
-	trace_hardirqs_fixup_flags(regs->eflags);
+	regs->flags |= kcb->kprobe_saved_rflags;
+	trace_hardirqs_fixup_flags(regs->flags);
 
 	/* Restore the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -572,11 +573,11 @@ out:
 	preempt_enable_no_resched();
 
 	/*
-	 * if somebody else is singlestepping across a probe point, eflags
+	 * if somebody else is singlestepping across a probe point, flags
 	 * will have TF set, in which case, continue the remaining processing
 	 * of do_debug, as if this is not a probe hit.
 	 */
-	if (regs->eflags & TF_MASK)
+	if (regs->flags & TF_MASK)
 		return 0;
 
 	return 1;
@@ -594,12 +595,12 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		/*
 		 * We are here because the instruction being single
 		 * stepped caused a page fault. We reset the current
-		 * kprobe and the rip points back to the probe address
+		 * kprobe and the ip points back to the probe address
 		 * and allow the page fault handler to continue as a
 		 * normal page fault.
 		 */
-		regs->rip = (unsigned long)cur->addr;
-		regs->eflags |= kcb->kprobe_old_rflags;
+		regs->ip = (unsigned long)cur->addr;
+		regs->flags |= kcb->kprobe_old_rflags;
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
@@ -629,9 +630,9 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		fixup = search_exception_tables(regs->rip);
+		fixup = search_exception_tables(regs->ip);
 		if (fixup) {
-			regs->rip = fixup->fixup;
+			regs->ip = fixup->fixup;
 			return 1;
 		}
 
@@ -688,7 +689,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	kcb->jprobe_saved_regs = *regs;
-	kcb->jprobe_saved_rsp = (long *) regs->rsp;
+	kcb->jprobe_saved_rsp = (long *) regs->sp;
 	addr = (unsigned long)(kcb->jprobe_saved_rsp);
 	/*
 	 * As Linus pointed out, gcc assumes that the callee
@@ -699,9 +700,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 */
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
-	regs->eflags &= ~IF_MASK;
+	regs->flags &= ~IF_MASK;
 	trace_hardirqs_off();
-	regs->rip = (unsigned long)(jp->entry);
+	regs->ip = (unsigned long)(jp->entry);
 	return 1;
 }
 
@@ -720,15 +721,15 @@ void __kprobes jprobe_return(void)
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	u8 *addr = (u8 *) (regs->rip - 1);
+	u8 *addr = (u8 *) (regs->ip - 1);
 	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
 	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if ((unsigned long *)regs->rsp != kcb->jprobe_saved_rsp) {
+		if ((unsigned long *)regs->sp != kcb->jprobe_saved_rsp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
-			printk("current rsp %p does not match saved rsp %p\n",
-			       (long *)regs->rsp, kcb->jprobe_saved_rsp);
+			printk("current sp %p does not match saved sp %p\n",
+			       (long *)regs->sp, kcb->jprobe_saved_rsp);
 			printk("Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
 			printk("Current registers\n");
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d5462f228daf..c9f28e02e86d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -265,13 +265,13 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  * New with Core Duo processors, MWAIT can take some hints based on CPU
  * capability.
  */
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
-			__mwait(eax, ecx);
+			__mwait(ax, cx);
 	}
 }
 
@@ -320,15 +320,15 @@ void __show_registers(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 	unsigned long d0, d1, d2, d3, d6, d7;
-	unsigned long esp;
+	unsigned long sp;
 	unsigned short ss, gs;
 
 	if (user_mode_vm(regs)) {
-		esp = regs->esp;
-		ss = regs->xss & 0xffff;
+		sp = regs->sp;
+		ss = regs->ss & 0xffff;
 		savesegment(gs, gs);
 	} else {
-		esp = (unsigned long) (&regs->esp);
+		sp = (unsigned long) (&regs->sp);
 		savesegment(ss, ss);
 		savesegment(gs, gs);
 	}
@@ -341,17 +341,17 @@ void __show_registers(struct pt_regs *regs, int all)
 			init_utsname()->version);
 
 	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-			0xffff & regs->xcs, regs->eip, regs->eflags,
+			0xffff & regs->cs, regs->ip, regs->flags,
 			smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	print_symbol("EIP is at %s\n", regs->ip);
 
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-		regs->eax, regs->ebx, regs->ecx, regs->edx);
+		regs->ax, regs->bx, regs->cx, regs->dx);
 	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
-		regs->esi, regs->edi, regs->ebp, esp);
+		regs->si, regs->di, regs->bp, sp);
 	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
-	       regs->xds & 0xffff, regs->xes & 0xffff,
-	       regs->xfs & 0xffff, gs, ss);
+	       regs->ds & 0xffff, regs->es & 0xffff,
+	       regs->fs & 0xffff, gs, ss);
 
 	if (!all)
 		return;
@@ -379,12 +379,12 @@ void __show_registers(struct pt_regs *regs, int all)
 void show_regs(struct pt_regs *regs)
 {
 	__show_registers(regs, 1);
-	show_trace(NULL, regs, &regs->esp);
+	show_trace(NULL, regs, &regs->sp);
 }
 
 /*
- * This gets run with %ebx containing the
- * function to call, and %edx containing
+ * This gets run with %bx containing the
+ * function to call, and %dx containing
  * the "args".
  */
 extern void kernel_thread_helper(void);
@@ -398,16 +398,16 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 
 	memset(&regs, 0, sizeof(regs));
 
-	regs.ebx = (unsigned long) fn;
-	regs.edx = (unsigned long) arg;
+	regs.bx = (unsigned long) fn;
+	regs.dx = (unsigned long) arg;
 
-	regs.xds = __USER_DS;
-	regs.xes = __USER_DS;
-	regs.xfs = __KERNEL_PERCPU;
-	regs.orig_eax = -1;
-	regs.eip = (unsigned long) kernel_thread_helper;
-	regs.xcs = __KERNEL_CS | get_kernel_rpl();
-	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+	regs.ds = __USER_DS;
+	regs.es = __USER_DS;
+	regs.fs = __KERNEL_PERCPU;
+	regs.orig_ax = -1;
+	regs.ip = (unsigned long) kernel_thread_helper;
+	regs.cs = __KERNEL_CS | get_kernel_rpl();
+	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
 
 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
@@ -470,7 +470,7 @@ void prepare_to_copy(struct task_struct *tsk)
 	unlazy_fpu(tsk);
 }
 
-int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	unsigned long unused,
 	struct task_struct * p, struct pt_regs * regs)
 {
@@ -480,8 +480,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 
 	childregs = task_pt_regs(p);
 	*childregs = *regs;
-	childregs->eax = 0;
-	childregs->esp = esp;
+	childregs->ax = 0;
+	childregs->sp = sp;
 
 	p->thread.esp = (unsigned long) childregs;
 	p->thread.esp0 = (unsigned long) (childregs+1);
@@ -508,7 +508,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 	 */
 	if (clone_flags & CLONE_SETTLS)
 		err = do_set_thread_area(p, -1,
-			(struct user_desc __user *)childregs->esi, 0);
+			(struct user_desc __user *)childregs->si, 0);
 
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
@@ -527,7 +527,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 /* changed the size calculations - should hopefully work better. lbt */
 	dump->magic = CMAGIC;
 	dump->start_code = 0;
-	dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+	dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
 	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
 	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
@@ -538,23 +538,23 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
 
-	dump->regs.ebx = regs->ebx;
-	dump->regs.ecx = regs->ecx;
-	dump->regs.edx = regs->edx;
-	dump->regs.esi = regs->esi;
-	dump->regs.edi = regs->edi;
-	dump->regs.ebp = regs->ebp;
-	dump->regs.eax = regs->eax;
-	dump->regs.ds = regs->xds;
-	dump->regs.es = regs->xes;
-	dump->regs.fs = regs->xfs;
+	dump->regs.ebx = regs->bx;
+	dump->regs.ecx = regs->cx;
+	dump->regs.edx = regs->dx;
+	dump->regs.esi = regs->si;
+	dump->regs.edi = regs->di;
+	dump->regs.ebp = regs->bp;
+	dump->regs.eax = regs->ax;
+	dump->regs.ds = regs->ds;
+	dump->regs.es = regs->es;
+	dump->regs.fs = regs->fs;
 	savesegment(gs,dump->regs.gs);
-	dump->regs.orig_eax = regs->orig_eax;
-	dump->regs.eip = regs->eip;
-	dump->regs.cs = regs->xcs;
-	dump->regs.eflags = regs->eflags;
-	dump->regs.esp = regs->esp;
-	dump->regs.ss = regs->xss;
+	dump->regs.orig_eax = regs->orig_ax;
+	dump->regs.eip = regs->ip;
+	dump->regs.cs = regs->cs;
+	dump->regs.eflags = regs->flags;
+	dump->regs.esp = regs->sp;
+	dump->regs.ss = regs->ss;
 
 	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 }
@@ -566,10 +566,10 @@ EXPORT_SYMBOL(dump_thread);
 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
 {
 	struct pt_regs ptregs = *task_pt_regs(tsk);
-	ptregs.xcs &= 0xffff;
-	ptregs.xds &= 0xffff;
-	ptregs.xes &= 0xffff;
-	ptregs.xss &= 0xffff;
+	ptregs.cs &= 0xffff;
+	ptregs.ds &= 0xffff;
+	ptregs.es &= 0xffff;
+	ptregs.ss &= 0xffff;
 
 	elf_core_copy_regs(regs, &ptregs);
 
@@ -684,7 +684,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  * More important, however, is the fact that this allows us much
  * more flexibility.
  *
- * The return value (in %eax) will be the "prev" task after
+ * The return value (in %ax) will be the "prev" task after
  * the task-switch, and shows up in ret_from_fork in entry.S,
  * for example.
  */
@@ -771,7 +771,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 
 asmlinkage int sys_fork(struct pt_regs regs)
 {
-	return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
 }
 
 asmlinkage int sys_clone(struct pt_regs regs)
@@ -780,12 +780,12 @@ asmlinkage int sys_clone(struct pt_regs regs)
 	unsigned long newsp;
 	int __user *parent_tidptr, *child_tidptr;
 
-	clone_flags = regs.ebx;
-	newsp = regs.ecx;
-	parent_tidptr = (int __user *)regs.edx;
-	child_tidptr = (int __user *)regs.edi;
+	clone_flags = regs.bx;
+	newsp = regs.cx;
+	parent_tidptr = (int __user *)regs.dx;
+	child_tidptr = (int __user *)regs.di;
 	if (!newsp)
-		newsp = regs.esp;
+		newsp = regs.sp;
 	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
 }
 
@@ -801,7 +801,7 @@ asmlinkage int sys_clone(struct pt_regs regs)
  */
 asmlinkage int sys_vfork(struct pt_regs regs)
 {
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
 }
 
 /*
@@ -812,13 +812,13 @@ asmlinkage int sys_execve(struct pt_regs regs)
 	int error;
 	char * filename;
 
-	filename = getname((char __user *) regs.ebx);
+	filename = getname((char __user *) regs.bx);
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
 	error = do_execve(filename,
-			(char __user * __user *) regs.ecx,
-			(char __user * __user *) regs.edx,
+			(char __user * __user *) regs.cx,
+			(char __user * __user *) regs.dx,
 			&regs);
 	if (error == 0) {
 		/* Make sure we don't return using sysenter.. */
@@ -834,24 +834,24 @@ out:
 
 unsigned long get_wchan(struct task_struct *p)
 {
-	unsigned long ebp, esp, eip;
+	unsigned long bp, sp, ip;
 	unsigned long stack_page;
 	int count = 0;
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 	stack_page = (unsigned long)task_stack_page(p);
-	esp = p->thread.esp;
-	if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+	sp = p->thread.esp;
+	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
 		return 0;
-	/* include/asm-i386/system.h:switch_to() pushes ebp last. */
-	ebp = *(unsigned long *) esp;
+	/* include/asm-i386/system.h:switch_to() pushes bp last. */
+	bp = *(unsigned long *) sp;
 	do {
-		if (ebp < stack_page || ebp > top_ebp+stack_page)
+		if (bp < stack_page || bp > top_ebp+stack_page)
 			return 0;
-		eip = *(unsigned long *) (ebp+4);
-		if (!in_sched_functions(eip))
-			return eip;
-		ebp = *(unsigned long *) ebp;
+		ip = *(unsigned long *) (bp+4);
+		if (!in_sched_functions(ip))
+			return ip;
+		bp = *(unsigned long *) bp;
 	} while (count++ < 16);
 	return 0;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ae5eca17aa3c..efbb1a2eab97 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -257,13 +257,13 @@ void cpu_idle(void)
  * New with Core Duo processors, MWAIT can take some hints based on CPU
  * capability.
  */
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
-			__mwait(eax, ecx);
+			__mwait(ax, cx);
 	}
 }
 
@@ -330,16 +330,16 @@ void __show_regs(struct pt_regs * regs)
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
-	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
-		regs->eflags);
+	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+	printk_address(regs->ip);
+	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
+		regs->flags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
-	       regs->rax, regs->rbx, regs->rcx);
+	       regs->ax, regs->bx, regs->cx);
 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
-	       regs->rdx, regs->rsi, regs->rdi); 
+	       regs->dx, regs->si, regs->di);
 	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
-	       regs->rbp, regs->r8, regs->r9); 
+	       regs->bp, regs->r8, regs->r9);
 	printk("R10: %016lx R11: %016lx R12: %016lx\n",
 	       regs->r10, regs->r11, regs->r12); 
 	printk("R13: %016lx R14: %016lx R15: %016lx\n",
@@ -476,7 +476,7 @@ void prepare_to_copy(struct task_struct *tsk)
 	unlazy_fpu(tsk);
 }
 
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		unsigned long unused,
 	struct task_struct * p, struct pt_regs * regs)
 {
@@ -488,10 +488,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 			(THREAD_SIZE + task_stack_page(p))) - 1;
 	*childregs = *regs;
 
-	childregs->rax = 0;
-	childregs->rsp = rsp;
-	if (rsp == ~0UL)
-		childregs->rsp = (unsigned long)childregs;
+	childregs->ax = 0;
+	childregs->sp = sp;
+	if (sp == ~0UL)
+		childregs->sp = (unsigned long)childregs;
 
 	p->thread.rsp = (unsigned long) childregs;
 	p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -525,7 +525,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 #ifdef CONFIG_IA32_EMULATION
 		if (test_thread_flag(TIF_IA32))
 			err = do_set_thread_area(p, -1,
-				(struct user_desc __user *)childregs->rsi, 0);
+				(struct user_desc __user *)childregs->si, 0);
 		else 			
 #endif	 
 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
@@ -732,7 +732,7 @@ void set_personality_64bit(void)
 
 asmlinkage long sys_fork(struct pt_regs *regs)
 {
-	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
 
 asmlinkage long
@@ -740,7 +740,7 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
 	if (!newsp)
-		newsp = regs->rsp;
+		newsp = regs->sp;
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
@@ -756,14 +756,14 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
  */
 asmlinkage long sys_vfork(struct pt_regs *regs)
 {
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
 		    NULL, NULL);
 }
 
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack;
-	u64 fp,rip;
+	u64 fp,ip;
 	int count = 0;
 
 	if (!p || p == current || p->state==TASK_RUNNING)
@@ -776,9 +776,9 @@ unsigned long get_wchan(struct task_struct *p)
 		if (fp < (unsigned long)stack ||
 		    fp > (unsigned long)stack+THREAD_SIZE)
 			return 0; 
-		rip = *(u64 *)(fp+8); 
-		if (!in_sched_functions(rip))
-			return rip; 
+		ip = *(u64 *)(fp+8);
+		if (!in_sched_functions(ip))
+			return ip;
 		fp = *(u64 *)fp; 
 	} while (count++ < 16); 
 	return 0;
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 512f8412b799..f81e2f1827d4 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -39,10 +39,10 @@
 
 static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
-	BUILD_BUG_ON(offsetof(struct pt_regs, ebx) != 0);
+	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
 	if (regno > FS)
 		--regno;
-	return &regs->ebx + regno;
+	return &regs->bx + regno;
 }
 
 static int putreg(struct task_struct *child,
@@ -80,7 +80,7 @@ static int putreg(struct task_struct *child,
 				clear_tsk_thread_flag(child, TIF_FORCED_TF);
 			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				value |= X86_EFLAGS_TF;
-			value |= regs->eflags & ~FLAG_MASK;
+			value |= regs->flags & ~FLAG_MASK;
 			break;
 	}
 	*pt_regs_access(regs, regno) = value;
@@ -98,7 +98,7 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 			/*
 			 * If the debugger set TF, hide it from the readout.
 			 */
-			retval = regs->eflags;
+			retval = regs->flags;
 			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				retval &= ~X86_EFLAGS_TF;
 			break;
@@ -369,8 +369,8 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 	info.si_signo = SIGTRAP;
 	info.si_code = TRAP_BRKPT;
 
-	/* User-mode eip? */
-	info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
+	/* User-mode ip? */
+	info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
 
 	/* Send us the fake SIGTRAP */
 	force_sig_info(SIGTRAP, &info, tsk);
@@ -392,12 +392,12 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit)
 
 	/* do the secure computing check first */
 	if (!entryexit)
-		secure_computing(regs->orig_eax);
+		secure_computing(regs->orig_ax);
 
 	if (unlikely(current->audit_context)) {
 		if (entryexit)
-			audit_syscall_exit(AUDITSC_RESULT(regs->eax),
-						regs->eax);
+			audit_syscall_exit(AUDITSC_RESULT(regs->ax),
+						regs->ax);
 		/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
 		 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
 		 * not used, entry.S will call us only on syscall exit, not
@@ -445,13 +445,13 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit)
 	ret = is_sysemu;
 out:
 	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax,
-				    regs->ebx, regs->ecx, regs->edx, regs->esi);
+		audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
+				    regs->bx, regs->cx, regs->dx, regs->si);
 	if (ret == 0)
 		return 0;
 
-	regs->orig_eax = -1; /* force skip of syscall restarting */
+	regs->orig_ax = -1; /* force skip of syscall restarting */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax);
+		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 	return 1;
 }
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 4ba66d8af717..bee20bb1a6c0 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -119,7 +119,7 @@ static int putreg(struct task_struct *child,
 				clear_tsk_thread_flag(child, TIF_FORCED_TF);
 			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 				value |= X86_EFLAGS_TF;
-			value |= regs->eflags & ~FLAG_MASK;
+			value |= regs->flags & ~FLAG_MASK;
 			break;
 		case offsetof(struct user_regs_struct,cs): 
 			if ((value & 3) != 3)
@@ -168,7 +168,7 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 			/*
 			 * If the debugger set TF, hide it from the readout.
 			 */
-			val = regs->eflags;
+			val = regs->flags;
 			if (test_tsk_thread_flag(child, TIF_IA32))
 				val &= 0xffffffff;
 			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
@@ -383,9 +383,9 @@ static void syscall_trace(struct pt_regs *regs)
 {
 
 #if 0
-	printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n",
+	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
 	       current->comm,
-	       regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0),
+	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
 	       current_thread_info()->flags, current->ptrace); 
 #endif
 
@@ -405,7 +405,7 @@ static void syscall_trace(struct pt_regs *regs)
 asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
 	/* do the secure computing check first */
-	secure_computing(regs->orig_rax);
+	secure_computing(regs->orig_ax);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE)
 	    && (current->ptrace & PT_PTRACED))
@@ -414,14 +414,14 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(current->audit_context)) {
 		if (test_thread_flag(TIF_IA32)) {
 			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_rax,
-					    regs->rbx, regs->rcx,
-					    regs->rdx, regs->rsi);
+					    regs->orig_ax,
+					    regs->bx, regs->cx,
+					    regs->dx, regs->si);
 		} else {
 			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_rax,
-					    regs->rdi, regs->rsi,
-					    regs->rdx, regs->r10);
+					    regs->orig_ax,
+					    regs->di, regs->si,
+					    regs->dx, regs->r10);
 		}
 	}
 }
@@ -429,7 +429,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
+		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
 	if ((test_thread_flag(TIF_SYSCALL_TRACE)
 	     || test_thread_flag(TIF_SINGLESTEP))
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0a7c812212c9..40fd3515ccf1 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -82,14 +82,14 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 }
 
 asmlinkage int
-sys_sigaltstack(unsigned long ebx)
+sys_sigaltstack(unsigned long bx)
 {
 	/* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
-	struct pt_regs *regs = (struct pt_regs *)&ebx;
-	const stack_t __user *uss = (const stack_t __user *)ebx;
-	stack_t __user *uoss = (stack_t __user *)regs->ecx;
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
 
-	return do_sigaltstack(uss, uoss, regs->esp);
+	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
 
@@ -105,17 +105,17 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+#define COPY(x)		err |= __get_user(regs->x, &sc->e ## x)
 
 #define COPY_SEG(seg)							\
 	{ unsigned short tmp;						\
 	  err |= __get_user(tmp, &sc->seg);				\
-	  regs->x##seg = tmp; }
+	  regs->seg = tmp; }
 
 #define COPY_SEG_STRICT(seg)						\
 	{ unsigned short tmp;						\
 	  err |= __get_user(tmp, &sc->seg);				\
-	  regs->x##seg = tmp|3; }
+	  regs->seg = tmp|3; }
 
 #define GET_SEG(seg)							\
 	{ unsigned short tmp;						\
@@ -131,22 +131,22 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	COPY_SEG(fs);
 	COPY_SEG(es);
 	COPY_SEG(ds);
-	COPY(edi);
-	COPY(esi);
-	COPY(ebp);
-	COPY(esp);
-	COPY(ebx);
-	COPY(edx);
-	COPY(ecx);
-	COPY(eip);
+	COPY(di);
+	COPY(si);
+	COPY(bp);
+	COPY(sp);
+	COPY(bx);
+	COPY(dx);
+	COPY(cx);
+	COPY(ip);
 	COPY_SEG_STRICT(cs);
 	COPY_SEG_STRICT(ss);
 	
 	{
 		unsigned int tmpflags;
 		err |= __get_user(tmpflags, &sc->eflags);
-		regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-		regs->orig_eax = -1;		/* disable syscall checks */
+		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+		regs->orig_ax = -1;		/* disable syscall checks */
 	}
 
 	{
@@ -175,9 +175,9 @@ badframe:
 asmlinkage int sys_sigreturn(unsigned long __unused)
 {
 	struct pt_regs *regs = (struct pt_regs *) &__unused;
-	struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
+	struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8);
 	sigset_t set;
-	int eax;
+	int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -193,17 +193,17 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	
-	if (restore_sigcontext(regs, &frame->sc, &eax))
+	if (restore_sigcontext(regs, &frame->sc, &ax))
 		goto badframe;
-	return eax;
+	return ax;
 
 badframe:
 	if (show_unhandled_signals && printk_ratelimit())
-		printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
-		       " esp:%lx oeax:%lx\n",
+		printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx"
+		       " sp:%lx oeax:%lx\n",
 		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, task_pid_nr(current), frame, regs->eip,
-		    regs->esp, regs->orig_eax);
+		    current->comm, task_pid_nr(current), frame, regs->ip,
+		    regs->sp, regs->orig_ax);
 
 	force_sig(SIGSEGV, current);
 	return 0;
@@ -212,9 +212,9 @@ badframe:
 asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 {
 	struct pt_regs *regs = (struct pt_regs *) &__unused;
-	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
+	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp - 4);
 	sigset_t set;
-	int eax;
+	int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -227,13 +227,13 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
 		goto badframe;
 
-	return eax;
+	return ax;
 
 badframe:
 	force_sig(SIGSEGV, current);
@@ -250,27 +250,27 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 {
 	int tmp, err = 0;
 
-	err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs);
+	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
 	savesegment(gs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
 
-	err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
-	err |= __put_user(regs->edi, &sc->edi);
-	err |= __put_user(regs->esi, &sc->esi);
-	err |= __put_user(regs->ebp, &sc->ebp);
-	err |= __put_user(regs->esp, &sc->esp);
-	err |= __put_user(regs->ebx, &sc->ebx);
-	err |= __put_user(regs->edx, &sc->edx);
-	err |= __put_user(regs->ecx, &sc->ecx);
-	err |= __put_user(regs->eax, &sc->eax);
+	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+	err |= __put_user(regs->di, &sc->edi);
+	err |= __put_user(regs->si, &sc->esi);
+	err |= __put_user(regs->bp, &sc->ebp);
+	err |= __put_user(regs->sp, &sc->esp);
+	err |= __put_user(regs->bx, &sc->ebx);
+	err |= __put_user(regs->dx, &sc->edx);
+	err |= __put_user(regs->cx, &sc->ecx);
+	err |= __put_user(regs->ax, &sc->eax);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->eip, &sc->eip);
-	err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->eflags, &sc->eflags);
-	err |= __put_user(regs->esp, &sc->esp_at_signal);
-	err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
+	err |= __put_user(regs->ip, &sc->eip);
+	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+	err |= __put_user(regs->flags, &sc->eflags);
+	err |= __put_user(regs->sp, &sc->esp_at_signal);
+	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
 	tmp = save_i387(fpstate);
 	if (tmp < 0)
@@ -291,36 +291,36 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 static inline void __user *
 get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 {
-	unsigned long esp;
+	unsigned long sp;
 
 	/* Default to using normal stack */
-	esp = regs->esp;
+	sp = regs->sp;
 
 	/*
 	 * If we are on the alternate signal stack and would overflow it, don't.
 	 * Return an always-bogus address instead so we will die with SIGSEGV.
 	 */
-	if (on_sig_stack(esp) && !likely(on_sig_stack(esp - frame_size)))
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
 		return (void __user *) -1L;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(esp) == 0)
-			esp = current->sas_ss_sp + current->sas_ss_size;
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
 	/* This is the legacy signal stack switching. */
-	else if ((regs->xss & 0xffff) != __USER_DS &&
+	else if ((regs->ss & 0xffff) != __USER_DS &&
 		 !(ka->sa.sa_flags & SA_RESTORER) &&
 		 ka->sa.sa_restorer) {
-		esp = (unsigned long) ka->sa.sa_restorer;
+		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	esp -= frame_size;
+	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
-	esp = ((esp + 4) & -16ul) - 4;
-	return (void __user *) esp;
+	sp = ((sp + 4) & -16ul) - 4;
+	return (void __user *) sp;
 }
 
 /* These symbols are defined with the addresses in the vsyscall page.
@@ -387,16 +387,16 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->esp = (unsigned long) frame;
-	regs->eip = (unsigned long) ka->sa.sa_handler;
-	regs->eax = (unsigned long) sig;
-	regs->edx = (unsigned long) 0;
-	regs->ecx = (unsigned long) 0;
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+	regs->ax = (unsigned long) sig;
+	regs->dx = (unsigned long) 0;
+	regs->cx = (unsigned long) 0;
 
-	regs->xds = __USER_DS;
-	regs->xes = __USER_DS;
-	regs->xss = __USER_DS;
-	regs->xcs = __USER_CS;
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
 
 	/*
 	 * Clear TF when entering the signal handler, but
@@ -404,13 +404,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	 * The tracer may want to single-step inside the
 	 * handler too.
 	 */
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
-		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+		current->comm, current->pid, frame, regs->ip, frame->pretcode);
 #endif
 
 	return 0;
@@ -450,7 +450,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->esp),
+	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
@@ -466,7 +466,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(restorer, &frame->pretcode);
 	 
 	/*
-	 * This is movl $,%eax ; int $0x80
+	 * This is movl $,%ax ; int $0x80
 	 *
 	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
 	 * reasons and because gdb uses it as a signature to notice
@@ -480,16 +480,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->esp = (unsigned long) frame;
-	regs->eip = (unsigned long) ka->sa.sa_handler;
-	regs->eax = (unsigned long) usig;
-	regs->edx = (unsigned long) &frame->info;
-	regs->ecx = (unsigned long) &frame->uc;
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+	regs->ax = (unsigned long) usig;
+	regs->dx = (unsigned long) &frame->info;
+	regs->cx = (unsigned long) &frame->uc;
 
-	regs->xds = __USER_DS;
-	regs->xes = __USER_DS;
-	regs->xss = __USER_DS;
-	regs->xcs = __USER_CS;
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
 
 	/*
 	 * Clear TF when entering the signal handler, but
@@ -497,13 +497,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 * The tracer may want to single-step inside the
 	 * handler too.
 	 */
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
 #if DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
-		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+		current->comm, current->pid, frame, regs->ip, frame->pretcode);
 #endif
 
 	return 0;
@@ -524,23 +524,23 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	int ret;
 
 	/* Are we from a system call? */
-	if (regs->orig_eax >= 0) {
+	if (regs->orig_ax >= 0) {
 		/* If so, check system call restarting.. */
-		switch (regs->eax) {
+		switch (regs->ax) {
 		        case -ERESTART_RESTARTBLOCK:
 			case -ERESTARTNOHAND:
-				regs->eax = -EINTR;
+				regs->ax = -EINTR;
 				break;
 
 			case -ERESTARTSYS:
 				if (!(ka->sa.sa_flags & SA_RESTART)) {
-					regs->eax = -EINTR;
+					regs->ax = -EINTR;
 					break;
 				}
 			/* fallthrough */
 			case -ERESTARTNOINTR:
-				regs->eax = regs->orig_eax;
-				regs->eip -= 2;
+				regs->ax = regs->orig_ax;
+				regs->ip -= 2;
 		}
 	}
 
@@ -548,9 +548,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
 	 * flag so that register information in the sigcontext is correct.
 	 */
-	if (unlikely(regs->eflags & X86_EFLAGS_TF) &&
+	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
-		regs->eflags &= ~X86_EFLAGS_TF;
+		regs->flags &= ~X86_EFLAGS_TF;
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -622,19 +622,19 @@ static void fastcall do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if (regs->orig_eax >= 0) {
+	if (regs->orig_ax >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (regs->eax) {
+		switch (regs->ax) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
-			regs->eax = regs->orig_eax;
-			regs->eip -= 2;
+			regs->ax = regs->orig_ax;
+			regs->ip -= 2;
 			break;
 
 		case -ERESTART_RESTARTBLOCK:
-			regs->eax = __NR_restart_syscall;
-			regs->eip -= 2;
+			regs->ax = __NR_restart_syscall;
+			regs->ip -= 2;
 			break;
 		}
 	}
@@ -657,7 +657,7 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
 {
 	/* Pending single-step? */
 	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->eflags |= TF_MASK;
+		regs->flags |= TF_MASK;
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ab0178ebe00a..4b228fd83b31 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -39,7 +39,7 @@ asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
 {
-	return do_sigaltstack(uss, uoss, regs->rsp);
+	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
 
@@ -62,10 +62,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
+#define COPYR(x)	err |= __get_user(regs->x, &sc->r ## x)
 #define COPY(x)		err |= __get_user(regs->x, &sc->x)
 
-	COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx);
-	COPY(rdx); COPY(rcx); COPY(rip);
+	COPYR(di); COPYR(si); COPYR(bp); COPYR(sp); COPYR(bx);
+	COPYR(dx); COPYR(cx); COPYR(ip);
 	COPY(r8);
 	COPY(r9);
 	COPY(r10);
@@ -87,8 +88,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 	{
 		unsigned int tmpflags;
 		err |= __get_user(tmpflags, &sc->eflags);
-		regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
-		regs->orig_rax = -1;		/* disable syscall checks */
+		regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+		regs->orig_ax = -1;		/* disable syscall checks */
 	}
 
 	{
@@ -119,9 +120,9 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	sigset_t set;
-	unsigned long eax;
+	unsigned long ax;
 
-	frame = (struct rt_sigframe __user *)(regs->rsp - 8);
+	frame = (struct rt_sigframe __user *)(regs->sp - 8);
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
 		goto badframe;
 	} 
@@ -135,17 +136,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
 #ifdef DEBUG_SIG
-	printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
+	printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax);
 #endif
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
 		goto badframe;
 
-	return eax;
+	return ax;
 
 badframe:
 	signal_fault(regs,frame,"sigreturn");
@@ -165,14 +166,14 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 	err |= __put_user(0, &sc->gs);
 	err |= __put_user(0, &sc->fs);
 
-	err |= __put_user(regs->rdi, &sc->rdi);
-	err |= __put_user(regs->rsi, &sc->rsi);
-	err |= __put_user(regs->rbp, &sc->rbp);
-	err |= __put_user(regs->rsp, &sc->rsp);
-	err |= __put_user(regs->rbx, &sc->rbx);
-	err |= __put_user(regs->rdx, &sc->rdx);
-	err |= __put_user(regs->rcx, &sc->rcx);
-	err |= __put_user(regs->rax, &sc->rax);
+	err |= __put_user(regs->di, &sc->rdi);
+	err |= __put_user(regs->si, &sc->rsi);
+	err |= __put_user(regs->bp, &sc->rbp);
+	err |= __put_user(regs->sp, &sc->rsp);
+	err |= __put_user(regs->bx, &sc->rbx);
+	err |= __put_user(regs->dx, &sc->rdx);
+	err |= __put_user(regs->cx, &sc->rcx);
+	err |= __put_user(regs->ax, &sc->rax);
 	err |= __put_user(regs->r8, &sc->r8);
 	err |= __put_user(regs->r9, &sc->r9);
 	err |= __put_user(regs->r10, &sc->r10);
@@ -183,8 +184,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 	err |= __put_user(regs->r15, &sc->r15);
 	err |= __put_user(me->thread.trap_no, &sc->trapno);
 	err |= __put_user(me->thread.error_code, &sc->err);
-	err |= __put_user(regs->rip, &sc->rip);
-	err |= __put_user(regs->eflags, &sc->eflags);
+	err |= __put_user(regs->ip, &sc->rip);
+	err |= __put_user(regs->flags, &sc->eflags);
 	err |= __put_user(mask, &sc->oldmask);
 	err |= __put_user(me->thread.cr2, &sc->cr2);
 
@@ -198,18 +199,18 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 static void __user *
 get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 {
-	unsigned long rsp;
+	unsigned long sp;
 
 	/* Default to using normal stack - redzone*/
-	rsp = regs->rsp - 128;
+	sp = regs->sp - 128;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(rsp) == 0)
-			rsp = current->sas_ss_sp + current->sas_ss_size;
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
-	return (void __user *)round_down(rsp - size, 16); 
+	return (void __user *)round_down(sp - size, 16);
 }
 
 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -246,7 +247,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->rsp),
+	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
@@ -271,21 +272,21 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		goto give_sigsegv;
 
 #ifdef DEBUG_SIG
-	printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
+	printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax);
 #endif
 
 	/* Set up registers for signal handler */
-	regs->rdi = sig;
+	regs->di = sig;
 	/* In case the signal handler was declared without prototypes */ 
-	regs->rax = 0;	
+	regs->ax = 0;
 
 	/* This also works for non SA_SIGINFO handlers because they expect the
 	   next argument after the signal number on the stack. */
-	regs->rsi = (unsigned long)&frame->info; 
-	regs->rdx = (unsigned long)&frame->uc; 
-	regs->rip = (unsigned long) ka->sa.sa_handler;
+	regs->si = (unsigned long)&frame->info;
+	regs->dx = (unsigned long)&frame->uc;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
 
-	regs->rsp = (unsigned long)frame;
+	regs->sp = (unsigned long)frame;
 
 	/* Set up the CS register to run signal handlers in 64-bit mode,
 	   even if the handler happens to be interrupting 32-bit code. */
@@ -295,12 +296,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	   see include/asm-x86_64/uaccess.h for details. */
 	set_fs(USER_DS);
 
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 #ifdef DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
-		current->comm, current->pid, frame, regs->rip, frame->pretcode);
+		current->comm, current->pid, frame, regs->ip, frame->pretcode);
 #endif
 
 	return 0;
@@ -321,29 +322,29 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	int ret;
 
 #ifdef DEBUG_SIG
-	printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
+	printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n",
 		current->pid, sig,
-		regs->rip, regs->rsp, regs);
+		regs->ip, regs->sp, regs);
 #endif
 
 	/* Are we from a system call? */
-	if ((long)regs->orig_rax >= 0) {
+	if ((long)regs->orig_ax >= 0) {
 		/* If so, check system call restarting.. */
-		switch (regs->rax) {
+		switch (regs->ax) {
 		        case -ERESTART_RESTARTBLOCK:
 			case -ERESTARTNOHAND:
-				regs->rax = -EINTR;
+				regs->ax = -EINTR;
 				break;
 
 			case -ERESTARTSYS:
 				if (!(ka->sa.sa_flags & SA_RESTART)) {
-					regs->rax = -EINTR;
+					regs->ax = -EINTR;
 					break;
 				}
 				/* fallthrough */
 			case -ERESTARTNOINTR:
-				regs->rax = regs->orig_rax;
-				regs->rip -= 2;
+				regs->ax = regs->orig_ax;
+				regs->ip -= 2;
 				break;
 		}
 	}
@@ -352,9 +353,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
 	 * flag so that register information in the sigcontext is correct.
 	 */
-	if (unlikely(regs->eflags & X86_EFLAGS_TF) &&
+	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
-		regs->eflags &= ~X86_EFLAGS_TF;
+		regs->flags &= ~X86_EFLAGS_TF;
 
 #ifdef CONFIG_IA32_EMULATION
 	if (test_thread_flag(TIF_IA32)) {
@@ -426,21 +427,21 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if ((long)regs->orig_rax >= 0) {
+	if ((long)regs->orig_ax >= 0) {
 		/* Restart the system call - no handlers present */
-		long res = regs->rax;
+		long res = regs->ax;
 		switch (res) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
-			regs->rax = regs->orig_rax;
-			regs->rip -= 2;
+			regs->ax = regs->orig_ax;
+			regs->ip -= 2;
 			break;
 		case -ERESTART_RESTARTBLOCK:
-			regs->rax = test_thread_flag(TIF_IA32) ?
+			regs->ax = test_thread_flag(TIF_IA32) ?
 					__NR_ia32_restart_syscall :
 					__NR_restart_syscall;
-			regs->rip -= 2;
+			regs->ip -= 2;
 			break;
 		}
 	}
@@ -457,13 +458,13 @@ void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
 #ifdef DEBUG_SIG
-	printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
-	       thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); 
+	printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n",
+	       thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current));
 #endif
 	       
 	/* Pending single-step? */
 	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->eflags |= TF_MASK;
+		regs->flags |= TF_MASK;
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
 
@@ -485,8 +486,8 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 { 
 	struct task_struct *me = current; 
 	if (show_unhandled_signals && printk_ratelimit())
-		printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
-	       me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax); 
+		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx\n",
+	       me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
 
 	force_sig(SIGSEGV, me); 
 } 
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 7142447b5666..02a6533e8909 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -136,7 +136,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	 * orig_rax contains the negated interrupt vector.
 	 * Use that to determine where the sender put the data.
 	 */
-	sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
+	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
 	f = &per_cpu(flush_state, sender);
 
 	if (!cpu_isset(cpu, f->flush_cpumask))
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 0bf7f20baba0..3566191832b3 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -447,7 +447,7 @@ void __devinit initialize_secondary(void)
 {
 	/*
 	 * We don't actually need to load the full TSS,
-	 * basically just the stack pointer and the eip.
+	 * basically just the stack pointer and the ip.
 	 */
 
 	asm volatile(
@@ -459,7 +459,7 @@ void __devinit initialize_secondary(void)
 
 /* Static state in head.S used to set up a CPU */
 extern struct {
-	void * esp;
+	void * sp;
 	unsigned short ss;
 } stack_start;
 
@@ -667,7 +667,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	 * target processor state.
 	 */
 	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
-		         (unsigned long) stack_start.esp);
+		         (unsigned long) stack_start.sp);
 
 	/*
 	 * Run STARTUP IPI loop.
@@ -806,9 +806,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	alternatives_smp_switch(1);
 
 	/* So we see what's up   */
-	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+	printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
-	stack_start.esp = (void *) idle->thread.esp;
+	stack_start.sp = (void *) idle->thread.esp;
 
 	irq_ctx_init(cpu);
 
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cf4b9dac4a05..f55c003f5b63 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -12,17 +12,12 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r
 {
 	unsigned long addr, seg;
 
-#ifdef CONFIG_X86_64
-	addr = regs->rip;
+	addr = regs->ip;
 	seg = regs->cs & 0xffff;
-#else
-	addr = regs->eip;
-	seg = regs->xcs & 0xffff;
-	if (regs->eflags & X86_EFLAGS_VM) {
+	if (v8086_mode(regs)) {
 		addr = (addr & 0xffff) + (seg << 4);
 		return addr;
 	}
-#endif
 
 	/*
 	 * We'll assume that the code segments in the GDT
@@ -124,11 +119,11 @@ static int enable_single_step(struct task_struct *child)
 	/*
 	 * If TF was already set, don't do anything else
 	 */
-	if (regs->eflags & X86_EFLAGS_TF)
+	if (regs->flags & X86_EFLAGS_TF)
 		return 0;
 
 	/* Set TF on the kernel stack.. */
-	regs->eflags |= X86_EFLAGS_TF;
+	regs->flags |= X86_EFLAGS_TF;
 
 	/*
 	 * ..but if TF is changed by the instruction we will trace,
@@ -203,5 +198,5 @@ void user_disable_single_step(struct task_struct *child)
 
 	/* But touch TF only if it was set by us.. */
 	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
-		task_pt_regs(child)->eflags &= ~X86_EFLAGS_TF;
+		task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
 }
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S
index 72f952103e50..aeb9a4d7681e 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/kernel/suspend_asm_64.S
@@ -18,13 +18,13 @@
 
 ENTRY(swsusp_arch_suspend)
 	movq	$saved_context, %rax
-	movq	%rsp, pt_regs_rsp(%rax)
-	movq	%rbp, pt_regs_rbp(%rax)
-	movq	%rsi, pt_regs_rsi(%rax)
-	movq	%rdi, pt_regs_rdi(%rax)
-	movq	%rbx, pt_regs_rbx(%rax)
-	movq	%rcx, pt_regs_rcx(%rax)
-	movq	%rdx, pt_regs_rdx(%rax)
+	movq	%rsp, pt_regs_sp(%rax)
+	movq	%rbp, pt_regs_bp(%rax)
+	movq	%rsi, pt_regs_si(%rax)
+	movq	%rdi, pt_regs_di(%rax)
+	movq	%rbx, pt_regs_bx(%rax)
+	movq	%rcx, pt_regs_cx(%rax)
+	movq	%rdx, pt_regs_dx(%rax)
 	movq	%r8, pt_regs_r8(%rax)
 	movq	%r9, pt_regs_r9(%rax)
 	movq	%r10, pt_regs_r10(%rax)
@@ -34,7 +34,7 @@ ENTRY(swsusp_arch_suspend)
 	movq	%r14, pt_regs_r14(%rax)
 	movq	%r15, pt_regs_r15(%rax)
 	pushfq
-	popq	pt_regs_eflags(%rax)
+	popq	pt_regs_flags(%rax)
 
 	/* save the address of restore_registers */
 	movq	$restore_registers, %rax
@@ -115,13 +115,13 @@ ENTRY(restore_registers)
 
 	/* We don't restore %rax, it must be 0 anyway */
 	movq	$saved_context, %rax
-	movq	pt_regs_rsp(%rax), %rsp
-	movq	pt_regs_rbp(%rax), %rbp
-	movq	pt_regs_rsi(%rax), %rsi
-	movq	pt_regs_rdi(%rax), %rdi
-	movq	pt_regs_rbx(%rax), %rbx
-	movq	pt_regs_rcx(%rax), %rcx
-	movq	pt_regs_rdx(%rax), %rdx
+	movq	pt_regs_sp(%rax), %rsp
+	movq	pt_regs_bp(%rax), %rbp
+	movq	pt_regs_si(%rax), %rsi
+	movq	pt_regs_di(%rax), %rdi
+	movq	pt_regs_bx(%rax), %rbx
+	movq	pt_regs_cx(%rax), %rcx
+	movq	pt_regs_dx(%rax), %rdx
 	movq	pt_regs_r8(%rax), %r8
 	movq	pt_regs_r9(%rax), %r9
 	movq	pt_regs_r10(%rax), %r10
@@ -130,7 +130,7 @@ ENTRY(restore_registers)
 	movq	pt_regs_r13(%rax), %r13
 	movq	pt_regs_r14(%rax), %r14
 	movq	pt_regs_r15(%rax), %r15
-	pushq	pt_regs_eflags(%rax)
+	pushq	pt_regs_flags(%rax)
 	popfq
 
 	xorq	%rax, %rax
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 2dcbb81b4cd3..1a89e93f3f1c 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -49,15 +49,15 @@ unsigned long profile_pc(struct pt_regs *regs)
 	unsigned long pc = instruction_pointer(regs);
 
 #ifdef CONFIG_SMP
-	if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) &&
+	if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) &&
 	    in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
-		return *(unsigned long *)(regs->ebp + 4);
+		return *(unsigned long *)(regs->bp + 4);
 #else
-		unsigned long *sp = (unsigned long *)&regs->esp;
+		unsigned long *sp = (unsigned long *)&regs->sp;
 
 		/* Return address is either directly at stack pointer
-		   or above a saved eflags. Eflags has bits 22-31 zero,
+		   or above a saved flags. Eflags has bits 22-31 zero,
 		   kernel addresses don't. */
 		if (sp[0] >> 22)
 			return sp[0];
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index f88bf6b802e3..bf0bcc9bb001 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -29,10 +29,10 @@ unsigned long profile_pc(struct pt_regs *regs)
 	unsigned long pc = instruction_pointer(regs);
 
 	/* Assume the lock function has either no stack frame or a copy
-	   of eflags from PUSHF
+	   of flags from PUSHF
 	   Eflags always has bits 22 and up cleared unlike kernel addresses. */
 	if (!user_mode(regs) && in_lock_functions(pc)) {
-		unsigned long *sp = (unsigned long *)regs->rsp;
+		unsigned long *sp = (unsigned long *)regs->sp;
 		if (sp[0] >> 22)
 			return sp[0];
 		if (sp[1] >> 22)
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 9b0bbd508cd5..931ef10960ee 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -114,11 +114,11 @@ struct stack_frame {
 };
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
-				unsigned long *stack, unsigned long ebp,
+				unsigned long *stack, unsigned long bp,
 				const struct stacktrace_ops *ops, void *data)
 {
 #ifdef	CONFIG_FRAME_POINTER
-	struct stack_frame *frame = (struct stack_frame *)ebp;
+	struct stack_frame *frame = (struct stack_frame *)bp;
 	while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
 		struct stack_frame *next;
 		unsigned long addr;
@@ -145,7 +145,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 			ops->address(data, addr);
 	}
 #endif
-	return ebp;
+	return bp;
 }
 
 #define MSG(msg) ops->warning(data, msg)
@@ -154,7 +154,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	        unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
-	unsigned long ebp = 0;
+	unsigned long bp = 0;
 
 	if (!task)
 		task = current;
@@ -167,13 +167,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	}
 
 #ifdef CONFIG_FRAME_POINTER
-	if (!ebp) {
+	if (!bp) {
 		if (task == current) {
-			/* Grab ebp right from our regs */
-			asm ("movl %%ebp, %0" : "=r" (ebp) : );
+			/* Grab bp right from our regs */
+			asm ("movl %%ebp, %0" : "=r" (bp) : );
 		} else {
-			/* ebp is the last reg pushed by switch_to */
-			ebp = *(unsigned long *) task->thread.esp;
+			/* bp is the last reg pushed by switch_to */
+			bp = *(unsigned long *) task->thread.esp;
 		}
 	}
 #endif
@@ -182,7 +182,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		struct thread_info *context;
 		context = (struct thread_info *)
 			((unsigned long)stack & (~(THREAD_SIZE - 1)));
-		ebp = print_context_stack(context, stack, ebp, ops, data);
+		bp = print_context_stack(context, stack, bp, ops, data);
 		/* Should be after the line below, but somewhere
 		   in early boot context comes out corrupted and we
 		   can't reference it -AK */
@@ -246,19 +246,19 @@ void show_trace(struct task_struct *task, struct pt_regs *regs,
 }
 
 static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			       unsigned long *esp, char *log_lvl)
+			       unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
 
-	if (esp == NULL) {
+	if (sp == NULL) {
 		if (task)
-			esp = (unsigned long*)task->thread.esp;
+			sp = (unsigned long*)task->thread.esp;
 		else
-			esp = (unsigned long *)&esp;
+			sp = (unsigned long *)&sp;
 	}
 
-	stack = esp;
+	stack = sp;
 	for(i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
@@ -267,13 +267,13 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		printk("%08lx ", *stack++);
 	}
 	printk("\n%sCall Trace:\n", log_lvl);
-	show_trace_log_lvl(task, regs, esp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
-void show_stack(struct task_struct *task, unsigned long *esp)
+void show_stack(struct task_struct *task, unsigned long *sp)
 {
 	printk("       ");
-	show_stack_log_lvl(task, NULL, esp, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -307,30 +307,30 @@ void show_registers(struct pt_regs *regs)
 	 * time of the fault..
 	 */
 	if (!user_mode_vm(regs)) {
-		u8 *eip;
+		u8 *ip;
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
 
 		printk("\n" KERN_EMERG "Stack: ");
-		show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
-		eip = (u8 *)regs->eip - code_prologue;
-		if (eip < (u8 *)PAGE_OFFSET ||
-			probe_kernel_address(eip, c)) {
+		ip = (u8 *)regs->ip - code_prologue;
+		if (ip < (u8 *)PAGE_OFFSET ||
+			probe_kernel_address(ip, c)) {
 			/* try starting at EIP */
-			eip = (u8 *)regs->eip;
+			ip = (u8 *)regs->ip;
 			code_len = code_len - code_prologue + 1;
 		}
-		for (i = 0; i < code_len; i++, eip++) {
-			if (eip < (u8 *)PAGE_OFFSET ||
-				probe_kernel_address(eip, c)) {
+		for (i = 0; i < code_len; i++, ip++) {
+			if (ip < (u8 *)PAGE_OFFSET ||
+				probe_kernel_address(ip, c)) {
 				printk(" Bad EIP value.");
 				break;
 			}
-			if (eip == (u8 *)regs->eip)
+			if (ip == (u8 *)regs->ip)
 				printk("<%02x> ", c);
 			else
 				printk("%02x ", c);
@@ -339,13 +339,13 @@ void show_registers(struct pt_regs *regs)
 	printk("\n");
 }	
 
-int is_valid_bugaddr(unsigned long eip)
+int is_valid_bugaddr(unsigned long ip)
 {
 	unsigned short ud2;
 
-	if (eip < PAGE_OFFSET)
+	if (ip < PAGE_OFFSET)
 		return 0;
-	if (probe_kernel_address((unsigned short *)eip, ud2))
+	if (probe_kernel_address((unsigned short *)ip, ud2))
 		return 0;
 
 	return ud2 == 0x0b0f;
@@ -382,10 +382,10 @@ void die(const char * str, struct pt_regs * regs, long err)
 		raw_local_irq_save(flags);
 
 	if (++die.lock_owner_depth < 3) {
-		unsigned long esp;
+		unsigned long sp;
 		unsigned short ss;
 
-		report_bug(regs->eip, regs);
+		report_bug(regs->ip, regs);
 
 		printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
 		       ++die_counter);
@@ -405,15 +405,15 @@ void die(const char * str, struct pt_regs * regs, long err)
 				NOTIFY_STOP) {
 			show_registers(regs);
 			/* Executive summary in case the oops scrolled away */
-			esp = (unsigned long) (&regs->esp);
+			sp = (unsigned long) (&regs->sp);
 			savesegment(ss, ss);
 			if (user_mode(regs)) {
-				esp = regs->esp;
-				ss = regs->xss & 0xffff;
+				sp = regs->sp;
+				ss = regs->ss & 0xffff;
 			}
-			printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
-			print_symbol("%s", regs->eip);
-			printk(" SS:ESP %04x:%08lx\n", ss, esp);
+			printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+			print_symbol("%s", regs->ip);
+			printk(" SS:ESP %04x:%08lx\n", ss, sp);
 		}
 		else
 			regs = NULL;
@@ -454,7 +454,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
 {
 	struct task_struct *tsk = current;
 
-	if (regs->eflags & VM_MASK) {
+	if (regs->flags & VM_MASK) {
 		if (vm86)
 			goto vm86_trap;
 		goto trap_signal;
@@ -548,13 +548,13 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
 	do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
 }
 
-DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
 #ifndef CONFIG_KPROBES
 DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
 #endif
 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
+DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
 DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
@@ -596,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 	}
 	put_cpu();
 
-	if (regs->eflags & VM_MASK)
+	if (regs->flags & VM_MASK)
 		goto gp_in_vm86;
 
 	if (!user_mode(regs))
@@ -607,9 +607,9 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 	if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
 	    printk_ratelimit())
 		printk(KERN_INFO
-		    "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
+		    "%s[%d] general protection ip:%lx sp:%lx error:%lx\n",
 		    current->comm, task_pid_nr(current),
-		    regs->eip, regs->esp, error_code);
+		    regs->ip, regs->sp, error_code);
 
 	force_sig(SIGSEGV, current);
 	return;
@@ -705,8 +705,8 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
 	*/
 	bust_spinlocks(1);
 	printk(KERN_EMERG "%s", msg);
-	printk(" on CPU%d, eip %08lx, registers:\n",
-		smp_processor_id(), regs->eip);
+	printk(" on CPU%d, ip %08lx, registers:\n",
+		smp_processor_id(), regs->ip);
 	show_registers(regs);
 	console_silent();
 	spin_unlock(&nmi_print_lock);
@@ -847,7 +847,7 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
 					SIGTRAP) == NOTIFY_STOP)
 		return;
 	/* It's safe to allow irq's after DR6 has been saved */
-	if (regs->eflags & X86_EFLAGS_IF)
+	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 
 	/* Mask out spurious debug traps due to lazy DR7 setting */
@@ -856,7 +856,7 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
 			goto clear_dr7;
 	}
 
-	if (regs->eflags & VM_MASK)
+	if (regs->flags & VM_MASK)
 		goto debug_vm86;
 
 	/* Save debug status register where ptrace can see it */
@@ -892,7 +892,7 @@ debug_vm86:
 
 clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	return;
 }
 
@@ -901,7 +901,7 @@ clear_TF_reenable:
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(void __user *eip)
+void math_error(void __user *ip)
 {
 	struct task_struct * task;
 	siginfo_t info;
@@ -917,7 +917,7 @@ void math_error(void __user *eip)
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = eip;
+	info.si_addr = ip;
 	/*
 	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
 	 * status.  0x3f is the exception bits in these regs, 0x200 is the
@@ -963,10 +963,10 @@ void math_error(void __user *eip)
 fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
 {
 	ignore_fpu_irq = 1;
-	math_error((void __user *)regs->eip);
+	math_error((void __user *)regs->ip);
 }
 
-static void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *ip)
 {
 	struct task_struct * task;
 	siginfo_t info;
@@ -982,7 +982,7 @@ static void simd_math_error(void __user *eip)
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = eip;
+	info.si_addr = ip;
 	/*
 	 * The SIMD FPU exceptions are handled a little differently, as there
 	 * is only a single status/control register.  Thus, to determine which
@@ -1020,13 +1020,13 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
 	if (cpu_has_xmm) {
 		/* Handle SIMD FPU exceptions on PIII+ processors. */
 		ignore_fpu_irq = 1;
-		simd_math_error((void __user *)regs->eip);
+		simd_math_error((void __user *)regs->ip);
 	} else {
 		/*
 		 * Handle strange cache flush from user space exception
 		 * in all other cases.  This is undocumented behaviour.
 		 */
-		if (regs->eflags & VM_MASK) {
+		if (regs->flags & VM_MASK) {
 			handle_vm86_fault((struct kernel_vm86_regs *)regs,
 					  error_code);
 			return;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 610a64d6bdf0..f7fecf9d47c3 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -76,20 +76,20 @@ asmlinkage void spurious_interrupt_bug(void);
 
 static inline void conditional_sti(struct pt_regs *regs)
 {
-	if (regs->eflags & X86_EFLAGS_IF)
+	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
 
 static inline void preempt_conditional_sti(struct pt_regs *regs)
 {
 	preempt_disable();
-	if (regs->eflags & X86_EFLAGS_IF)
+	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
 
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
-	if (regs->eflags & X86_EFLAGS_IF)
+	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
 	/* Make sure to not schedule here because we could be running
 	   on an exception stack. */
@@ -353,7 +353,7 @@ show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
 }
 
 static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp)
 {
 	unsigned long *stack;
 	int i;
@@ -364,14 +364,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
 	// debugging aid: "show_stack(NULL, NULL);" prints the
 	// back trace for this cpu.
 
-	if (rsp == NULL) {
+	if (sp == NULL) {
 		if (tsk)
-			rsp = (unsigned long *)tsk->thread.rsp;
+			sp = (unsigned long *)tsk->thread.rsp;
 		else
-			rsp = (unsigned long *)&rsp;
+			sp = (unsigned long *)&sp;
 	}
 
-	stack = rsp;
+	stack = sp;
 	for(i=0; i < kstack_depth_to_print; i++) {
 		if (stack >= irqstack && stack <= irqstack_end) {
 			if (stack == irqstack_end) {
@@ -387,12 +387,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	show_trace(tsk, regs, rsp);
+	show_trace(tsk, regs, sp);
 }
 
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
+void show_stack(struct task_struct *tsk, unsigned long * sp)
 {
-	_show_stack(tsk, NULL, rsp);
+	_show_stack(tsk, NULL, sp);
 }
 
 /*
@@ -416,11 +416,11 @@ void show_registers(struct pt_regs *regs)
 {
 	int i;
 	int in_kernel = !user_mode(regs);
-	unsigned long rsp;
+	unsigned long sp;
 	const int cpu = smp_processor_id();
 	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
 
-	rsp = regs->rsp;
+	sp = regs->sp;
 	printk("CPU %d ", cpu);
 	__show_regs(regs);
 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -432,15 +432,15 @@ void show_registers(struct pt_regs *regs)
 	 */
 	if (in_kernel) {
 		printk("Stack: ");
-		_show_stack(NULL, regs, (unsigned long*)rsp);
+		_show_stack(NULL, regs, (unsigned long*)sp);
 
 		printk("\nCode: ");
-		if (regs->rip < PAGE_OFFSET)
+		if (regs->ip < PAGE_OFFSET)
 			goto bad;
 
 		for (i=0; i<20; i++) {
 			unsigned char c;
-			if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
+			if (__get_user(c, &((unsigned char*)regs->ip)[i])) {
 bad:
 				printk(" Bad RIP value.");
 				break;
@@ -451,11 +451,11 @@ bad:
 	printk("\n");
 }	
 
-int is_valid_bugaddr(unsigned long rip)
+int is_valid_bugaddr(unsigned long ip)
 {
 	unsigned short ud2;
 
-	if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
+	if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
 		return 0;
 
 	return ud2 == 0x0b0f;
@@ -521,8 +521,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
 	printk(KERN_ALERT "RIP ");
-	printk_address(regs->rip); 
-	printk(" RSP <%016lx>\n", regs->rsp); 
+	printk_address(regs->ip);
+	printk(" RSP <%016lx>\n", regs->sp);
 	if (kexec_should_crash(current))
 		crash_kexec(regs);
 }
@@ -532,7 +532,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 	unsigned long flags = oops_begin();
 
 	if (!user_mode(regs))
-		report_bug(regs->rip, regs);
+		report_bug(regs->ip, regs);
 
 	__die(str, regs, err);
 	oops_end(flags);
@@ -582,9 +582,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 		if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 		    printk_ratelimit())
 			printk(KERN_INFO
-			       "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
+			       "%s[%d] trap %s ip:%lx sp:%lx error:%lx\n",
 			       tsk->comm, tsk->pid, str,
-			       regs->rip, regs->rsp, error_code); 
+			       regs->ip, regs->sp, error_code);
 
 		if (info)
 			force_sig_info(signr, info, tsk);
@@ -597,9 +597,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 	/* kernel trap */ 
 	{	     
 		const struct exception_table_entry *fixup;
-		fixup = search_exception_tables(regs->rip);
+		fixup = search_exception_tables(regs->ip);
 		if (fixup)
-			regs->rip = fixup->fixup;
+			regs->ip = fixup->fixup;
 		else {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_no = trapnr;
@@ -635,10 +635,10 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 	do_trap(trapnr, signr, str, regs, error_code, &info); \
 }
 
-DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
 DO_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip)
+DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
 DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
 DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
@@ -688,9 +688,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit())
 			printk(KERN_INFO
-		       "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
+		       "%s[%d] general protection ip:%lx sp:%lx error:%lx\n",
 			       tsk->comm, tsk->pid,
-			       regs->rip, regs->rsp, error_code); 
+			       regs->ip, regs->sp, error_code);
 
 		force_sig(SIGSEGV, tsk);
 		return;
@@ -699,9 +699,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 	/* kernel gp */
 	{
 		const struct exception_table_entry *fixup;
-		fixup = search_exception_tables(regs->rip);
+		fixup = search_exception_tables(regs->ip);
 		if (fixup) {
-			regs->rip = fixup->fixup;
+			regs->ip = fixup->fixup;
 			return;
 		}
 
@@ -824,15 +824,15 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
 	/* Did already sync */
-	if (eregs == (struct pt_regs *)eregs->rsp)
+	if (eregs == (struct pt_regs *)eregs->sp)
 		;
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
 	/* Exception from kernel and interrupts are enabled. Move to
  	   kernel process stack. */
-	else if (eregs->eflags & X86_EFLAGS_IF)
-		regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
+	else if (eregs->flags & X86_EFLAGS_IF)
+		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
 		*regs = *eregs;
 	return regs;
@@ -887,7 +887,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code = TRAP_BRKPT;
-	info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL;
+	info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 	force_sig_info(SIGTRAP, &info, tsk);
 
 clear_dr7:
@@ -897,16 +897,16 @@ clear_dr7:
 
 clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->eflags &= ~TF_MASK;
+	regs->flags &= ~TF_MASK;
 	preempt_conditional_cli(regs);
 }
 
 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 {
 	const struct exception_table_entry *fixup;
-	fixup = search_exception_tables(regs->rip);
+	fixup = search_exception_tables(regs->ip);
 	if (fixup) {
-		regs->rip = fixup->fixup;
+		regs->ip = fixup->fixup;
 		return 1;
 	}
 	notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
@@ -923,7 +923,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
  */
 asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 {
-	void __user *rip = (void __user *)(regs->rip);
+	void __user *ip = (void __user *)(regs->ip);
 	struct task_struct * task;
 	siginfo_t info;
 	unsigned short cwd, swd;
@@ -943,7 +943,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = rip;
+	info.si_addr = ip;
 	/*
 	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
 	 * status.  0x3f is the exception bits in these regs, 0x200 is the
@@ -992,7 +992,7 @@ asmlinkage void bad_intr(void)
 
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 {
-	void __user *rip = (void __user *)(regs->rip);
+	void __user *ip = (void __user *)(regs->ip);
 	struct task_struct * task;
 	siginfo_t info;
 	unsigned short mxcsr;
@@ -1012,7 +1012,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = rip;
+	info.si_addr = ip;
 	/*
 	 * The SIMD FPU exceptions are handled a little differently, as there
 	 * is only a single status/control register.  Thus, to determine which
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 157e4bedd3c5..980e85b90091 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -70,10 +70,10 @@
 /*
  * 8- and 16-bit register defines..
  */
-#define AL(regs)	(((unsigned char *)&((regs)->pt.eax))[0])
-#define AH(regs)	(((unsigned char *)&((regs)->pt.eax))[1])
-#define IP(regs)	(*(unsigned short *)&((regs)->pt.eip))
-#define SP(regs)	(*(unsigned short *)&((regs)->pt.esp))
+#define AL(regs)	(((unsigned char *)&((regs)->pt.ax))[0])
+#define AH(regs)	(((unsigned char *)&((regs)->pt.ax))[1])
+#define IP(regs)	(*(unsigned short *)&((regs)->pt.ip))
+#define SP(regs)	(*(unsigned short *)&((regs)->pt.sp))
 
 /*
  * virtual flags (16 and 32-bit versions)
@@ -93,12 +93,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
 {
 	int ret = 0;
 
-	/* kernel_vm86_regs is missing xgs, so copy everything up to
+	/* kernel_vm86_regs is missing gs, so copy everything up to
 	   (but not including) orig_eax, and then rest including orig_eax. */
-	ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax));
-	ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax,
+	ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
+	ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
 			    sizeof(struct kernel_vm86_regs) -
-			    offsetof(struct kernel_vm86_regs, pt.orig_eax));
+			    offsetof(struct kernel_vm86_regs, pt.orig_ax));
 
 	return ret;
 }
@@ -110,12 +110,12 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
 {
 	int ret = 0;
 
-	/* copy eax-xfs inclusive */
-	ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax));
-	/* copy orig_eax-__gsh+extra */
-	ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax,
+	/* copy ax-fs inclusive */
+	ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
+	/* copy orig_ax-__gsh+extra */
+	ret += copy_from_user(&regs->pt.orig_ax, &user->orig_eax,
 			      sizeof(struct kernel_vm86_regs) -
-			      offsetof(struct kernel_vm86_regs, pt.orig_eax) +
+			      offsetof(struct kernel_vm86_regs, pt.orig_ax) +
 			      extra);
 	return ret;
 }
@@ -138,7 +138,7 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 		printk("no vm86_info: BAD\n");
 		do_exit(SIGSEGV);
 	}
-	set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+	set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask);
 	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs);
 	tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
 	if (tmp) {
@@ -155,7 +155,7 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 
 	ret = KVM86->regs32;
 
-	ret->xfs = current->thread.saved_fs;
+	ret->fs = current->thread.saved_fs;
 	loadsegment(gs, current->thread.saved_gs);
 
 	return ret;
@@ -197,7 +197,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 
 asmlinkage int sys_vm86old(struct pt_regs regs)
 {
-	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
+	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx;
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
 					 * This remains on the stack until we
@@ -237,12 +237,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
 	struct vm86plus_struct __user *v86;
 
 	tsk = current;
-	switch (regs.ebx) {
+	switch (regs.bx) {
 		case VM86_REQUEST_IRQ:
 		case VM86_FREE_IRQ:
 		case VM86_GET_IRQ_BITS:
 		case VM86_GET_AND_RESET_IRQ:
-			ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
+			ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
 			goto out;
 		case VM86_PLUS_INSTALL_CHECK:
 			/* NOTE: on old vm86 stuff this will return the error
@@ -258,7 +258,7 @@ asmlinkage int sys_vm86(struct pt_regs regs)
 	ret = -EPERM;
 	if (tsk->thread.saved_esp0)
 		goto out;
-	v86 = (struct vm86plus_struct __user *)regs.ecx;
+	v86 = (struct vm86plus_struct __user *)regs.cx;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
 				       offsetof(struct kernel_vm86_struct, regs32) -
 				       sizeof(info.regs));
@@ -281,23 +281,23 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 /*
  * make sure the vm86() system call doesn't try to do anything silly
  */
-	info->regs.pt.xds = 0;
-	info->regs.pt.xes = 0;
-	info->regs.pt.xfs = 0;
+	info->regs.pt.ds = 0;
+	info->regs.pt.es = 0;
+	info->regs.pt.fs = 0;
 
 /* we are clearing gs later just before "jmp resume_userspace",
  * because it is not saved/restored.
  */
 
 /*
- * The eflags register is also special: we cannot trust that the user
+ * The flags register is also special: we cannot trust that the user
  * has set it up safely, so this makes sure interrupt etc flags are
  * inherited from protected mode.
  */
- 	VEFLAGS = info->regs.pt.eflags;
-	info->regs.pt.eflags &= SAFE_MASK;
-	info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK;
-	info->regs.pt.eflags |= VM_MASK;
+	VEFLAGS = info->regs.pt.flags;
+	info->regs.pt.flags &= SAFE_MASK;
+	info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
+	info->regs.pt.flags |= VM_MASK;
 
 	switch (info->cpu_type) {
 		case CPU_286:
@@ -315,11 +315,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	}
 
 /*
- * Save old state, set default return value (%eax) to 0
+ * Save old state, set default return value (%ax) to 0
  */
-	info->regs32->eax = 0;
+	info->regs32->ax = 0;
 	tsk->thread.saved_esp0 = tsk->thread.esp0;
-	tsk->thread.saved_fs = info->regs32->xfs;
+	tsk->thread.saved_fs = info->regs32->fs;
 	savesegment(gs, tsk->thread.saved_gs);
 
 	tss = &per_cpu(init_tss, get_cpu());
@@ -352,7 +352,7 @@ static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
 	struct pt_regs * regs32;
 
 	regs32 = save_v86_state(regs16);
-	regs32->eax = retval;
+	regs32->ax = retval;
 	__asm__ __volatile__("movl %0,%%esp\n\t"
 		"movl %1,%%ebp\n\t"
 		"jmp resume_userspace"
@@ -373,12 +373,12 @@ static inline void clear_IF(struct kernel_vm86_regs * regs)
 
 static inline void clear_TF(struct kernel_vm86_regs * regs)
 {
-	regs->pt.eflags &= ~TF_MASK;
+	regs->pt.flags &= ~TF_MASK;
 }
 
 static inline void clear_AC(struct kernel_vm86_regs * regs)
 {
-	regs->pt.eflags &= ~AC_MASK;
+	regs->pt.flags &= ~AC_MASK;
 }
 
 /* It is correct to call set_IF(regs) from the set_vflags_*
@@ -392,11 +392,11 @@ static inline void clear_AC(struct kernel_vm86_regs * regs)
  * [KD]
  */
 
-static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
+static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs * regs)
 {
-	set_flags(VEFLAGS, eflags, current->thread.v86mask);
-	set_flags(regs->pt.eflags, eflags, SAFE_MASK);
-	if (eflags & IF_MASK)
+	set_flags(VEFLAGS, flags, current->thread.v86mask);
+	set_flags(regs->pt.flags, flags, SAFE_MASK);
+	if (flags & IF_MASK)
 		set_IF(regs);
 	else
 		clear_IF(regs);
@@ -405,7 +405,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs
 static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
 {
 	set_flags(VFLAGS, flags, current->thread.v86mask);
-	set_flags(regs->pt.eflags, flags, SAFE_MASK);
+	set_flags(regs->pt.flags, flags, SAFE_MASK);
 	if (flags & IF_MASK)
 		set_IF(regs);
 	else
@@ -414,7 +414,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg
 
 static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
 {
-	unsigned long flags = regs->pt.eflags & RETURN_MASK;
+	unsigned long flags = regs->pt.flags & RETURN_MASK;
 
 	if (VEFLAGS & VIF_MASK)
 		flags |= IF_MASK;
@@ -518,7 +518,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
 	unsigned long __user *intr_ptr;
 	unsigned long segoffs;
 
-	if (regs->pt.xcs == BIOSSEG)
+	if (regs->pt.cs == BIOSSEG)
 		goto cannot_handle;
 	if (is_revectored(i, &KVM86->int_revectored))
 		goto cannot_handle;
@@ -530,9 +530,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
 	if ((segoffs >> 16) == BIOSSEG)
 		goto cannot_handle;
 	pushw(ssp, sp, get_vflags(regs), cannot_handle);
-	pushw(ssp, sp, regs->pt.xcs, cannot_handle);
+	pushw(ssp, sp, regs->pt.cs, cannot_handle);
 	pushw(ssp, sp, IP(regs), cannot_handle);
-	regs->pt.xcs = segoffs >> 16;
+	regs->pt.cs = segoffs >> 16;
 	SP(regs) -= 6;
 	IP(regs) = segoffs & 0xffff;
 	clear_TF(regs);
@@ -549,7 +549,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno
 	if (VMPI.is_vm86pus) {
 		if ( (trapno==3) || (trapno==1) )
 			return_to_32bit(regs, VM86_TRAP + (trapno << 8));
-		do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs));
+		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
 		return 0;
 	}
 	if (trapno !=1)
@@ -585,10 +585,10 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
 		handle_vm86_trap(regs, 0, 1); \
 	return; } while (0)
 
-	orig_flags = *(unsigned short *)&regs->pt.eflags;
+	orig_flags = *(unsigned short *)&regs->pt.flags;
 
-	csp = (unsigned char __user *) (regs->pt.xcs << 4);
-	ssp = (unsigned char __user *) (regs->pt.xss << 4);
+	csp = (unsigned char __user *) (regs->pt.cs << 4);
+	ssp = (unsigned char __user *) (regs->pt.ss << 4);
 	sp = SP(regs);
 	ip = IP(regs);
 
@@ -675,7 +675,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
 			SP(regs) += 6;
 		}
 		IP(regs) = newip;
-		regs->pt.xcs = newcs;
+		regs->pt.cs = newcs;
 		CHECK_IF_IN_TRAP;
 		if (data32) {
 			set_vflags_long(newflags, regs);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 87e5633805a9..599b6f2ed562 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -88,13 +88,13 @@ struct vmi_timer_ops vmi_timer_ops;
 #define IRQ_PATCH_DISABLE  5
 
 static inline void patch_offset(void *insnbuf,
-				unsigned long eip, unsigned long dest)
+				unsigned long ip, unsigned long dest)
 {
-        *(unsigned long *)(insnbuf+1) = dest-eip-5;
+        *(unsigned long *)(insnbuf+1) = dest-ip-5;
 }
 
 static unsigned patch_internal(int call, unsigned len, void *insnbuf,
-			       unsigned long eip)
+			       unsigned long ip)
 {
 	u64 reloc;
 	struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
@@ -103,13 +103,13 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
 		case VMI_RELOCATION_CALL_REL:
 			BUG_ON(len < 5);
 			*(char *)insnbuf = MNEM_CALL;
-			patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+			patch_offset(insnbuf, ip, (unsigned long)rel->eip);
 			return 5;
 
 		case VMI_RELOCATION_JUMP_REL:
 			BUG_ON(len < 5);
 			*(char *)insnbuf = MNEM_JMP;
-			patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+			patch_offset(insnbuf, ip, (unsigned long)rel->eip);
 			return 5;
 
 		case VMI_RELOCATION_NOP:
@@ -131,25 +131,25 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
  * sequence.  The callee does nop padding for us.
  */
 static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
-			  unsigned long eip, unsigned len)
+			  unsigned long ip, unsigned len)
 {
 	switch (type) {
 		case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
 			return patch_internal(VMI_CALL_DisableInterrupts, len,
-					      insns, eip);
+					      insns, ip);
 		case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
 			return patch_internal(VMI_CALL_EnableInterrupts, len,
-					      insns, eip);
+					      insns, ip);
 		case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
 			return patch_internal(VMI_CALL_SetInterruptMask, len,
-					      insns, eip);
+					      insns, ip);
 		case PARAVIRT_PATCH(pv_irq_ops.save_fl):
 			return patch_internal(VMI_CALL_GetInterruptMask, len,
-					      insns, eip);
+					      insns, ip);
 		case PARAVIRT_PATCH(pv_cpu_ops.iret):
-			return patch_internal(VMI_CALL_IRET, len, insns, eip);
+			return patch_internal(VMI_CALL_IRET, len, insns, ip);
 		case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
-			return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
+			return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 		default:
 			break;
 	}
@@ -157,29 +157,29 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 }
 
 /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
-static void vmi_cpuid(unsigned int *eax, unsigned int *ebx,
-                               unsigned int *ecx, unsigned int *edx)
+static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
+                               unsigned int *cx, unsigned int *dx)
 {
 	int override = 0;
-	if (*eax == 1)
+	if (*ax == 1)
 		override = 1;
         asm volatile ("call *%6"
-                      : "=a" (*eax),
-                        "=b" (*ebx),
-                        "=c" (*ecx),
-                        "=d" (*edx)
-                      : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid));
+                      : "=a" (*ax),
+                        "=b" (*bx),
+                        "=c" (*cx),
+                        "=d" (*dx)
+                      : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
 	if (override) {
 		if (disable_pse)
-			*edx &= ~X86_FEATURE_PSE;
+			*dx &= ~X86_FEATURE_PSE;
 		if (disable_pge)
-			*edx &= ~X86_FEATURE_PGE;
+			*dx &= ~X86_FEATURE_PGE;
 		if (disable_sep)
-			*edx &= ~X86_FEATURE_SEP;
+			*dx &= ~X86_FEATURE_SEP;
 		if (disable_tsc)
-			*edx &= ~X86_FEATURE_TSC;
+			*dx &= ~X86_FEATURE_TSC;
 		if (disable_mtrr)
-			*edx &= ~X86_FEATURE_MTRR;
+			*dx &= ~X86_FEATURE_MTRR;
 	}
 }
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ad4005c6d4a1..018f7cf33790 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -43,7 +43,7 @@
 #include <asm/vgtod.h>
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
+#define __syscall_clobber "r11","cx","memory"
 #define __pa_vsymbol(x)			\
 	({unsigned long v;  		\
 	extern char __vsyscall_0; 	\
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index df04bf884dd4..ea46d05853bb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -175,8 +175,8 @@ static void lguest_leave_lazy_mode(void)
  * check there when it wants to deliver an interrupt.
  */
 
-/* save_flags() is expected to return the processor state (ie. "eflags").  The
- * eflags word contains all kind of stuff, but in practice Linux only cares
+/* save_flags() is expected to return the processor state (ie. "flags").  The
+ * flags word contains all kind of stuff, but in practice Linux only cares
  * about the interrupt flag.  Our "save_flags()" just returns that. */
 static unsigned long save_fl(void)
 {
@@ -323,30 +323,30 @@ static void lguest_load_tr_desc(void)
  * anyone (including userspace) can just use the raw "cpuid" instruction and
  * the Host won't even notice since it isn't privileged.  So we try not to get
  * too worked up about it. */
-static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
-			 unsigned int *ecx, unsigned int *edx)
+static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
+			 unsigned int *cx, unsigned int *dx)
 {
-	int function = *eax;
+	int function = *ax;
 
-	native_cpuid(eax, ebx, ecx, edx);
+	native_cpuid(ax, bx, cx, dx);
 	switch (function) {
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
-		*ecx &= 0x00002201;
+		*cx &= 0x00002201;
 		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
-		*edx &= 0x07808101;
+		*dx &= 0x07808101;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
 		 * flush_tlb_user() for both user and kernel mappings unless
 		 * the Page Global Enable (PGE) feature bit is set. */
-		*edx |= 0x00002000;
+		*dx |= 0x00002000;
 		break;
 	case 0x80000000:
 		/* Futureproof this a little: if they ask how much extended
 		 * processor information there is, limit it to known fields. */
-		if (*eax > 0x80000008)
-			*eax = 0x80000008;
+		if (*ax > 0x80000008)
+			*ax = 0x80000008;
 		break;
 	}
 }
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index b472a2df0b7f..f2c13482acc0 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -526,7 +526,7 @@ static void __init do_boot_cpu(__u8 cpu)
 	 * initial kernel stack.  We need to alter this to give the
 	 * booting CPU a new stack (taken from its idle process) */
 	extern struct {
-		__u8 *esp;
+		__u8 *sp;
 		unsigned short ss;
 	} stack_start;
 	/* This is the format of the CPI IDT gate (in real mode) which
@@ -555,9 +555,9 @@ static void __init do_boot_cpu(__u8 cpu)
 	idle = fork_idle(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU%d", cpu);
-	idle->thread.eip = (unsigned long)start_secondary;
+	idle->thread.ip = (unsigned long)start_secondary;
 	/* init_tasks (in sched.c) is indexed logically */
-	stack_start.esp = (void *)idle->thread.esp;
+	stack_start.sp = (void *)idle->thread.sp;
 
 	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
@@ -567,7 +567,7 @@ static void __init do_boot_cpu(__u8 cpu)
 	/* Note: Don't modify initial ss override */
 	VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
 		(unsigned long)hijack_source.val, hijack_source.idt.Segment,
-		hijack_source.idt.Offset, stack_start.esp));
+		hijack_source.idt.Offset, stack_start.sp));
 
 	/* init lowmem identity mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
@@ -745,8 +745,8 @@ void __init initialize_secondary(void)
 	 */
 
 	asm volatile ("movl %0,%%esp\n\t"
-		      "jmp *%1"::"r" (current->thread.esp),
-		      "r"(current->thread.eip));
+		      "jmp *%1"::"r" (current->thread.sp),
+		      "r"(current->thread.ip));
 }
 
 /* handle a Voyager SYS_INT -- If we don't, the base board will
diff --git a/arch/x86/mm/extable_32.c b/arch/x86/mm/extable_32.c
index 0ce4f22a2635..41685461f8b2 100644
--- a/arch/x86/mm/extable_32.c
+++ b/arch/x86/mm/extable_32.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
 	const struct exception_table_entry *fixup;
 
 #ifdef CONFIG_PNPBIOS
-	if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
+	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs)))
 	{
 		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
 		extern u32 pnp_bios_is_utter_crap;
@@ -25,9 +25,9 @@ int fixup_exception(struct pt_regs *regs)
 	}
 #endif
 
-	fixup = search_exception_tables(regs->eip);
+	fixup = search_exception_tables(regs->ip);
 	if (fixup) {
-		regs->eip = fixup->fixup;
+		regs->ip = fixup->fixup;
 		return 1;
 	}
 
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index a2273d44aa27..6056c6d71835 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -72,15 +72,15 @@ static inline int notify_page_fault(struct pt_regs *regs)
 static inline unsigned long get_segment_eip(struct pt_regs *regs,
 					    unsigned long *eip_limit)
 {
-	unsigned long eip = regs->eip;
-	unsigned seg = regs->xcs & 0xffff;
+	unsigned long ip = regs->ip;
+	unsigned seg = regs->cs & 0xffff;
 	u32 seg_ar, seg_limit, base, *desc;
 
 	/* Unlikely, but must come before segment checks. */
-	if (unlikely(regs->eflags & VM_MASK)) {
+	if (unlikely(regs->flags & VM_MASK)) {
 		base = seg << 4;
 		*eip_limit = base + 0xffff;
-		return base + (eip & 0xffff);
+		return base + (ip & 0xffff);
 	}
 
 	/* The standard kernel/user address space limit. */
@@ -88,16 +88,16 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	
 	/* By far the most common cases. */
 	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-		return eip;
+		return ip;
 
 	/* Check the segment exists, is within the current LDT/GDT size,
 	   that kernel/user (ring 0..3) has the appropriate privilege,
 	   that it's a code segment, and get the limit. */
 	__asm__ ("larl %3,%0; lsll %3,%1"
 		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-	if ((~seg_ar & 0x9800) || eip > seg_limit) {
+	if ((~seg_ar & 0x9800) || ip > seg_limit) {
 		*eip_limit = 0;
-		return 1;	 /* So that returned eip > *eip_limit. */
+		return 1;	 /* So that returned ip > *eip_limit. */
 	}
 
 	/* Get the GDT/LDT descriptor base. 
@@ -127,7 +127,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	seg_limit += base;
 	if (seg_limit < *eip_limit && seg_limit >= base)
 		*eip_limit = seg_limit;
-	return eip + base;
+	return ip + base;
 }
 
 /* 
@@ -345,7 +345,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
 	   fault has been handled. */
-	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
 		local_irq_enable();
 
 	mm = tsk->mm;
@@ -374,7 +374,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	 */
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		if ((error_code & 4) == 0 &&
-		    !search_exception_tables(regs->eip))
+		    !search_exception_tables(regs->ip))
 			goto bad_area_nosemaphore;
 		down_read(&mm->mmap_sem);
 	}
@@ -388,12 +388,12 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 		goto bad_area;
 	if (error_code & 4) {
 		/*
-		 * Accessing the stack below %esp is always a bug.
+		 * Accessing the stack below %sp is always a bug.
 		 * The large cushion allows instructions like enter
 		 * and pusha to work.  ("enter $65535,$31" pushes
-		 * 32 pointers and then decrements %esp by 65535.)
+		 * 32 pointers and then decrements %sp by 65535.)
 		 */
-		if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
+		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
 			goto bad_area;
 	}
 	if (expand_stack(vma, address))
@@ -442,7 +442,7 @@ good_area:
 	/*
 	 * Did it hit the DOS screen memory VA from vm86 mode?
 	 */
-	if (regs->eflags & VM_MASK) {
+	if (regs->flags & VM_MASK) {
 		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
 		if (bit < 32)
 			tsk->thread.screen_bitmap |= 1 << bit;
@@ -474,11 +474,11 @@ bad_area_nosemaphore:
 
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
-			printk("%s%s[%d]: segfault at %08lx eip %08lx "
-			    "esp %08lx error %lx\n",
+			printk("%s%s[%d]: segfault at %08lx ip %08lx "
+			    "sp %08lx error %lx\n",
 			    task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-			    tsk->comm, task_pid_nr(tsk), address, regs->eip,
-			    regs->esp, error_code);
+			    tsk->comm, task_pid_nr(tsk), address, regs->ip,
+			    regs->sp, error_code);
 		}
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
@@ -544,7 +544,7 @@ no_context:
 			printk(KERN_ALERT "BUG: unable to handle kernel paging"
 					" request");
 		printk(" at virtual address %08lx\n",address);
-		printk(KERN_ALERT "printing eip: %08lx ", regs->eip);
+		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
 
 		page = read_cr3();
 		page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 0e26230669ca..88a7abda29ce 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -198,7 +198,7 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
 static int is_errata93(struct pt_regs *regs, unsigned long address) 
 {
 	static int warned;
-	if (address != regs->rip)
+	if (address != regs->ip)
 		return 0;
 	if ((address >> 32) != 0) 
 		return 0;
@@ -209,7 +209,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 			printk(errata93_warning); 		
 			warned = 1;
 		}
-		regs->rip = address;
+		regs->ip = address;
 		return 1;
 	}
 	return 0;
@@ -355,7 +355,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if (notify_page_fault(regs))
 		return;
 
-	if (likely(regs->eflags & X86_EFLAGS_IF))
+	if (likely(regs->flags & X86_EFLAGS_IF))
 		local_irq_enable();
 
 	if (unlikely(error_code & PF_RSVD))
@@ -393,7 +393,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	 */
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		if ((error_code & PF_USER) == 0 &&
-		    !search_exception_tables(regs->rip))
+		    !search_exception_tables(regs->ip))
 			goto bad_area_nosemaphore;
 		down_read(&mm->mmap_sem);
 	}
@@ -409,7 +409,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		/* Allow userspace just enough access below the stack pointer
 		 * to let the 'enter' instruction work.
 		 */
-		if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
+		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
 			goto bad_area;
 	}
 	if (expand_stack(vma, address))
@@ -488,10 +488,10 @@ bad_area_nosemaphore:
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
 			printk(
-		       "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
+		       "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-					tsk->comm, tsk->pid, address, regs->rip,
-					regs->rsp, error_code);
+					tsk->comm, tsk->pid, address, regs->ip,
+					regs->sp, error_code);
 		}
        
 		tsk->thread.cr2 = address;
@@ -509,9 +509,9 @@ bad_area_nosemaphore:
 no_context:
 	
 	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->rip);
+	fixup = search_exception_tables(regs->ip);
 	if (fixup) {
-		regs->rip = fixup->fixup;
+		regs->ip = fixup->fixup;
 		return;
 	}
 
@@ -537,7 +537,7 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
-	printk_address(regs->rip);
+	printk_address(regs->ip);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 0ed046a187f7..cc353a0b183e 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -48,7 +48,7 @@ static struct stacktrace_ops backtrace_ops = {
 };
 
 struct frame_head {
-	struct frame_head *ebp;
+	struct frame_head *bp;
 	unsigned long ret;
 } __attribute__((packed));
 
@@ -67,10 +67,10 @@ dump_user_backtrace(struct frame_head * head)
 
 	/* frame pointers should strictly progress back up the stack
 	 * (towards higher addresses) */
-	if (head >= bufhead[0].ebp)
+	if (head >= bufhead[0].bp)
 		return NULL;
 
-	return bufhead[0].ebp;
+	return bufhead[0].bp;
 }
 
 void
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d3574485cb15..29517faaa735 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -141,8 +141,8 @@ static void __init xen_banner(void)
 	printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
 }
 
-static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
-		      unsigned int *ecx, unsigned int *edx)
+static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+		      unsigned int *cx, unsigned int *dx)
 {
 	unsigned maskedx = ~0;
 
@@ -150,18 +150,18 @@ static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*eax == 1)
+	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-	*edx &= maskedx;
+		: "=a" (*ax),
+		  "=b" (*bx),
+		  "=c" (*cx),
+		  "=d" (*dx)
+		: "0" (*ax), "2" (*cx));
+	*dx &= maskedx;
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index 6d1da5809e6f..aebab9704dd7 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -487,7 +487,7 @@ fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
 			int irq = evtchn_to_irq[port];
 
 			if (irq != -1) {
-				regs->orig_eax = ~irq;
+				regs->orig_ax = ~irq;
 				do_IRQ(regs);
 			}
 		}
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 66ba7987184a..b270ee04959e 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -207,7 +207,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 static __inline__ void __user *compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = task_pt_regs(current);
-	return (void __user *)regs->rsp - len; 
+	return (void __user *)regs->sp - len;
 }
 
 static inline int is_compat_task(void)
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 60f5101d9483..5e5705bf082a 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -99,32 +99,32 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
    just to make things more deterministic.
  */
 #define ELF_PLAT_INIT(_r, load_addr)	do { \
-	_r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
-	_r->esi = 0; _r->edi = 0; _r->ebp = 0; \
-	_r->eax = 0; \
+	_r->bx = 0; _r->cx = 0; _r->dx = 0; \
+	_r->si = 0; _r->di = 0; _r->bp = 0; \
+	_r->ax = 0; \
 } while (0)
 
 /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
    now struct_user_regs, they are different) */
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)		\
-	pr_reg[0] = regs->ebx;				\
-	pr_reg[1] = regs->ecx;				\
-	pr_reg[2] = regs->edx;				\
-	pr_reg[3] = regs->esi;				\
-	pr_reg[4] = regs->edi;				\
-	pr_reg[5] = regs->ebp;				\
-	pr_reg[6] = regs->eax;				\
-	pr_reg[7] = regs->xds & 0xffff;			\
-	pr_reg[8] = regs->xes & 0xffff;			\
-	pr_reg[9] = regs->xfs & 0xffff;			\
+	pr_reg[0] = regs->bx;				\
+	pr_reg[1] = regs->cx;				\
+	pr_reg[2] = regs->dx;				\
+	pr_reg[3] = regs->si;				\
+	pr_reg[4] = regs->di;				\
+	pr_reg[5] = regs->bp;				\
+	pr_reg[6] = regs->ax;				\
+	pr_reg[7] = regs->ds & 0xffff;			\
+	pr_reg[8] = regs->es & 0xffff;			\
+	pr_reg[9] = regs->fs & 0xffff;			\
 	savesegment(gs,pr_reg[10]);			\
-	pr_reg[11] = regs->orig_eax;			\
-	pr_reg[12] = regs->eip;				\
-	pr_reg[13] = regs->xcs & 0xffff;		\
-	pr_reg[14] = regs->eflags;			\
-	pr_reg[15] = regs->esp;				\
-	pr_reg[16] = regs->xss & 0xffff;
+	pr_reg[11] = regs->orig_ax;			\
+	pr_reg[12] = regs->ip;				\
+	pr_reg[13] = regs->cs & 0xffff;			\
+	pr_reg[14] = regs->flags;			\
+	pr_reg[15] = regs->sp;				\
+	pr_reg[16] = regs->ss & 0xffff;
 
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
@@ -142,9 +142,9 @@ extern unsigned int vdso_enabled;
 
 #define ELF_PLAT_INIT(_r, load_addr)	do {		  \
 	struct task_struct *cur = current;		  \
-	(_r)->rbx = 0; (_r)->rcx = 0; (_r)->rdx = 0;	  \
-	(_r)->rsi = 0; (_r)->rdi = 0; (_r)->rbp = 0;	  \
-	(_r)->rax = 0;					  \
+	(_r)->bx = 0; (_r)->cx = 0; (_r)->dx = 0;	  \
+	(_r)->si = 0; (_r)->di = 0; (_r)->bp = 0;	  \
+	(_r)->ax = 0;					  \
 	(_r)->r8 = 0;					  \
 	(_r)->r9 = 0;					  \
 	(_r)->r10 = 0;					  \
@@ -169,22 +169,22 @@ extern unsigned int vdso_enabled;
 	(pr_reg)[1] = (regs)->r14;				\
 	(pr_reg)[2] = (regs)->r13;				\
 	(pr_reg)[3] = (regs)->r12;				\
-	(pr_reg)[4] = (regs)->rbp;				\
-	(pr_reg)[5] = (regs)->rbx;				\
+	(pr_reg)[4] = (regs)->bp;				\
+	(pr_reg)[5] = (regs)->bx;				\
 	(pr_reg)[6] = (regs)->r11;				\
 	(pr_reg)[7] = (regs)->r10;				\
 	(pr_reg)[8] = (regs)->r9;				\
 	(pr_reg)[9] = (regs)->r8;				\
-	(pr_reg)[10] = (regs)->rax;				\
-	(pr_reg)[11] = (regs)->rcx;				\
-	(pr_reg)[12] = (regs)->rdx;				\
-	(pr_reg)[13] = (regs)->rsi;				\
-	(pr_reg)[14] = (regs)->rdi;				\
-	(pr_reg)[15] = (regs)->orig_rax;			\
-	(pr_reg)[16] = (regs)->rip;				\
+	(pr_reg)[10] = (regs)->ax;				\
+	(pr_reg)[11] = (regs)->cx;				\
+	(pr_reg)[12] = (regs)->dx;				\
+	(pr_reg)[13] = (regs)->si;				\
+	(pr_reg)[14] = (regs)->di;				\
+	(pr_reg)[15] = (regs)->orig_ax;			\
+	(pr_reg)[16] = (regs)->ip;				\
 	(pr_reg)[17] = (regs)->cs;				\
-	(pr_reg)[18] = (regs)->eflags;				\
-	(pr_reg)[19] = (regs)->rsp;				\
+	(pr_reg)[18] = (regs)->flags;				\
+	(pr_reg)[19] = (regs)->sp;				\
 	(pr_reg)[20] = (regs)->ss;				\
 	(pr_reg)[21] = current->thread.fs;			\
 	(pr_reg)[22] = current->thread.gs;			\
diff --git a/include/asm-x86/kexec_32.h b/include/asm-x86/kexec_32.h
index 4b9dc9e6b701..ff39d2f88022 100644
--- a/include/asm-x86/kexec_32.h
+++ b/include/asm-x86/kexec_32.h
@@ -45,7 +45,7 @@
 /* We can also handle crash dumps from 64 bit kernel. */
 #define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
 
-/* CPU does not save ss and esp on stack if execution is already
+/* CPU does not save ss and sp on stack if execution is already
  * running in kernel mode at the time of NMI occurrence. This code
  * fixes it.
  */
@@ -53,16 +53,16 @@ static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
 					struct pt_regs *oldregs)
 {
 	memcpy(newregs, oldregs, sizeof(*newregs));
-	newregs->esp = (unsigned long)&(oldregs->esp);
+	newregs->sp = (unsigned long)&(oldregs->sp);
 	__asm__ __volatile__(
 			"xorl %%eax, %%eax\n\t"
 			"movw %%ss, %%ax\n\t"
-			:"=a"(newregs->xss));
+			:"=a"(newregs->ss));
 }
 
 /*
  * This function is responsible for capturing register states if coming
- * via panic otherwise just fix up the ss and esp if coming via kernel
+ * via panic otherwise just fix up the ss and sp if coming via kernel
  * mode exception.
  */
 static inline void crash_setup_regs(struct pt_regs *newregs,
@@ -71,21 +71,21 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
        if (oldregs)
                crash_fixup_ss_esp(newregs, oldregs);
        else {
-               __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->ebx));
-               __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->ecx));
-               __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->edx));
-               __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->esi));
-               __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->edi));
-               __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->ebp));
-               __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->eax));
-               __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->esp));
-               __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(newregs->xss));
-               __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(newregs->xcs));
-               __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(newregs->xds));
-               __asm__ __volatile__("movw %%es, %%ax;" :"=a"(newregs->xes));
-               __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->eflags));
+               __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->bx));
+               __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->cx));
+               __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->dx));
+               __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->si));
+               __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->di));
+               __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->bp));
+               __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->ax));
+               __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->sp));
+               __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
+               __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
+               __asm__ __volatile__("movl %%ds, %%eax;" :"=a"(newregs->ds));
+               __asm__ __volatile__("movl %%es, %%eax;" :"=a"(newregs->es));
+               __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->flags));
 
-               newregs->eip = (unsigned long)current_text_addr();
+               newregs->ip = (unsigned long)current_text_addr();
        }
 }
 asmlinkage NORET_TYPE void
diff --git a/include/asm-x86/kexec_64.h b/include/asm-x86/kexec_64.h
index 738e581b67f8..b5f989b15c0b 100644
--- a/include/asm-x86/kexec_64.h
+++ b/include/asm-x86/kexec_64.h
@@ -60,14 +60,14 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 	if (oldregs)
 		memcpy(newregs, oldregs, sizeof(*newregs));
 	else {
-		__asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->rbx));
-		__asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->rcx));
-		__asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->rdx));
-		__asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->rsi));
-		__asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->rdi));
-		__asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->rbp));
-		__asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->rax));
-		__asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->rsp));
+		__asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->bx));
+		__asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->cx));
+		__asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->dx));
+		__asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->si));
+		__asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->di));
+		__asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->bp));
+		__asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->ax));
+		__asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->sp));
 		__asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8));
 		__asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9));
 		__asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10));
@@ -78,9 +78,9 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 		__asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15));
 		__asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
 		__asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
-		__asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->eflags));
+		__asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->flags));
 
-		newregs->rip = (unsigned long)current_text_addr();
+		newregs->ip = (unsigned long)current_text_addr();
 	}
 }
 
diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h
index 9fe8f3bddfd5..2f38315bc39f 100644
--- a/include/asm-x86/kprobes_32.h
+++ b/include/asm-x86/kprobes_32.h
@@ -84,7 +84,7 @@ struct kprobe_ctlblk {
  */
 static inline void restore_interrupts(struct pt_regs *regs)
 {
-	if (regs->eflags & IF_MASK)
+	if (regs->flags & IF_MASK)
 		local_irq_enable();
 }
 
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
index 743d76218fc9..8c919d35cdd3 100644
--- a/include/asm-x86/kprobes_64.h
+++ b/include/asm-x86/kprobes_64.h
@@ -77,7 +77,7 @@ struct kprobe_ctlblk {
  */
 static inline void restore_interrupts(struct pt_regs *regs)
 {
-	if (regs->eflags & IF_MASK)
+	if (regs->flags & IF_MASK)
 		local_irq_enable();
 }
 
diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index e6ff507a73b0..94f1fd79e22a 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h
@@ -13,7 +13,7 @@
 #define MCG_CTL_P	 (1UL<<8)   /* MCG_CAP register available */
 
 #define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
-#define MCG_STATUS_EIPV  (1UL<<1)   /* eip points to correct instruction */
+#define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
 #define MCG_STATUS_MCIP  (1UL<<2)   /* machine check in progress */
 
 #define MCI_STATUS_VAL   (1UL<<63)  /* valid error */
@@ -30,7 +30,7 @@ struct mce {
 	__u64 misc;
 	__u64 addr;
 	__u64 mcgstatus;
-	__u64 rip;
+	__u64 ip;
 	__u64 tsc;	/* cpu time stamp counter */
 	__u64 res1;	/* for future extension */
 	__u64 res2;	/* dito. */
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 3c67eacb3168..c85400fe58c4 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -398,14 +398,14 @@ struct thread_struct {
 
 #define start_thread(regs, new_eip, new_esp) do {		\
 	__asm__("movl %0,%%gs": :"r" (0));			\
-	regs->xfs = 0;						\
+	regs->fs = 0;						\
 	set_fs(USER_DS);					\
-	regs->xds = __USER_DS;					\
-	regs->xes = __USER_DS;					\
-	regs->xss = __USER_DS;					\
-	regs->xcs = __USER_CS;					\
-	regs->eip = new_eip;					\
-	regs->esp = new_esp;					\
+	regs->ds = __USER_DS;					\
+	regs->es = __USER_DS;					\
+	regs->ss = __USER_DS;					\
+	regs->cs = __USER_CS;					\
+	regs->ip = new_eip;					\
+	regs->sp = new_esp;					\
 } while (0)
 
 /* Forward declaration, a strange C thing */
@@ -440,7 +440,7 @@ unsigned long get_wchan(struct task_struct *p);
  * is accessable even if the CPU haven't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
  * when switching to the same priv ring).
- * Therefore beware: accessing the xss/esp fields of the
+ * Therefore beware: accessing the ss/esp fields of the
  * "struct pt_regs" is possible, but they may contain the
  * completely wrong values.
  */
@@ -451,8 +451,8 @@ unsigned long get_wchan(struct task_struct *p);
        __regs__ - 1;                                                   \
 })
 
-#define KSTK_EIP(task) (task_pt_regs(task)->eip)
-#define KSTK_ESP(task) (task_pt_regs(task)->esp)
+#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
 
 struct microcode_header {
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index e7bea4fed642..797770113e6d 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -258,12 +258,12 @@ struct thread_struct {
 #define start_thread(regs,new_rip,new_rsp) do { \
 	asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));	 \
 	load_gs_index(0);							\
-	(regs)->rip = (new_rip);						 \
-	(regs)->rsp = (new_rsp);						 \
+	(regs)->ip = (new_rip);						 \
+	(regs)->sp = (new_rsp);						 \
 	write_pda(oldrsp, (new_rsp));						 \
 	(regs)->cs = __USER_CS;							 \
 	(regs)->ss = __USER_DS;							 \
-	(regs)->eflags = 0x200;							 \
+	(regs)->flags = 0x200;							 \
 	set_fs(USER_DS);							 \
 } while(0) 
 
@@ -297,7 +297,7 @@ extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.rsp0 - 1)
-#define KSTK_EIP(tsk) (task_pt_regs(tsk)->rip)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ip)
 #define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
 
 
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 04204f359298..9187b2fab754 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -10,6 +10,8 @@
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
 
+#ifndef __KERNEL__
+
 struct pt_regs {
 	long ebx;
 	long ecx;
@@ -21,7 +23,7 @@ struct pt_regs {
 	int  xds;
 	int  xes;
 	int  xfs;
-	/* int  xgs; */
+	/* int  gs; */
 	long orig_eax;
 	long eip;
 	int  xcs;
@@ -30,7 +32,27 @@ struct pt_regs {
 	int  xss;
 };
 
-#ifdef __KERNEL__
+#else /* __KERNEL__ */
+
+struct pt_regs {
+	long bx;
+	long cx;
+	long dx;
+	long si;
+	long di;
+	long bp;
+	long ax;
+	int  ds;
+	int  es;
+	int  fs;
+	/* int  gs; */
+	long orig_ax;
+	long ip;
+	int  cs;
+	long flags;
+	long sp;
+	int  ss;
+};
 
 #include <asm/vm86.h>
 #include <asm/segment.h>
@@ -47,27 +69,30 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
  */
 static inline int user_mode(struct pt_regs *regs)
 {
-	return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
+	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
 }
 static inline int user_mode_vm(struct pt_regs *regs)
 {
-	return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
+	return ((regs->cs & SEGMENT_RPL_MASK) |
+		(regs->flags & VM_MASK)) >= USER_RPL;
 }
 static inline int v8086_mode(struct pt_regs *regs)
 {
-	return (regs->eflags & VM_MASK);
+	return (regs->flags & VM_MASK);
 }
 
-#define instruction_pointer(regs) ((regs)->eip)
-#define frame_pointer(regs) ((regs)->ebp)
+#define instruction_pointer(regs) ((regs)->ip)
+#define frame_pointer(regs) ((regs)->bp)
 #define stack_pointer(regs) ((unsigned long)(regs))
-#define regs_return_value(regs) ((regs)->eax)
+#define regs_return_value(regs) ((regs)->ax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
 #endif /* __KERNEL__ */
 
 #else /* __i386__ */
 
+#ifndef __KERNEL__
+
 struct pt_regs {
 	unsigned long r15;
 	unsigned long r14;
@@ -96,14 +121,43 @@ struct pt_regs {
 /* top of stack page */
 };
 
-#ifdef __KERNEL__
+#else /* __KERNEL__ */
+
+struct pt_regs {
+	unsigned long r15;
+	unsigned long r14;
+	unsigned long r13;
+	unsigned long r12;
+	unsigned long bp;
+	unsigned long bx;
+/* arguments: non interrupts/non tracing syscalls only save upto here*/
+	unsigned long r11;
+	unsigned long r10;
+	unsigned long r9;
+	unsigned long r8;
+	unsigned long ax;
+	unsigned long cx;
+	unsigned long dx;
+	unsigned long si;
+	unsigned long di;
+	unsigned long orig_ax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+	unsigned long ip;
+	unsigned long cs;
+	unsigned long flags;
+	unsigned long sp;
+	unsigned long ss;
+/* top of stack page */
+};
 
 #define user_mode(regs) (!!((regs)->cs & 3))
 #define user_mode_vm(regs) user_mode(regs)
-#define instruction_pointer(regs) ((regs)->rip)
-#define frame_pointer(regs) ((regs)->rbp)
-#define stack_pointer(regs) ((regs)->rsp)
-#define regs_return_value(regs) ((regs)->rax)
+#define v8086_mode(regs) 0	/* No V86 mode support in long mode */
+#define instruction_pointer(regs) ((regs)->ip)
+#define frame_pointer(regs) ((regs)->bp)
+#define stack_pointer(regs) ((regs)->sp)
+#define regs_return_value(regs) ((regs)->ax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
diff --git a/kernel/signal.c b/kernel/signal.c
index afa4f781f924..bf49ce6f016b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -733,13 +733,13 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
 		current->comm, task_pid_nr(current), signr);
 
 #if defined(__i386__) && !defined(__arch_um__)
-	printk("code at %08lx: ", regs->eip);
+	printk("code at %08lx: ", regs->ip);
 	{
 		int i;
 		for (i = 0; i < 16; i++) {
 			unsigned char insn;
 
-			__get_user(insn, (unsigned char *)(regs->eip + i));
+			__get_user(insn, (unsigned char *)(regs->ip + i));
 			printk("%02x ", insn);
 		}
 	}

From 153d5f2e5787c74e9cbb6b6687c9b04be1b59893 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:56 +0100
Subject: [PATCH 211/890] x86: use generic register names in struct
 user_regs_struct

Switch struct user_regs_struct (defined in <asm/user.h>, which is no
longer exported to userspace) to using register names without e- or
r-prefixes for both 32 and 64 bit x86.  This is intended as a
preliminary step in unifying this code between architectures.

Also, be a bit more strict in truncating 32-bit "extended" segment
register values to 16 bits.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 35 +++++++++++++++---------------
 arch/x86/kernel/ptrace_64.c  |  4 ++--
 include/asm-x86/user_32.h    | 24 +++++++++++++++------
 include/asm-x86/user_64.h    | 41 +++++++++++++++++++++++++++---------
 4 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c9f28e02e86d..53406461074f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -523,6 +523,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 void dump_thread(struct pt_regs * regs, struct user * dump)
 {
 	int i;
+	u16 gs;
 
 /* changed the size calculations - should hopefully work better. lbt */
 	dump->magic = CMAGIC;
@@ -538,23 +539,23 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
 
-	dump->regs.ebx = regs->bx;
-	dump->regs.ecx = regs->cx;
-	dump->regs.edx = regs->dx;
-	dump->regs.esi = regs->si;
-	dump->regs.edi = regs->di;
-	dump->regs.ebp = regs->bp;
-	dump->regs.eax = regs->ax;
-	dump->regs.ds = regs->ds;
-	dump->regs.es = regs->es;
-	dump->regs.fs = regs->fs;
-	savesegment(gs,dump->regs.gs);
-	dump->regs.orig_eax = regs->orig_ax;
-	dump->regs.eip = regs->ip;
-	dump->regs.cs = regs->cs;
-	dump->regs.eflags = regs->flags;
-	dump->regs.esp = regs->sp;
-	dump->regs.ss = regs->ss;
+	dump->regs.bx = regs->bx;
+	dump->regs.cx = regs->cx;
+	dump->regs.dx = regs->dx;
+	dump->regs.si = regs->si;
+	dump->regs.di = regs->di;
+	dump->regs.bp = regs->bp;
+	dump->regs.ax = regs->ax;
+	dump->regs.ds = (u16)regs->ds;
+	dump->regs.es = (u16)regs->es;
+	dump->regs.fs = (u16)regs->fs;
+	savesegment(gs,gs);
+	dump->regs.orig_ax = regs->orig_ax;
+	dump->regs.ip = regs->ip;
+	dump->regs.cs = (u16)regs->cs;
+	dump->regs.flags = regs->flags;
+	dump->regs.sp = regs->sp;
+	dump->regs.ss = (u16)regs->ss;
 
 	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 }
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index bee20bb1a6c0..56b31cd3b865 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -108,7 +108,7 @@ static int putreg(struct task_struct *child,
 			if (child->thread.gs != value)
 				return do_arch_prctl(child, ARCH_SET_GS, value);
 			return 0;
-		case offsetof(struct user_regs_struct, eflags):
+		case offsetof(struct user_regs_struct,flags):
 			value &= FLAG_MASK;
 			/*
 			 * If the user value contains TF, mark that
@@ -164,7 +164,7 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 			if (child->thread.gsindex != GS_TLS_SEL)
 				return 0;
 			return get_desc_base(&child->thread.tls_array[GS_TLS]);
-		case offsetof(struct user_regs_struct, eflags):
+		case offsetof(struct user_regs_struct, flags):
 			/*
 			 * If the debugger set TF, hide it from the readout.
 			 */
diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h
index 0e85d2a5e33a..ed8b8fc6906c 100644
--- a/include/asm-x86/user_32.h
+++ b/include/asm-x86/user_32.h
@@ -75,13 +75,23 @@ struct user_fxsr_struct {
  * doesn't use the extra segment registers)
  */
 struct user_regs_struct {
-	long ebx, ecx, edx, esi, edi, ebp, eax;
-	unsigned short ds, __ds, es, __es;
-	unsigned short fs, __fs, gs, __gs;
-	long orig_eax, eip;
-	unsigned short cs, __cs;
-	long eflags, esp;
-	unsigned short ss, __ss;
+	unsigned long	bx;
+	unsigned long	cx;
+	unsigned long	dx;
+	unsigned long	si;
+	unsigned long	di;
+	unsigned long	bp;
+	unsigned long	ax;
+	unsigned long	ds;
+	unsigned long	es;
+	unsigned long	fs;
+	unsigned long	gs;
+	unsigned long	orig_ax;
+	unsigned long	ip;
+	unsigned long	cs;
+	unsigned long	flags;
+	unsigned long	sp;
+	unsigned long	ss;
 };
 
 /* When the kernel dumps core, it starts by dumping the user struct -
diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h
index 12785c649ac5..a5449d456cc0 100644
--- a/include/asm-x86/user_64.h
+++ b/include/asm-x86/user_64.h
@@ -40,13 +40,13 @@
  * and both the standard and SIMD floating point data can be accessed via
  * the new ptrace requests.  In either case, changes to the FPU environment
  * will be reflected in the task's state as expected.
- * 
+ *
  * x86-64 support by Andi Kleen.
  */
 
 /* This matches the 64bit FXSAVE format as defined by AMD. It is the same
    as the 32bit format defined by Intel, except that the selector:offset pairs for
-   data and eip are replaced with flat 64bit pointers. */ 
+   data and eip are replaced with flat 64bit pointers. */
 struct user_i387_struct {
 	unsigned short	cwd;
 	unsigned short	swd;
@@ -65,13 +65,34 @@ struct user_i387_struct {
  * Segment register layout in coredumps.
  */
 struct user_regs_struct {
-	unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10;
-	unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
-	unsigned long rip,cs,eflags;
-	unsigned long rsp,ss;
-  	unsigned long fs_base, gs_base;
-	unsigned long ds,es,fs,gs; 
-}; 
+	unsigned long	r15;
+	unsigned long	r14;
+	unsigned long	r13;
+	unsigned long	r12;
+	unsigned long	bp;
+	unsigned long	bx;
+	unsigned long	r11;
+	unsigned long	r10;
+	unsigned long	r9;
+	unsigned long	r8;
+	unsigned long	ax;
+	unsigned long	cx;
+	unsigned long	dx;
+	unsigned long	si;
+	unsigned long	di;
+	unsigned long	orig_ax;
+	unsigned long	ip;
+	unsigned long	cs;
+	unsigned long	flags;
+	unsigned long	sp;
+	unsigned long	ss;
+	unsigned long	fs_base;
+	unsigned long	gs_base;
+	unsigned long	ds;
+	unsigned long	es;
+	unsigned long	fs;
+	unsigned long	gs;
+};
 
 /* When the kernel dumps core, it starts by dumping the user struct -
    this will be used by gdb to figure out where the data and stack segments
@@ -94,7 +115,7 @@ struct user{
 				   This is actually the bottom of the stack,
 				   the top of the stack is always found in the
 				   esp register.  */
-  long int signal;     		/* Signal that caused the core dump. */
+  long int signal;		/* Signal that caused the core dump. */
   int reserved;			/* No longer used */
   int pad1;
   struct user_pt_regs * u_ar0;	/* Used by gdb to help find the values for */

From 742fa54a62be6a263df14a553bf832724471dfbe Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:56 +0100
Subject: [PATCH 212/890] x86: use generic register names in struct sigcontext

Switch struct sigcontext (defined in <asm/sigcontext*.h>) to using
register names withut e- or r-prefixes for both 32- and 64-bit x86.
This is intended as a preliminary step in unifying this code between
architectures.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32_signal.c      | 32 +++++++++----------
 arch/x86/kernel/asm-offsets_32.c | 18 +++++------
 arch/x86/kernel/asm-offsets_64.c | 18 +++++------
 arch/x86/kernel/signal_32.c      | 28 ++++++++---------
 arch/x86/kernel/signal_64.c      | 29 +++++++++--------
 arch/x86/vdso/vdso32/sigreturn.S | 54 ++++++++++++++++----------------
 include/asm-x86/sigcontext.h     | 42 ++++++++++++-------------
 include/asm-x86/sigcontext32.h   | 22 ++++++-------
 8 files changed, 121 insertions(+), 122 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f2da443f8c7b..d03d43f32f4c 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -192,9 +192,9 @@ struct rt_sigframe
 	char retcode[8];
 };
 
-#define COPY(x)		{ \
-	unsigned int reg;			\
-	err |= __get_user(reg, &sc->e ##x);	\
+#define COPY(x)		{ 		\
+	unsigned int reg;		\
+	err |= __get_user(reg, &sc->x);	\
 	regs->x = reg;			\
 }
 
@@ -248,7 +248,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	err |= __get_user(regs->ss, &sc->ss);
 	regs->ss |= 3;
 
-	err |= __get_user(tmpflags, &sc->eflags);
+	err |= __get_user(tmpflags, &sc->flags);
 	regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
 	/* disable syscall checks */
 	regs->orig_ax = -1;
@@ -268,7 +268,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		}
 	}
 
-	err |= __get_user(tmp, &sc->eax);
+	err |= __get_user(tmp, &sc->ax);
 	*peax = tmp;
 
 	return err;
@@ -361,21 +361,21 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
-	err |= __put_user((u32)regs->di, &sc->edi);
-	err |= __put_user((u32)regs->si, &sc->esi);
-	err |= __put_user((u32)regs->bp, &sc->ebp);
-	err |= __put_user((u32)regs->sp, &sc->esp);
-	err |= __put_user((u32)regs->bx, &sc->ebx);
-	err |= __put_user((u32)regs->dx, &sc->edx);
-	err |= __put_user((u32)regs->cx, &sc->ecx);
-	err |= __put_user((u32)regs->ax, &sc->eax);
+	err |= __put_user((u32)regs->di, &sc->di);
+	err |= __put_user((u32)regs->si, &sc->si);
+	err |= __put_user((u32)regs->bp, &sc->bp);
+	err |= __put_user((u32)regs->sp, &sc->sp);
+	err |= __put_user((u32)regs->bx, &sc->bx);
+	err |= __put_user((u32)regs->dx, &sc->dx);
+	err |= __put_user((u32)regs->cx, &sc->cx);
+	err |= __put_user((u32)regs->ax, &sc->ax);
 	err |= __put_user((u32)regs->cs, &sc->cs);
 	err |= __put_user((u32)regs->ss, &sc->ss);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user((u32)regs->ip, &sc->eip);
-	err |= __put_user((u32)regs->flags, &sc->eflags);
-	err |= __put_user((u32)regs->sp, &sc->esp_at_signal);
+	err |= __put_user((u32)regs->ip, &sc->ip);
+	err |= __put_user((u32)regs->flags, &sc->flags);
+	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
 
 	tmp = save_i387_ia32(current, fpstate, regs, 0);
 	if (tmp < 0)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a3a8be7618d1..4fc24a61f431 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -38,15 +38,15 @@ void foo(void);
 
 void foo(void)
 {
-	OFFSET(IA32_SIGCONTEXT_eax, sigcontext, eax);
-	OFFSET(IA32_SIGCONTEXT_ebx, sigcontext, ebx);
-	OFFSET(IA32_SIGCONTEXT_ecx, sigcontext, ecx);
-	OFFSET(IA32_SIGCONTEXT_edx, sigcontext, edx);
-	OFFSET(IA32_SIGCONTEXT_esi, sigcontext, esi);
-	OFFSET(IA32_SIGCONTEXT_edi, sigcontext, edi);
-	OFFSET(IA32_SIGCONTEXT_ebp, sigcontext, ebp);
-	OFFSET(IA32_SIGCONTEXT_esp, sigcontext, esp);
-	OFFSET(IA32_SIGCONTEXT_eip, sigcontext, eip);
+	OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
+	OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
+	OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
+	OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
+	OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
+	OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
+	OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
+	OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
+	OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
 	BLANK();
 
 	OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 2e918ebf21d3..a05428764314 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -63,15 +63,15 @@ int main(void)
 #undef ENTRY
 #ifdef CONFIG_IA32_EMULATION
 #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
-	ENTRY(eax);
-	ENTRY(ebx);
-	ENTRY(ecx);
-	ENTRY(edx);
-	ENTRY(esi);
-	ENTRY(edi);
-	ENTRY(ebp);
-	ENTRY(esp);
-	ENTRY(eip);
+	ENTRY(ax);
+	ENTRY(bx);
+	ENTRY(cx);
+	ENTRY(dx);
+	ENTRY(si);
+	ENTRY(di);
+	ENTRY(bp);
+	ENTRY(sp);
+	ENTRY(ip);
 	BLANK();
 #undef ENTRY
 	DEFINE(IA32_RT_SIGFRAME_sigcontext,
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 40fd3515ccf1..64cb3c05de69 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -105,7 +105,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->e ## x)
+#define COPY(x)		err |= __get_user(regs->x, &sc->x)
 
 #define COPY_SEG(seg)							\
 	{ unsigned short tmp;						\
@@ -144,7 +144,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	
 	{
 		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->eflags);
+		err |= __get_user(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_ax = -1;		/* disable syscall checks */
 	}
@@ -165,7 +165,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 		}
 	}
 
-	err |= __get_user(*peax, &sc->eax);
+	err |= __get_user(*peax, &sc->ax);
 	return err;
 
 badframe:
@@ -256,20 +256,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 
 	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
 	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
-	err |= __put_user(regs->di, &sc->edi);
-	err |= __put_user(regs->si, &sc->esi);
-	err |= __put_user(regs->bp, &sc->ebp);
-	err |= __put_user(regs->sp, &sc->esp);
-	err |= __put_user(regs->bx, &sc->ebx);
-	err |= __put_user(regs->dx, &sc->edx);
-	err |= __put_user(regs->cx, &sc->ecx);
-	err |= __put_user(regs->ax, &sc->eax);
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->ip, &sc->eip);
+	err |= __put_user(regs->ip, &sc->ip);
 	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->flags, &sc->eflags);
-	err |= __put_user(regs->sp, &sc->esp_at_signal);
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);
 	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
 	tmp = save_i387(fpstate);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 4b228fd83b31..1c9bca56fb55 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -62,11 +62,10 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPYR(x)	err |= __get_user(regs->x, &sc->r ## x)
 #define COPY(x)		err |= __get_user(regs->x, &sc->x)
 
-	COPYR(di); COPYR(si); COPYR(bp); COPYR(sp); COPYR(bx);
-	COPYR(dx); COPYR(cx); COPYR(ip);
+	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+	COPY(dx); COPY(cx); COPY(ip);
 	COPY(r8);
 	COPY(r9);
 	COPY(r10);
@@ -87,7 +86,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 
 	{
 		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->eflags);
+		err |= __get_user(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
 		regs->orig_ax = -1;		/* disable syscall checks */
 	}
@@ -109,7 +108,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 		}
 	}
 
-	err |= __get_user(*prax, &sc->rax);
+	err |= __get_user(*prax, &sc->ax);
 	return err;
 
 badframe:
@@ -166,14 +165,14 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 	err |= __put_user(0, &sc->gs);
 	err |= __put_user(0, &sc->fs);
 
-	err |= __put_user(regs->di, &sc->rdi);
-	err |= __put_user(regs->si, &sc->rsi);
-	err |= __put_user(regs->bp, &sc->rbp);
-	err |= __put_user(regs->sp, &sc->rsp);
-	err |= __put_user(regs->bx, &sc->rbx);
-	err |= __put_user(regs->dx, &sc->rdx);
-	err |= __put_user(regs->cx, &sc->rcx);
-	err |= __put_user(regs->ax, &sc->rax);
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
 	err |= __put_user(regs->r8, &sc->r8);
 	err |= __put_user(regs->r9, &sc->r9);
 	err |= __put_user(regs->r10, &sc->r10);
@@ -184,8 +183,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 	err |= __put_user(regs->r15, &sc->r15);
 	err |= __put_user(me->thread.trap_no, &sc->trapno);
 	err |= __put_user(me->thread.error_code, &sc->err);
-	err |= __put_user(regs->ip, &sc->rip);
-	err |= __put_user(regs->flags, &sc->eflags);
+	err |= __put_user(regs->ip, &sc->ip);
+	err |= __put_user(regs->flags, &sc->flags);
 	err |= __put_user(mask, &sc->oldmask);
 	err |= __put_user(me->thread.cr2, &sc->cr2);
 
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index cade2752928b..31776d0efc8c 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -92,27 +92,27 @@ __kernel_rt_sigreturn:
 	.sleb128 offset;		/*       offset */		\
 1:
 
-	do_cfa_expr(IA32_SIGCONTEXT_esp+4)
-	do_expr(0, IA32_SIGCONTEXT_eax+4)
-	do_expr(1, IA32_SIGCONTEXT_ecx+4)
-	do_expr(2, IA32_SIGCONTEXT_edx+4)
-	do_expr(3, IA32_SIGCONTEXT_ebx+4)
-	do_expr(5, IA32_SIGCONTEXT_ebp+4)
-	do_expr(6, IA32_SIGCONTEXT_esi+4)
-	do_expr(7, IA32_SIGCONTEXT_edi+4)
-	do_expr(8, IA32_SIGCONTEXT_eip+4)
+	do_cfa_expr(IA32_SIGCONTEXT_sp+4)
+	do_expr(0, IA32_SIGCONTEXT_ax+4)
+	do_expr(1, IA32_SIGCONTEXT_cx+4)
+	do_expr(2, IA32_SIGCONTEXT_dx+4)
+	do_expr(3, IA32_SIGCONTEXT_bx+4)
+	do_expr(5, IA32_SIGCONTEXT_bp+4)
+	do_expr(6, IA32_SIGCONTEXT_si+4)
+	do_expr(7, IA32_SIGCONTEXT_di+4)
+	do_expr(8, IA32_SIGCONTEXT_ip+4)
 
 	.byte 0x42	/* DW_CFA_advance_loc 2 -- nop; popl eax. */
 
-	do_cfa_expr(IA32_SIGCONTEXT_esp)
-	do_expr(0, IA32_SIGCONTEXT_eax)
-	do_expr(1, IA32_SIGCONTEXT_ecx)
-	do_expr(2, IA32_SIGCONTEXT_edx)
-	do_expr(3, IA32_SIGCONTEXT_ebx)
-	do_expr(5, IA32_SIGCONTEXT_ebp)
-	do_expr(6, IA32_SIGCONTEXT_esi)
-	do_expr(7, IA32_SIGCONTEXT_edi)
-	do_expr(8, IA32_SIGCONTEXT_eip)
+	do_cfa_expr(IA32_SIGCONTEXT_sp)
+	do_expr(0, IA32_SIGCONTEXT_ax)
+	do_expr(1, IA32_SIGCONTEXT_cx)
+	do_expr(2, IA32_SIGCONTEXT_dx)
+	do_expr(3, IA32_SIGCONTEXT_bx)
+	do_expr(5, IA32_SIGCONTEXT_bp)
+	do_expr(6, IA32_SIGCONTEXT_si)
+	do_expr(7, IA32_SIGCONTEXT_di)
+	do_expr(8, IA32_SIGCONTEXT_ip)
 
 	.align 4
 .LENDFDEDLSI1:
@@ -129,15 +129,15 @@ __kernel_rt_sigreturn:
 	   slightly less complicated than the above, since we don't
 	   modify the stack pointer in the process.  */
 
-	do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
-	do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
-	do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
-	do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
-	do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
-	do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
-	do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
-	do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
-	do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
+	do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
+	do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
+	do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
+	do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
+	do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
+	do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
+	do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
+	do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
+	do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
 
 	.align 4
 .LENDFDEDLSI2:
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index c047f9dc3423..681deade5f00 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -63,20 +63,20 @@ struct sigcontext {
 	unsigned short fs, __fsh;
 	unsigned short es, __esh;
 	unsigned short ds, __dsh;
-	unsigned long edi;
-	unsigned long esi;
-	unsigned long ebp;
-	unsigned long esp;
-	unsigned long ebx;
-	unsigned long edx;
-	unsigned long ecx;
-	unsigned long eax;
+	unsigned long di;
+	unsigned long si;
+	unsigned long bp;
+	unsigned long sp;
+	unsigned long bx;
+	unsigned long dx;
+	unsigned long cx;
+	unsigned long ax;
 	unsigned long trapno;
 	unsigned long err;
-	unsigned long eip;
+	unsigned long ip;
 	unsigned short cs, __csh;
-	unsigned long eflags;
-	unsigned long esp_at_signal;
+	unsigned long flags;
+	unsigned long sp_at_signal;
 	unsigned short ss, __ssh;
 	struct _fpstate __user * fpstate;
 	unsigned long oldmask;
@@ -111,16 +111,16 @@ struct sigcontext {
 	unsigned long r13;
 	unsigned long r14;
 	unsigned long r15;
-	unsigned long rdi;
-	unsigned long rsi;
-	unsigned long rbp;
-	unsigned long rbx;
-	unsigned long rdx;
-	unsigned long rax;
-	unsigned long rcx;
-	unsigned long rsp;
-	unsigned long rip;
-	unsigned long eflags;		/* RFLAGS */
+	unsigned long di;
+	unsigned long si;
+	unsigned long bp;
+	unsigned long bx;
+	unsigned long dx;
+	unsigned long ax;
+	unsigned long cx;
+	unsigned long sp;
+	unsigned long ip;
+	unsigned long flags;
 	unsigned short cs;
 	unsigned short gs;
 	unsigned short fs;
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 3d657038ab7c..6ffab4fd593a 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -48,20 +48,20 @@ struct sigcontext_ia32 {
        unsigned short fs, __fsh;
        unsigned short es, __esh;
        unsigned short ds, __dsh;
-       unsigned int edi;
-       unsigned int esi;
-       unsigned int ebp;
-       unsigned int esp;
-       unsigned int ebx;
-       unsigned int edx;
-       unsigned int ecx;
-       unsigned int eax;
+       unsigned int di;
+       unsigned int si;
+       unsigned int bp;
+       unsigned int sp;
+       unsigned int bx;
+       unsigned int dx;
+       unsigned int cx;
+       unsigned int ax;
        unsigned int trapno;
        unsigned int err;
-       unsigned int eip;
+       unsigned int ip;
        unsigned short cs, __csh;
-       unsigned int eflags;
-       unsigned int esp_at_signal;
+       unsigned int flags;
+       unsigned int sp_at_signal;
        unsigned short ss, __ssh;
        unsigned int fpstate;		/* really (struct _fpstate_ia32 *) */
        unsigned int oldmask;

From a46ff73d53f011de1dfc1983b05db2c04d193713 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:57 +0100
Subject: [PATCH 213/890] x86: setup64 eflags constants

This cleans up arch/x86/kernel/setup64.c to use the X86_EFLAGS_* constants
from <asm/processor-flags.h> instead of the EF_* enum in <asm/ptrace.h>.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 3558ac78c926..51297cca6b42 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -169,7 +169,8 @@ void syscall_init(void)
 #endif
 
 	/* Flags to clear on syscall */
-	wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
+	wrmsrl(MSR_SYSCALL_MASK,
+	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 }
 
 void __cpuinit check_efer(void)

From 35d0991ffa544945d2e1992169c097286b7c0bfb Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:57 +0100
Subject: [PATCH 214/890] x86: eflags enum

This removes the EF_* enum from <asm/ptrace.h>.  It is no longer used,
and duplicates the X86_EFLAGS_* constants from <asm/processor-flags.h>.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/ptrace.h | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 9187b2fab754..1b7a8b8d6f60 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -170,28 +170,6 @@ extern int ptrace_set_debugreg(struct task_struct *child, int n, unsigned long);
 extern unsigned long
 convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
-enum {
-	EF_CF   = 0x00000001,
-	EF_PF   = 0x00000004,
-	EF_AF   = 0x00000010,
-	EF_ZF   = 0x00000040,
-	EF_SF   = 0x00000080,
-	EF_TF   = 0x00000100,
-	EF_IE   = 0x00000200,
-	EF_DF   = 0x00000400,
-	EF_OF   = 0x00000800,
-	EF_IOPL = 0x00003000,
-	EF_IOPL_RING0 = 0x00000000,
-	EF_IOPL_RING1 = 0x00001000,
-	EF_IOPL_RING2 = 0x00002000,
-	EF_NT   = 0x00004000,   /* nested task */
-	EF_RF   = 0x00010000,   /* resume */
-	EF_VM   = 0x00020000,   /* virtual mode */
-	EF_AC   = 0x00040000,   /* alignment */
-	EF_VIF  = 0x00080000,   /* virtual interrupt */
-	EF_VIP  = 0x00100000,   /* virtual interrupt pending */
-	EF_ID   = 0x00200000,   /* id */
-};
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */
 

From ff14c6164bd532a6dc9025c07d3b562f839f00a9 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:57 +0100
Subject: [PATCH 215/890] x86: x86-64 ia32 ptrace pt_regs cleanup

This cleans up the getreg32/putreg32 functions to use struct pt_regs in a
straightforward fashion, instead of equivalent ugly pointer arithmetic.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 1e382e3bd882..c52d0664c67f 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -37,11 +37,11 @@
 
 #define R32(l,q)							\
 	case offsetof(struct user32, regs.l):				\
-		stack[offsetof(struct pt_regs, q) / 8] = val; break
+		regs->q = val; break;
 
 static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 {
-	__u64 *stack = (__u64 *)task_pt_regs(child);
+	struct pt_regs *regs = task_pt_regs(child);
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
@@ -65,12 +65,12 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 	case offsetof(struct user32, regs.ss):
 		if ((val & 3) != 3)
 			return -EIO;
-		stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
+		regs->ss = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.cs):
 		if ((val & 3) != 3)
 			return -EIO;
-		stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
+		regs->cs = val & 0xffff;
 		break;
 
 	R32(ebx, bx);
@@ -84,9 +84,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 	R32(eip, ip);
 	R32(esp, sp);
 
-	case offsetof(struct user32, regs.eflags): {
-		__u64 *flags = &stack[offsetof(struct pt_regs, flags)/8];
-
+	case offsetof(struct user32, regs.eflags):
 		val &= FLAG_MASK;
 		/*
 		 * If the user value contains TF, mark that
@@ -97,9 +95,8 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 			clear_tsk_thread_flag(child, TIF_FORCED_TF);
 		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 			val |= X86_EFLAGS_TF;
-		*flags = val | (*flags & ~FLAG_MASK);
+		regs->flags = val | (regs->flags & ~FLAG_MASK);
 		break;
-	}
 
 	case offsetof(struct user32, u_debugreg[0]) ...
 		offsetof(struct user32, u_debugreg[7]):
@@ -123,11 +120,11 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 
 #define R32(l,q)							\
 	case offsetof(struct user32, regs.l):				\
-		*val = stack[offsetof(struct pt_regs, q)/8]; break
+		*val = regs->q; break
 
 static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 {
-	__u64 *stack = (__u64 *)task_pt_regs(child);
+	struct pt_regs *regs = task_pt_regs(child);
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
@@ -160,7 +157,7 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 		/*
 		 * If the debugger set TF, hide it from the readout.
 		 */
-		*val = stack[offsetof(struct pt_regs, flags)/8];
+		*val = regs->flags;
 		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 			*val &= ~X86_EFLAGS_TF;
 		break;

From 80976c0867b7537d77511492d3180b8093b0ba3d Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:58 +0100
Subject: [PATCH 216/890] x86: x86-64 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 224 ++++++++++++++++++------------------
 1 file changed, 112 insertions(+), 112 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 56b31cd3b865..2427548f2a01 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -2,7 +2,7 @@
 /*
  * Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
- * 
+ *
  * x86-64 port 2000-2002 Andi Kleen
  */
 
@@ -48,7 +48,7 @@
  * Make sure the single step bit is not set.
  */
 void ptrace_disable(struct task_struct *child)
-{ 
+{
 	user_disable_single_step(child);
 }
 
@@ -63,69 +63,69 @@ static int putreg(struct task_struct *child,
 {
 	struct pt_regs *regs = task_pt_regs(child);
 	switch (regno) {
-		case offsetof(struct user_regs_struct,fs):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.fsindex = value & 0xffff; 
-			return 0;
-		case offsetof(struct user_regs_struct,gs):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.gsindex = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,ds):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.ds = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,es): 
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.es = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,ss):
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,fs_base):
-			if (value >= TASK_SIZE_OF(child))
-				return -EIO;
-			/*
-			 * When changing the segment base, use do_arch_prctl
-			 * to set either thread.fs or thread.fsindex and the
-			 * corresponding GDT slot.
-			 */
-			if (child->thread.fs != value)
-				return do_arch_prctl(child, ARCH_SET_FS, value);
-			return 0;
-		case offsetof(struct user_regs_struct,gs_base):
-			/*
-			 * Exactly the same here as the %fs handling above.
-			 */
-			if (value >= TASK_SIZE_OF(child))
-				return -EIO;
-			if (child->thread.gs != value)
-				return do_arch_prctl(child, ARCH_SET_GS, value);
-			return 0;
-		case offsetof(struct user_regs_struct,flags):
-			value &= FLAG_MASK;
-			/*
-			 * If the user value contains TF, mark that
-			 * it was not "us" (the debugger) that set it.
-			 * If not, make sure it stays set if we had.
-			 */
-			if (value & X86_EFLAGS_TF)
-				clear_tsk_thread_flag(child, TIF_FORCED_TF);
-			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				value |= X86_EFLAGS_TF;
-			value |= regs->flags & ~FLAG_MASK;
-			break;
-		case offsetof(struct user_regs_struct,cs): 
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
+	case offsetof(struct user_regs_struct,fs):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.fsindex = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,gs):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.gsindex = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,ds):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.ds = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,es):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.es = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,ss):
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,fs_base):
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		/*
+		 * When changing the segment base, use do_arch_prctl
+		 * to set either thread.fs or thread.fsindex and the
+		 * corresponding GDT slot.
+		 */
+		if (child->thread.fs != value)
+			return do_arch_prctl(child, ARCH_SET_FS, value);
+		return 0;
+	case offsetof(struct user_regs_struct,gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		if (child->thread.gs != value)
+			return do_arch_prctl(child, ARCH_SET_GS, value);
+		return 0;
+	case offsetof(struct user_regs_struct,flags):
+		value &= FLAG_MASK;
+		/*
+		 * If the user value contains TF, mark that
+		 * it was not "us" (the debugger) that set it.
+		 * If not, make sure it stays set if we had.
+		 */
+		if (value & X86_EFLAGS_TF)
+			clear_tsk_thread_flag(child, TIF_FORCED_TF);
+		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			value |= X86_EFLAGS_TF;
+		value |= regs->flags & ~FLAG_MASK;
+		break;
+	case offsetof(struct user_regs_struct,cs):
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
 	}
 	*pt_regs_access(regs, regno) = value;
 	return 0;
@@ -136,49 +136,49 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long val;
 	switch (regno) {
-		case offsetof(struct user_regs_struct, fs):
-			return child->thread.fsindex;
-		case offsetof(struct user_regs_struct, gs):
-			return child->thread.gsindex;
-		case offsetof(struct user_regs_struct, ds):
-			return child->thread.ds;
-		case offsetof(struct user_regs_struct, es):
-			return child->thread.es; 
-		case offsetof(struct user_regs_struct, fs_base):
-			/*
-			 * do_arch_prctl may have used a GDT slot instead of
-			 * the MSR.  To userland, it appears the same either
-			 * way, except the %fs segment selector might not be 0.
-			 */
-			if (child->thread.fs != 0)
-				return child->thread.fs;
-			if (child->thread.fsindex != FS_TLS_SEL)
-				return 0;
-			return get_desc_base(&child->thread.tls_array[FS_TLS]);
-		case offsetof(struct user_regs_struct, gs_base):
-			/*
-			 * Exactly the same here as the %fs handling above.
-			 */
-			if (child->thread.gs != 0)
-				return child->thread.gs;
-			if (child->thread.gsindex != GS_TLS_SEL)
-				return 0;
-			return get_desc_base(&child->thread.tls_array[GS_TLS]);
-		case offsetof(struct user_regs_struct, flags):
-			/*
-			 * If the debugger set TF, hide it from the readout.
-			 */
-			val = regs->flags;
-			if (test_tsk_thread_flag(child, TIF_IA32))
-				val &= 0xffffffff;
-			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				val &= ~X86_EFLAGS_TF;
-			return val;
-		default:
-			val = *pt_regs_access(regs, regno);
-			if (test_tsk_thread_flag(child, TIF_IA32))
-				val &= 0xffffffff;
-			return val;
+	case offsetof(struct user_regs_struct, fs):
+		return child->thread.fsindex;
+	case offsetof(struct user_regs_struct, gs):
+		return child->thread.gsindex;
+	case offsetof(struct user_regs_struct, ds):
+		return child->thread.ds;
+	case offsetof(struct user_regs_struct, es):
+		return child->thread.es;
+	case offsetof(struct user_regs_struct, fs_base):
+		/*
+		 * do_arch_prctl may have used a GDT slot instead of
+		 * the MSR.  To userland, it appears the same either
+		 * way, except the %fs segment selector might not be 0.
+		 */
+		if (child->thread.fs != 0)
+			return child->thread.fs;
+		if (child->thread.fsindex != FS_TLS_SEL)
+			return 0;
+		return get_desc_base(&child->thread.tls_array[FS_TLS]);
+	case offsetof(struct user_regs_struct, gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		if (child->thread.gs != 0)
+			return child->thread.gs;
+		if (child->thread.gsindex != GS_TLS_SEL)
+			return 0;
+		return get_desc_base(&child->thread.tls_array[GS_TLS]);
+	case offsetof(struct user_regs_struct, flags):
+		/*
+		 * If the debugger set TF, hide it from the readout.
+		 */
+		val = regs->flags;
+		if (test_tsk_thread_flag(child, TIF_IA32))
+			val &= 0xffffffff;
+		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			val &= ~X86_EFLAGS_TF;
+		return val;
+	default:
+		val = *pt_regs_access(regs, regno);
+		if (test_tsk_thread_flag(child, TIF_IA32))
+			val &= 0xffffffff;
+		return val;
 	}
 
 }
@@ -244,7 +244,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
 		ret = generic_ptrace_peekdata(child, addr, data);
 		break;
@@ -310,10 +310,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					 (struct user_desc __user *) data, 0);
 		break;
 #endif
-		/* normal 64bit interface to access TLS data. 
+		/* normal 64bit interface to access TLS data.
 		   Works just like arch_prctl, except that the arguments
 		   are reversed. */
-	case PTRACE_ARCH_PRCTL: 
+	case PTRACE_ARCH_PRCTL:
 		ret = do_arch_prctl(child, data, addr);
 		break;
 
@@ -386,7 +386,7 @@ static void syscall_trace(struct pt_regs *regs)
 	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
 	       current->comm,
 	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace); 
+	       current_thread_info()->flags, current->ptrace);
 #endif
 
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)

From 9e714bed644cb463489b9250774a4b0fb352cabc Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:58 +0100
Subject: [PATCH 217/890] x86: x86-32 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 110 ++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index f81e2f1827d4..5aca84ef26d5 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -51,37 +51,37 @@ static int putreg(struct task_struct *child,
 	struct pt_regs *regs = task_pt_regs(child);
 	regno >>= 2;
 	switch (regno) {
-		case GS:
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.gs = value;
-			return 0;
-		case DS:
-		case ES:
-		case FS:
-			if (value && (value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
-		case SS:
-		case CS:
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
-		case EFL:
-			value &= FLAG_MASK;
-			/*
-			 * If the user value contains TF, mark that
-			 * it was not "us" (the debugger) that set it.
-			 * If not, make sure it stays set if we had.
-			 */
-			if (value & X86_EFLAGS_TF)
-				clear_tsk_thread_flag(child, TIF_FORCED_TF);
-			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				value |= X86_EFLAGS_TF;
-			value |= regs->flags & ~FLAG_MASK;
-			break;
+	case GS:
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.gs = value;
+		return 0;
+	case DS:
+	case ES:
+	case FS:
+		if (value && (value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case SS:
+	case CS:
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case EFL:
+		value &= FLAG_MASK;
+		/*
+		 * If the user value contains TF, mark that
+		 * it was not "us" (the debugger) that set it.
+		 * If not, make sure it stays set if we had.
+		 */
+		if (value & X86_EFLAGS_TF)
+			clear_tsk_thread_flag(child, TIF_FORCED_TF);
+		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			value |= X86_EFLAGS_TF;
+		value |= regs->flags & ~FLAG_MASK;
+		break;
 	}
 	*pt_regs_access(regs, regno) = value;
 	return 0;
@@ -94,26 +94,26 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 
 	regno >>= 2;
 	switch (regno) {
-		case EFL:
-			/*
-			 * If the debugger set TF, hide it from the readout.
-			 */
-			retval = regs->flags;
-			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				retval &= ~X86_EFLAGS_TF;
-			break;
-		case GS:
-			retval = child->thread.gs;
-			break;
-		case DS:
-		case ES:
-		case FS:
-		case SS:
-		case CS:
-			retval = 0xffff;
-			/* fall through */
-		default:
-			retval &= *pt_regs_access(regs, regno);
+	case EFL:
+		/*
+		 * If the debugger set TF, hide it from the readout.
+		 */
+		retval = regs->flags;
+		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			retval &= ~X86_EFLAGS_TF;
+		break;
+	case GS:
+		retval = child->thread.gs;
+		break;
+	case DS:
+	case ES:
+	case FS:
+	case SS:
+	case CS:
+		retval = 0xffff;
+		/* fall through */
+	default:
+		retval &= *pt_regs_access(regs, regno);
 	}
 	return retval;
 }
@@ -190,7 +190,7 @@ static int ptrace_set_debugreg(struct task_struct *child,
  * Make sure the single step bit is not set.
  */
 void ptrace_disable(struct task_struct *child)
-{ 
+{
 	user_disable_single_step(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 }
@@ -203,7 +203,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
 		ret = generic_ptrace_peekdata(child, addr, data);
 		break;
@@ -213,7 +213,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		unsigned long tmp;
 
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
+		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
 			break;
 
@@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
+		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
 			break;
 

From ce90f340855d7a9b3bec24f0fe49a76904242387 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:58 +0100
Subject: [PATCH 218/890] x86: x86-64 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the
current task, as well as on a task stopped in TASK_TRACED and switched off.
This lays the groundwork to share this code for all kinds of user-mode
machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_64.c | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 2427548f2a01..5979dbe8e0a2 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -67,21 +67,29 @@ static int putreg(struct task_struct *child,
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.fsindex = value & 0xffff;
+		if (child == current)
+			loadsegment(fs, child->thread.fsindex);
 		return 0;
 	case offsetof(struct user_regs_struct,gs):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.gsindex = value & 0xffff;
+		if (child == current)
+			load_gs_index(child->thread.gsindex);
 		return 0;
 	case offsetof(struct user_regs_struct,ds):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.ds = value & 0xffff;
+		if (child == current)
+			loadsegment(ds, child->thread.ds);
 		return 0;
 	case offsetof(struct user_regs_struct,es):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.es = value & 0xffff;
+		if (child == current)
+			loadsegment(es, child->thread.es);
 		return 0;
 	case offsetof(struct user_regs_struct,ss):
 		if ((value & 3) != 3)
@@ -135,14 +143,32 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 {
 	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long val;
+	unsigned int seg;
 	switch (regno) {
 	case offsetof(struct user_regs_struct, fs):
+		if (child == current) {
+			/* Older gas can't assemble movq %?s,%r?? */
+			asm("movl %%fs,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.fsindex;
 	case offsetof(struct user_regs_struct, gs):
+		if (child == current) {
+			asm("movl %%gs,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.gsindex;
 	case offsetof(struct user_regs_struct, ds):
+		if (child == current) {
+			asm("movl %%ds,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.ds;
 	case offsetof(struct user_regs_struct, es):
+		if (child == current) {
+			asm("movl %%es,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.es;
 	case offsetof(struct user_regs_struct, fs_base):
 		/*
@@ -152,7 +178,10 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		 */
 		if (child->thread.fs != 0)
 			return child->thread.fs;
-		if (child->thread.fsindex != FS_TLS_SEL)
+		seg = child->thread.fsindex;
+		if (child == current)
+			asm("movl %%fs,%0" : "=r" (seg));
+		if (seg != FS_TLS_SEL)
 			return 0;
 		return get_desc_base(&child->thread.tls_array[FS_TLS]);
 	case offsetof(struct user_regs_struct, gs_base):
@@ -161,7 +190,10 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		 */
 		if (child->thread.gs != 0)
 			return child->thread.gs;
-		if (child->thread.gsindex != GS_TLS_SEL)
+		seg = child->thread.gsindex;
+		if (child == current)
+			asm("movl %%gs,%0" : "=r" (seg));
+		if (seg != GS_TLS_SEL)
 			return 0;
 		return get_desc_base(&child->thread.tls_array[GS_TLS]);
 	case offsetof(struct user_regs_struct, flags):

From 5fd4d16bd59a9fc84ca94c4fce4abc23fe219108 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:58 +0100
Subject: [PATCH 219/890] x86: x86-32 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the
current task, as well as on a task stopped in TASK_TRACED and switched off.
This lays the groundwork to share this code for all kinds of user-mode
machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace_32.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 5aca84ef26d5..26071305de2c 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -55,6 +55,12 @@ static int putreg(struct task_struct *child,
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.gs = value;
+		if (child == current)
+			/*
+			 * The user-mode %gs is not affected by
+			 * kernel entry, so we must update the CPU.
+			 */
+			loadsegment(gs, value);
 		return 0;
 	case DS:
 	case ES:
@@ -104,6 +110,8 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		break;
 	case GS:
 		retval = child->thread.gs;
+		if (child == current)
+			savesegment(gs, retval);
 		break;
 	case DS:
 	case ES:

From d277fb89dfb042deba04a8e765718cc8b3825e85 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:58 +0100
Subject: [PATCH 220/890] x86: x86-64 ia32 ptrace get/putreg32 current task

This generalizes the getreg32 and putreg32 functions so they can be used on
the current task, as well as on a task stopped in TASK_TRACED and switched
off.  This lays the groundwork to share this code for all kinds of
user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index c52d0664c67f..d5663e295330 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -48,19 +48,27 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.fsindex = val & 0xffff;
+		if (child == current)
+			loadsegment(fs, child->thread.fsindex);
 		break;
 	case offsetof(struct user32, regs.gs):
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.gsindex = val & 0xffff;
+		if (child == current)
+			load_gs_index(child->thread.gsindex);
 		break;
 	case offsetof(struct user32, regs.ds):
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.ds = val & 0xffff;
+		if (child == current)
+			loadsegment(ds, child->thread.ds);
 		break;
 	case offsetof(struct user32, regs.es):
 		child->thread.es = val & 0xffff;
+		if (child == current)
+			loadsegment(es, child->thread.ds);
 		break;
 	case offsetof(struct user32, regs.ss):
 		if ((val & 3) != 3)
@@ -129,15 +137,23 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
 		*val = child->thread.fsindex;
+		if (child == current)
+			asm("movl %%fs,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.gs):
 		*val = child->thread.gsindex;
+		if (child == current)
+			asm("movl %%gs,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.ds):
 		*val = child->thread.ds;
+		if (child == current)
+			asm("movl %%ds,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.es):
 		*val = child->thread.es;
+		if (child == current)
+			asm("movl %%es,%0" : "=r" (*val));
 		break;
 
 	R32(cs, cs);

From 0f5340933f9bacb403f49baaf8073320e3984841 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:59 +0100
Subject: [PATCH 221/890] x86: x86-32 thread_struct.debugreg

This replaces the debugreg[7] member of thread_struct with individual
members debugreg0, etc.  This saves two words for the dummies 4 and 5,
and harmonizes the code between 32 and 64.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c   | 31 ++++++++++++++++++++-----------
 arch/x86/kernel/ptrace_32.c    | 29 ++++++++++++++++++++++++-----
 arch/x86/kernel/signal_32.c    |  4 ++--
 arch/x86/kernel/traps_32.c     |  4 ++--
 arch/x86/power/cpu.c           | 14 +++++++-------
 include/asm-x86/processor_32.h |  7 ++++++-
 6 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53406461074f..3744cf63682c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -445,7 +445,12 @@ void flush_thread(void)
 {
 	struct task_struct *tsk = current;
 
-	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+	tsk->thread.debugreg0 = 0;
+	tsk->thread.debugreg1 = 0;
+	tsk->thread.debugreg2 = 0;
+	tsk->thread.debugreg3 = 0;
+	tsk->thread.debugreg6 = 0;
+	tsk->thread.debugreg7 = 0;
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 	/*
@@ -522,7 +527,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
  */
 void dump_thread(struct pt_regs * regs, struct user * dump)
 {
-	int i;
 	u16 gs;
 
 /* changed the size calculations - should hopefully work better. lbt */
@@ -533,8 +537,14 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	for (i = 0; i < 8; i++)
-		dump->u_debugreg[i] = current->thread.debugreg[i];  
+	dump->u_debugreg[0] = current->thread.debugreg0;
+	dump->u_debugreg[1] = current->thread.debugreg1;
+	dump->u_debugreg[2] = current->thread.debugreg2;
+	dump->u_debugreg[3] = current->thread.debugreg3;
+	dump->u_debugreg[4] = 0;
+	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[6] = current->thread.debugreg6;
+	dump->u_debugreg[7] = current->thread.debugreg7;
 
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
@@ -612,13 +622,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
 
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg[0], 0);
-		set_debugreg(next->debugreg[1], 1);
-		set_debugreg(next->debugreg[2], 2);
-		set_debugreg(next->debugreg[3], 3);
+		set_debugreg(next->debugreg0, 0);
+		set_debugreg(next->debugreg1, 1);
+		set_debugreg(next->debugreg2, 2);
+		set_debugreg(next->debugreg3, 3);
 		/* no 4 and 5 */
-		set_debugreg(next->debugreg[6], 6);
-		set_debugreg(next->debugreg[7], 7);
+		set_debugreg(next->debugreg6, 6);
+		set_debugreg(next->debugreg7, 7);
 	}
 
 #ifdef CONFIG_SECCOMP
@@ -869,4 +879,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	unsigned long range_end = mm->brk + 0x02000000;
 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
-
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 26071305de2c..fed83d066135 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -133,19 +133,39 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
  */
 static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
 {
-	return child->thread.debugreg[n];
+	switch (n) {
+	case 0:		return child->thread.debugreg0;
+	case 1:		return child->thread.debugreg1;
+	case 2:		return child->thread.debugreg2;
+	case 3:		return child->thread.debugreg3;
+	case 6:		return child->thread.debugreg6;
+	case 7:		return child->thread.debugreg7;
+	}
+	return 0;
 }
 
 static int ptrace_set_debugreg(struct task_struct *child,
 			       int n, unsigned long data)
 {
+	int i;
+
 	if (unlikely(n == 4 || n == 5))
 		return -EIO;
 
 	if (n < 4 && unlikely(data >= TASK_SIZE - 3))
 		return -EIO;
 
-	if (n == 7) {
+	switch (n) {
+	case 0:		child->thread.debugreg0 = data; break;
+	case 1:		child->thread.debugreg1 = data; break;
+	case 2:		child->thread.debugreg2 = data; break;
+	case 3:		child->thread.debugreg3 = data; break;
+
+	case 6:
+		child->thread.debugreg6 = data;
+		break;
+
+	case 7:
 		/*
 		 * Sanity-check data. Take one half-byte at once with
 		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
@@ -176,19 +196,18 @@ static int ptrace_set_debugreg(struct task_struct *child,
 		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
 		 * See the AMD manual no. 24593 (AMD64 System Programming)
 		 */
-		int i;
 		data &= ~DR_CONTROL_RESERVED;
 		for (i = 0; i < 4; i++)
 			if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
 				return -EIO;
+		child->thread.debugreg7 = data;
 		if (data)
 			set_tsk_thread_flag(child, TIF_DEBUG);
 		else
 			clear_tsk_thread_flag(child, TIF_DEBUG);
+		break;
 	}
 
-	child->thread.debugreg[n] = data;
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 64cb3c05de69..2bf5c9aed106 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -605,8 +605,8 @@ static void fastcall do_signal(struct pt_regs *regs)
 		 * have been cleared if the watchpoint triggered
 		 * inside the kernel.
 		 */
-		if (unlikely(current->thread.debugreg[7]))
-			set_debugreg(current->thread.debugreg[7], 7);
+		if (unlikely(current->thread.debugreg7))
+			set_debugreg(current->thread.debugreg7, 7);
 
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 931ef10960ee..27713553cc59 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -852,7 +852,7 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
 
 	/* Mask out spurious debug traps due to lazy DR7 setting */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg[7])
+		if (!tsk->thread.debugreg7)
 			goto clear_dr7;
 	}
 
@@ -860,7 +860,7 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
 		goto debug_vm86;
 
 	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg[6] = condition;
+	tsk->thread.debugreg6 = condition;
 
 	/*
 	 * Single-stepping through TF: make sure we ignore any events in
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 998fd3ec0d68..5a98dc35addf 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -74,14 +74,14 @@ static void fix_processor_context(void)
 	/*
 	 * Now maybe reload the debug registers
 	 */
-	if (current->thread.debugreg[7]){
-		set_debugreg(current->thread.debugreg[0], 0);
-		set_debugreg(current->thread.debugreg[1], 1);
-		set_debugreg(current->thread.debugreg[2], 2);
-		set_debugreg(current->thread.debugreg[3], 3);
+	if (current->thread.debugreg7) {
+		set_debugreg(current->thread.debugreg0, 0);
+		set_debugreg(current->thread.debugreg1, 1);
+		set_debugreg(current->thread.debugreg2, 2);
+		set_debugreg(current->thread.debugreg3, 3);
 		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg[6], 6);
-		set_debugreg(current->thread.debugreg[7], 7);
+		set_debugreg(current->thread.debugreg6, 6);
+		set_debugreg(current->thread.debugreg7, 7);
 	}
 
 }
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index c85400fe58c4..d50a4b48d441 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -353,7 +353,12 @@ struct thread_struct {
 	unsigned long	fs;
 	unsigned long	gs;
 /* Hardware debugging registers */
-	unsigned long	debugreg[8];  /* %%db0-7 debug registers */
+	unsigned long	debugreg0;
+	unsigned long	debugreg1;
+	unsigned long	debugreg2;
+	unsigned long	debugreg3;
+	unsigned long	debugreg6;
+	unsigned long	debugreg7;
 /* fault info */
 	unsigned long	cr2, trap_no, error_code;
 /* floating point info */

From d52e9d690fff1fd9d0ccffe375fd01b700f82a64 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 222/890] x86: ptrace_32 renamed

This renames ptrace_32.c back to ptrace.c, in preparation
for merging the 32/64 versions of these files.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32               | 3 ++-
 arch/x86/kernel/{ptrace_32.c => ptrace.c} | 0
 2 files changed, 2 insertions(+), 1 deletion(-)
 rename arch/x86/kernel/{ptrace_32.c => ptrace.c} (100%)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 20e23c4c18b6..b2d7aea4c82d 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -6,10 +6,11 @@ extra-y := head_32.o init_task.o vmlinux.lds
 CPPFLAGS_vmlinux.lds += -Ui386
 
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
-		ptrace_32.o time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
+		time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
 		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
+obj-y				+= ptrace.o
 obj-y				+= tls.o
 obj-y				+= step.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace.c
similarity index 100%
rename from arch/x86/kernel/ptrace_32.c
rename to arch/x86/kernel/ptrace.c

From e39c2891415b3b5c7381ece06bb45b3c7bdd4342 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 223/890] x86: ptrace FLAG_MASK cleanup

This cleans up the FLAG_MASK macro to use symbolic constants instead of a
magic number.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fed83d066135..b71226d653ed 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -32,10 +32,15 @@
 
 /*
  * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9).
- * Also masks reserved bits (31-22, 15, 5, 3, 1).
  */
-#define FLAG_MASK 0x00050dd5
+#define FLAG_MASK_32		((unsigned long)			\
+				 (X86_EFLAGS_CF | X86_EFLAGS_PF |	\
+				  X86_EFLAGS_AF | X86_EFLAGS_ZF |	\
+				  X86_EFLAGS_SF | X86_EFLAGS_TF |	\
+				  X86_EFLAGS_DF | X86_EFLAGS_OF |	\
+				  X86_EFLAGS_RF | X86_EFLAGS_AC))
+
+#define FLAG_MASK		FLAG_MASK_32
 
 static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {

From 06ee1b687ac91698ccd47fa652d5b3cf1bfcd806 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 224/890] x86: x86 ptrace getreg/putreg cleanup

This cleans up the getreg/putreg functions to move the special cases
(segment registers and eflags) out into their own subroutines.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 162 +++++++++++++++++++++++----------------
 1 file changed, 96 insertions(+), 66 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b71226d653ed..eaec75a4094b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -45,92 +45,122 @@
 static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
+	regno >>= 2;
 	if (regno > FS)
 		--regno;
 	return &regs->bx + regno;
 }
 
-static int putreg(struct task_struct *child,
-	unsigned long regno, unsigned long value)
+static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 {
-	struct pt_regs *regs = task_pt_regs(child);
-	regno >>= 2;
-	switch (regno) {
-	case GS:
-		if (value && (value & 3) != 3)
-			return -EIO;
-		child->thread.gs = value;
-		if (child == current)
+	/*
+	 * Returning the value truncates it to 16 bits.
+	 */
+	unsigned int retval;
+	if (offset != offsetof(struct user_regs_struct, gs))
+		retval = *pt_regs_access(task_pt_regs(task), offset);
+	else {
+		retval = task->thread.gs;
+		if (task == current)
+			savesegment(gs, retval);
+	}
+	return retval;
+}
+
+static int set_segment_reg(struct task_struct *task,
+			   unsigned long offset, u16 value)
+{
+	/*
+	 * The value argument was already truncated to 16 bits.
+	 */
+	if (value && (value & 3) != 3)
+		return -EIO;
+
+	if (offset != offsetof(struct user_regs_struct, gs))
+		*pt_regs_access(task_pt_regs(task), offset) = value;
+	else {
+		task->thread.gs = value;
+		if (task == current)
 			/*
 			 * The user-mode %gs is not affected by
 			 * kernel entry, so we must update the CPU.
 			 */
 			loadsegment(gs, value);
-		return 0;
-	case DS:
-	case ES:
-	case FS:
-		if (value && (value & 3) != 3)
-			return -EIO;
-		value &= 0xffff;
-		break;
-	case SS:
-	case CS:
-		if ((value & 3) != 3)
-			return -EIO;
-		value &= 0xffff;
-		break;
-	case EFL:
-		value &= FLAG_MASK;
-		/*
-		 * If the user value contains TF, mark that
-		 * it was not "us" (the debugger) that set it.
-		 * If not, make sure it stays set if we had.
-		 */
-		if (value & X86_EFLAGS_TF)
-			clear_tsk_thread_flag(child, TIF_FORCED_TF);
-		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			value |= X86_EFLAGS_TF;
-		value |= regs->flags & ~FLAG_MASK;
-		break;
 	}
-	*pt_regs_access(regs, regno) = value;
+
 	return 0;
 }
 
-static unsigned long getreg(struct task_struct *child, unsigned long regno)
+static unsigned long get_flags(struct task_struct *task)
 {
-	struct pt_regs *regs = task_pt_regs(child);
-	unsigned long retval = ~0UL;
+	unsigned long retval = task_pt_regs(task)->flags;
+
+	/*
+	 * If the debugger set TF, hide it from the readout.
+	 */
+	if (test_tsk_thread_flag(task, TIF_FORCED_TF))
+		retval &= ~X86_EFLAGS_TF;
 
-	regno >>= 2;
-	switch (regno) {
-	case EFL:
-		/*
-		 * If the debugger set TF, hide it from the readout.
-		 */
-		retval = regs->flags;
-		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			retval &= ~X86_EFLAGS_TF;
-		break;
-	case GS:
-		retval = child->thread.gs;
-		if (child == current)
-			savesegment(gs, retval);
-		break;
-	case DS:
-	case ES:
-	case FS:
-	case SS:
-	case CS:
-		retval = 0xffff;
-		/* fall through */
-	default:
-		retval &= *pt_regs_access(regs, regno);
-	}
 	return retval;
 }
 
+static int set_flags(struct task_struct *task, unsigned long value)
+{
+	struct pt_regs *regs = task_pt_regs(task);
+
+	/*
+	 * If the user value contains TF, mark that
+	 * it was not "us" (the debugger) that set it.
+	 * If not, make sure it stays set if we had.
+	 */
+	if (value & X86_EFLAGS_TF)
+		clear_tsk_thread_flag(task, TIF_FORCED_TF);
+	else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
+		value |= X86_EFLAGS_TF;
+
+	regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
+
+	return 0;
+}
+
+static int putreg(struct task_struct *child,
+		  unsigned long offset, unsigned long value)
+{
+	switch (offset) {
+	case offsetof(struct user_regs_struct, cs):
+	case offsetof(struct user_regs_struct, ds):
+	case offsetof(struct user_regs_struct, es):
+	case offsetof(struct user_regs_struct, fs):
+	case offsetof(struct user_regs_struct, gs):
+	case offsetof(struct user_regs_struct, ss):
+		return set_segment_reg(child, offset, value);
+
+	case offsetof(struct user_regs_struct, flags):
+		return set_flags(child, value);
+	}
+
+	*pt_regs_access(task_pt_regs(child), offset) = value;
+	return 0;
+}
+
+static unsigned long getreg(struct task_struct *task, unsigned long offset)
+{
+	switch (offset) {
+	case offsetof(struct user_regs_struct, cs):
+	case offsetof(struct user_regs_struct, ds):
+	case offsetof(struct user_regs_struct, es):
+	case offsetof(struct user_regs_struct, fs):
+	case offsetof(struct user_regs_struct, gs):
+	case offsetof(struct user_regs_struct, ss):
+		return get_segment_reg(task, offset);
+
+	case offsetof(struct user_regs_struct, flags):
+		return get_flags(task);
+	}
+
+	return *pt_regs_access(task_pt_regs(task), offset);
+}
+
 /*
  * This function is trivial and will be inlined by the compiler.
  * Having it separates the implementation details of debug

From 2047b08be67b70875d8765fc81d34ce28041bec3 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 225/890] x86: x86 ptrace getreg/putreg merge

This merges 64-bit support into the low-level register access
functions in arch/x86/kernel/ptrace.c, paving the way to share
this file between 32-bit and 64-bit builds.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 217 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 213 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index eaec75a4094b..c709868d28a5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,6 +24,8 @@
 #include <asm/debugreg.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <asm/prctl.h>
+#include <asm/proto.h>
 
 /*
  * does not yet catch signals sent when the child dies.
@@ -40,6 +42,16 @@
 				  X86_EFLAGS_DF | X86_EFLAGS_OF |	\
 				  X86_EFLAGS_RF | X86_EFLAGS_AC))
 
+/*
+ * Determines whether a value may be installed in a segment register.
+ */
+static inline bool invalid_selector(u16 value)
+{
+	return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL);
+}
+
+#ifdef CONFIG_X86_32
+
 #define FLAG_MASK		FLAG_MASK_32
 
 static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
@@ -73,7 +85,7 @@ static int set_segment_reg(struct task_struct *task,
 	/*
 	 * The value argument was already truncated to 16 bits.
 	 */
-	if (value && (value & 3) != 3)
+	if (invalid_selector(value))
 		return -EIO;
 
 	if (offset != offsetof(struct user_regs_struct, gs))
@@ -91,6 +103,142 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
+static unsigned long debugreg_addr_limit(struct task_struct *task)
+{
+	return TASK_SIZE - 3;
+}
+
+#else  /* CONFIG_X86_64 */
+
+#define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
+
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
+{
+	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
+	return &regs->r15 + (offset / sizeof(regs->r15));
+}
+
+static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
+{
+	/*
+	 * Returning the value truncates it to 16 bits.
+	 */
+	unsigned int seg;
+
+	switch (offset) {
+	case offsetof(struct user_regs_struct, fs):
+		if (task == current) {
+			/* Older gas can't assemble movq %?s,%r?? */
+			asm("movl %%fs,%0" : "=r" (seg));
+			return seg;
+		}
+		return task->thread.fsindex;
+	case offsetof(struct user_regs_struct, gs):
+		if (task == current) {
+			asm("movl %%gs,%0" : "=r" (seg));
+			return seg;
+		}
+		return task->thread.gsindex;
+	case offsetof(struct user_regs_struct, ds):
+		if (task == current) {
+			asm("movl %%ds,%0" : "=r" (seg));
+			return seg;
+		}
+		return task->thread.ds;
+	case offsetof(struct user_regs_struct, es):
+		if (task == current) {
+			asm("movl %%es,%0" : "=r" (seg));
+			return seg;
+		}
+		return task->thread.es;
+
+	case offsetof(struct user_regs_struct, cs):
+	case offsetof(struct user_regs_struct, ss):
+		break;
+	}
+	return *pt_regs_access(task_pt_regs(task), offset);
+}
+
+static int set_segment_reg(struct task_struct *task,
+			   unsigned long offset, u16 value)
+{
+	/*
+	 * The value argument was already truncated to 16 bits.
+	 */
+	if (invalid_selector(value))
+		return -EIO;
+
+	switch (offset) {
+	case offsetof(struct user_regs_struct,fs):
+		/*
+		 * If this is setting fs as for normal 64-bit use but
+		 * setting fs_base has implicitly changed it, leave it.
+		 */
+		if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
+		     task->thread.fs != 0) ||
+		    (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
+		     task->thread.fs == 0))
+			break;
+		task->thread.fsindex = value;
+		if (task == current)
+			loadsegment(fs, task->thread.fsindex);
+		break;
+	case offsetof(struct user_regs_struct,gs):
+		/*
+		 * If this is setting gs as for normal 64-bit use but
+		 * setting gs_base has implicitly changed it, leave it.
+		 */
+		if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
+		     task->thread.gs != 0) ||
+		    (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
+		     task->thread.gs == 0))
+			break;
+		task->thread.gsindex = value;
+		if (task == current)
+			load_gs_index(task->thread.gsindex);
+		break;
+	case offsetof(struct user_regs_struct,ds):
+		task->thread.ds = value;
+		if (task == current)
+			loadsegment(ds, task->thread.ds);
+		break;
+	case offsetof(struct user_regs_struct,es):
+		task->thread.es = value;
+		if (task == current)
+			loadsegment(es, task->thread.es);
+		break;
+
+		/*
+		 * Can't actually change these in 64-bit mode.
+		 */
+	case offsetof(struct user_regs_struct,cs):
+#ifdef CONFIG_IA32_EMULATION
+		if (test_tsk_thread_flag(task, TIF_IA32))
+			task_pt_regs(task)->cs = value;
+		break;
+#endif
+	case offsetof(struct user_regs_struct,ss):
+#ifdef CONFIG_IA32_EMULATION
+		if (test_tsk_thread_flag(task, TIF_IA32))
+			task_pt_regs(task)->ss = value;
+		break;
+#endif
+	}
+
+	return 0;
+}
+
+static unsigned long debugreg_addr_limit(struct task_struct *task)
+{
+#ifdef CONFIG_IA32_EMULATION
+	if (test_tsk_thread_flag(task, TIF_IA32))
+		return IA32_PAGE_OFFSET - 3;
+#endif
+	return TASK_SIZE64 - 7;
+}
+
+#endif	/* CONFIG_X86_32 */
+
 static unsigned long get_flags(struct task_struct *task)
 {
 	unsigned long retval = task_pt_regs(task)->flags;
@@ -137,6 +285,29 @@ static int putreg(struct task_struct *child,
 
 	case offsetof(struct user_regs_struct, flags):
 		return set_flags(child, value);
+
+#ifdef CONFIG_X86_64
+	case offsetof(struct user_regs_struct,fs_base):
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		/*
+		 * When changing the segment base, use do_arch_prctl
+		 * to set either thread.fs or thread.fsindex and the
+		 * corresponding GDT slot.
+		 */
+		if (child->thread.fs != value)
+			return do_arch_prctl(child, ARCH_SET_FS, value);
+		return 0;
+	case offsetof(struct user_regs_struct,gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		if (child->thread.gs != value)
+			return do_arch_prctl(child, ARCH_SET_GS, value);
+		return 0;
+#endif
 	}
 
 	*pt_regs_access(task_pt_regs(child), offset) = value;
@@ -156,6 +327,37 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
 
 	case offsetof(struct user_regs_struct, flags):
 		return get_flags(task);
+
+#ifdef CONFIG_X86_64
+	case offsetof(struct user_regs_struct, fs_base): {
+		/*
+		 * do_arch_prctl may have used a GDT slot instead of
+		 * the MSR.  To userland, it appears the same either
+		 * way, except the %fs segment selector might not be 0.
+		 */
+		unsigned int seg = task->thread.fsindex;
+		if (task->thread.fs != 0)
+			return task->thread.fs;
+		if (task == current)
+			asm("movl %%fs,%0" : "=r" (seg));
+		if (seg != FS_TLS_SEL)
+			return 0;
+		return get_desc_base(&task->thread.tls_array[FS_TLS]);
+	}
+	case offsetof(struct user_regs_struct, gs_base): {
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		unsigned int seg = task->thread.gsindex;
+		if (task->thread.gs != 0)
+			return task->thread.gs;
+		if (task == current)
+			asm("movl %%gs,%0" : "=r" (seg));
+		if (seg != GS_TLS_SEL)
+			return 0;
+		return get_desc_base(&task->thread.tls_array[GS_TLS]);
+	}
+#endif
 	}
 
 	return *pt_regs_access(task_pt_regs(task), offset);
@@ -187,7 +389,7 @@ static int ptrace_set_debugreg(struct task_struct *child,
 	if (unlikely(n == 4 || n == 5))
 		return -EIO;
 
-	if (n < 4 && unlikely(data >= TASK_SIZE - 3))
+	if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
 		return -EIO;
 
 	switch (n) {
@@ -197,6 +399,8 @@ static int ptrace_set_debugreg(struct task_struct *child,
 	case 3:		child->thread.debugreg3 = data; break;
 
 	case 6:
+		if ((data & ~0xffffffffUL) != 0)
+			return -EIO;
 		child->thread.debugreg6 = data;
 		break;
 
@@ -215,7 +419,7 @@ static int ptrace_set_debugreg(struct task_struct *child,
 		 * data in the watchpoint case.
 		 *
 		 * The invalid values are:
-		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.
+		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.	[32-bit]
 		 * - R/Wi == 0x10 (break on I/O reads or writes), so
 		 *   mask |= 0x4444.
 		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
@@ -231,9 +435,14 @@ static int ptrace_set_debugreg(struct task_struct *child,
 		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
 		 * See the AMD manual no. 24593 (AMD64 System Programming)
 		 */
+#ifdef CONFIG_X86_32
+#define	DR7_MASK	0x5f54
+#else
+#define	DR7_MASK	0x5554
+#endif
 		data &= ~DR_CONTROL_RESERVED;
 		for (i = 0; i < 4; i++)
-			if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+			if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
 				return -EIO;
 		child->thread.debugreg7 = data;
 		if (data)

From e9c86c789f067f75211cedb3f13aa58369b0d14a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 226/890] x86: x86 ptrace arch merge

This adds 64-bit support to arch_ptrace in arch/x86/kernel/ptrace.c,
so this function can be used for native ptrace on both 32 and 64.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 65 +++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c709868d28a5..7161d60e152d 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -463,12 +463,13 @@ static int ptrace_set_debugreg(struct task_struct *child,
 void ptrace_disable(struct task_struct *child)
 {
 	user_disable_single_step(child);
+#ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct user * dummy = NULL;
 	int i, ret;
 	unsigned long __user *datap = (unsigned long __user *)data;
 
@@ -484,18 +485,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		unsigned long tmp;
 
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 ||
-		    addr > sizeof(struct user) - 3)
+		if ((addr & (sizeof(data) - 1)) || addr < 0 ||
+		    addr >= sizeof(struct user))
 			break;
 
 		tmp = 0;  /* Default return condition */
-		if(addr < FRAME_SIZE*sizeof(long))
+		if (addr < sizeof(struct user_regs_struct))
 			tmp = getreg(child, addr);
-		if(addr >= (long) &dummy->u_debugreg[0] &&
-		   addr <= (long) &dummy->u_debugreg[7]){
-			addr -= (long) &dummy->u_debugreg[0];
-			addr = addr >> 2;
-			tmp = ptrace_get_debugreg(child, addr);
+		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+			 addr <= offsetof(struct user, u_debugreg[7])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
 		}
 		ret = put_user(tmp, datap);
 		break;
@@ -509,34 +509,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 ||
-		    addr > sizeof(struct user) - 3)
+		if ((addr & (sizeof(data) - 1)) || addr < 0 ||
+		    addr >= sizeof(struct user))
 			break;
 
-		if (addr < FRAME_SIZE*sizeof(long)) {
+		if (addr < sizeof(struct user_regs_struct))
 			ret = putreg(child, addr, data);
-			break;
+		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+			 addr <= offsetof(struct user, u_debugreg[7])) {
+			addr -= offsetof(struct user, u_debugreg[0]);
+			ret = ptrace_set_debugreg(child,
+						  addr / sizeof(data), data);
 		}
-		/* We need to be very careful here.  We implicitly
-		   want to modify a portion of the task_struct, and we
-		   have to be selective about what portions we allow someone
-		   to modify. */
-
-		  ret = -EIO;
-		  if(addr >= (long) &dummy->u_debugreg[0] &&
-		     addr <= (long) &dummy->u_debugreg[7]){
-			  addr -= (long) &dummy->u_debugreg;
-			  addr = addr >> 2;
-			  ret = ptrace_set_debugreg(child, addr, data);
-		  }
-		  break;
+		break;
 
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-	  	if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
+		if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) {
 			ret = -EIO;
 			break;
 		}
-		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+		for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
 			__put_user(getreg(child, i), datap);
 			datap++;
 		}
@@ -546,11 +538,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
 		unsigned long tmp;
-	  	if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
+		if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) {
 			ret = -EIO;
 			break;
 		}
-		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+		for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
 			__get_user(tmp, datap);
 			putreg(child, i, tmp);
 			datap++;
@@ -584,6 +576,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
+#ifdef CONFIG_X86_32
 	case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
 		if (!access_ok(VERIFY_WRITE, datap,
 			       sizeof(struct user_fxsr_struct))) {
@@ -606,7 +599,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
 		break;
 	}
+#endif
 
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 	case PTRACE_GET_THREAD_AREA:
 		if (addr < 0)
 			return -EIO;
@@ -620,6 +615,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = do_set_thread_area(child, addr,
 					 (struct user_desc __user *) data, 0);
 		break;
+#endif
+
+#ifdef CONFIG_X86_64
+		/* normal 64bit interface to access TLS data.
+		   Works just like arch_prctl, except that the arguments
+		   are reversed. */
+	case PTRACE_ARCH_PRCTL:
+		ret = do_arch_prctl(child, data, addr);
+		break;
+#endif
 
 	default:
 		ret = ptrace_request(child, request, addr, data);

From 86976cd805eccf46d9b720bb188a540fc5769427 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 227/890] x86: x86 ptrace merge syscall trace

This moves the 64-bit syscall tracing functions into ptrace.c,
so that ptrace_64.c becomes entirely obsolete.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 64 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7161d60e152d..509804957f5f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -634,6 +634,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	return ret;
 }
 
+#ifdef CONFIG_X86_32
+
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 {
 	struct siginfo info;
@@ -731,3 +733,65 @@ out:
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 	return 1;
 }
+
+#else  /* CONFIG_X86_64 */
+
+static void syscall_trace(struct pt_regs *regs)
+{
+
+#if 0
+	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
+	       current->comm,
+	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
+	       current_thread_info()->flags, current->ptrace);
+#endif
+
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				? 0x80 : 0));
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+{
+	/* do the secure computing check first */
+	secure_computing(regs->orig_ax);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE)
+	    && (current->ptrace & PT_PTRACED))
+		syscall_trace(regs);
+
+	if (unlikely(current->audit_context)) {
+		if (test_thread_flag(TIF_IA32)) {
+			audit_syscall_entry(AUDIT_ARCH_I386,
+					    regs->orig_ax,
+					    regs->bx, regs->cx,
+					    regs->dx, regs->si);
+		} else {
+			audit_syscall_entry(AUDIT_ARCH_X86_64,
+					    regs->orig_ax,
+					    regs->di, regs->si,
+					    regs->dx, regs->r10);
+		}
+	}
+}
+
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+
+	if ((test_thread_flag(TIF_SYSCALL_TRACE)
+	     || test_thread_flag(TIF_SINGLESTEP))
+	    && (current->ptrace & PT_PTRACED))
+		syscall_trace(regs);
+}
+
+#endif	/* CONFIG_X86_32 */

From cb757c41f3c2e1ac6f950f9d070e9849983efc18 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 228/890] x86: x86 ia32 ptrace getreg/putreg merge

This reimplements the 64-bit IA32-emulation register access
functions in arch/x86/kernel/ptrace.c, where they can share
some guts with the native access functions directly.

These functions are not used yet, but this paves the way to move
IA32 ptrace support into this file to share its local functions.

[akpm@linuxfoundation.org: Build fix]

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 130 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 509804957f5f..d56aa18309f8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -215,14 +215,14 @@ static int set_segment_reg(struct task_struct *task,
 #ifdef CONFIG_IA32_EMULATION
 		if (test_tsk_thread_flag(task, TIF_IA32))
 			task_pt_regs(task)->cs = value;
-		break;
 #endif
+		break;
 	case offsetof(struct user_regs_struct,ss):
 #ifdef CONFIG_IA32_EMULATION
 		if (test_tsk_thread_flag(task, TIF_IA32))
 			task_pt_regs(task)->ss = value;
-		break;
 #endif
+		break;
 	}
 
 	return 0;
@@ -634,6 +634,132 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	return ret;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+
+#include <asm/user32.h>
+
+#define R32(l,q)							\
+	case offsetof(struct user32, regs.l):				\
+		regs->q = value; break
+
+#define SEG32(rs)							\
+	case offsetof(struct user32, regs.rs):				\
+		return set_segment_reg(child,				\
+				       offsetof(struct user_regs_struct, rs), \
+				       value);				\
+		break
+
+static int putreg32(struct task_struct *child, unsigned regno, u32 value)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	switch (regno) {
+
+	SEG32(cs);
+	SEG32(ds);
+	SEG32(es);
+	SEG32(fs);
+	SEG32(gs);
+	SEG32(ss);
+
+	R32(ebx, bx);
+	R32(ecx, cx);
+	R32(edx, dx);
+	R32(edi, di);
+	R32(esi, si);
+	R32(ebp, bp);
+	R32(eax, ax);
+	R32(orig_eax, orig_ax);
+	R32(eip, ip);
+	R32(esp, sp);
+
+	case offsetof(struct user32, regs.eflags):
+		return set_flags(child, value);
+
+	case offsetof(struct user32, u_debugreg[0]) ...
+		offsetof(struct user32, u_debugreg[7]):
+		regno -= offsetof(struct user32, u_debugreg[0]);
+		return ptrace_set_debugreg(child, regno / 4, value);
+
+	default:
+		if (regno > sizeof(struct user32) || (regno & 3))
+			return -EIO;
+
+		/*
+		 * Other dummy fields in the virtual user structure
+		 * are ignored
+		 */
+		break;
+	}
+	return 0;
+}
+
+#undef R32
+#undef SEG32
+
+#define R32(l,q)							\
+	case offsetof(struct user32, regs.l):				\
+		*val = regs->q; break
+
+#define SEG32(rs)							\
+	case offsetof(struct user32, regs.rs):				\
+		*val = get_segment_reg(child,				\
+				       offsetof(struct user_regs_struct, rs)); \
+		break
+
+static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	switch (regno) {
+
+	SEG32(ds);
+	SEG32(es);
+	SEG32(fs);
+	SEG32(gs);
+
+	R32(cs, cs);
+	R32(ss, ss);
+	R32(ebx, bx);
+	R32(ecx, cx);
+	R32(edx, dx);
+	R32(edi, di);
+	R32(esi, si);
+	R32(ebp, bp);
+	R32(eax, ax);
+	R32(orig_eax, orig_ax);
+	R32(eip, ip);
+	R32(esp, sp);
+
+	case offsetof(struct user32, regs.eflags):
+		*val = get_flags(child);
+		break;
+
+	case offsetof(struct user32, u_debugreg[0]) ...
+		offsetof(struct user32, u_debugreg[7]):
+		regno -= offsetof(struct user32, u_debugreg[0]);
+		*val = ptrace_get_debugreg(child, regno / 4);
+		break;
+
+	default:
+		if (regno > sizeof(struct user32) || (regno & 3))
+			return -EIO;
+
+		/*
+		 * Other dummy fields in the virtual user structure
+		 * are ignored
+		 */
+		*val = 0;
+		break;
+	}
+	return 0;
+}
+
+#undef R32
+#undef SEG32
+
+#endif	/* CONFIG_IA32_EMULATION */
+
 #ifdef CONFIG_X86_32
 
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)

From 099cd6e9dac84baafdef00c3955ee68e71282f86 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:01 +0100
Subject: [PATCH 229/890] x86: x86 ia32 ptrace arch merge

This moves the sys32_ptrace code into arch/x86/kernel/ptrace.c,
verbatim except for a few hard-coded sizes replaced with sizeof.
Here this code can use the shared local functions in this file.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 214 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d56aa18309f8..3399c1be79b8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -636,6 +636,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 #ifdef CONFIG_IA32_EMULATION
 
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <asm/ia32.h>
+#include <asm/fpu32.h>
 #include <asm/user32.h>
 
 #define R32(l,q)							\
@@ -758,6 +762,216 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 #undef R32
 #undef SEG32
 
+static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+{
+	siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
+	compat_siginfo_t __user *si32 = compat_ptr(data);
+	siginfo_t ssi;
+	int ret;
+
+	if (request == PTRACE_SETSIGINFO) {
+		memset(&ssi, 0, sizeof(siginfo_t));
+		ret = copy_siginfo_from_user32(&ssi, si32);
+		if (ret)
+			return ret;
+		if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
+			return -EFAULT;
+	}
+	ret = sys_ptrace(request, pid, addr, (unsigned long)si);
+	if (ret)
+		return ret;
+	if (request == PTRACE_GETSIGINFO) {
+		if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
+			return -EFAULT;
+		ret = copy_siginfo_to_user32(si32, &ssi);
+	}
+	return ret;
+}
+
+asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
+{
+	struct task_struct *child;
+	struct pt_regs *childregs;
+	void __user *datap = compat_ptr(data);
+	int ret;
+	__u32 val;
+
+	switch (request) {
+	case PTRACE_TRACEME:
+	case PTRACE_ATTACH:
+	case PTRACE_KILL:
+	case PTRACE_CONT:
+	case PTRACE_SINGLESTEP:
+	case PTRACE_SINGLEBLOCK:
+	case PTRACE_DETACH:
+	case PTRACE_SYSCALL:
+	case PTRACE_OLDSETOPTIONS:
+	case PTRACE_SETOPTIONS:
+	case PTRACE_SET_THREAD_AREA:
+	case PTRACE_GET_THREAD_AREA:
+		return sys_ptrace(request, pid, addr, data);
+
+	default:
+		return -EINVAL;
+
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+	case PTRACE_POKEDATA:
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEUSR:
+	case PTRACE_PEEKUSR:
+	case PTRACE_GETREGS:
+	case PTRACE_SETREGS:
+	case PTRACE_SETFPREGS:
+	case PTRACE_GETFPREGS:
+	case PTRACE_SETFPXREGS:
+	case PTRACE_GETFPXREGS:
+	case PTRACE_GETEVENTMSG:
+		break;
+
+	case PTRACE_SETSIGINFO:
+	case PTRACE_GETSIGINFO:
+		return ptrace32_siginfo(request, pid, addr, data);
+	}
+
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret < 0)
+		goto out;
+
+	childregs = task_pt_regs(child);
+
+	switch (request) {
+	case PTRACE_PEEKDATA:
+	case PTRACE_PEEKTEXT:
+		ret = 0;
+		if (access_process_vm(child, addr, &val, sizeof(u32), 0) !=
+		    sizeof(u32))
+			ret = -EIO;
+		else
+			ret = put_user(val, (unsigned int __user *)datap);
+		break;
+
+	case PTRACE_POKEDATA:
+	case PTRACE_POKETEXT:
+		ret = 0;
+		if (access_process_vm(child, addr, &data, sizeof(u32), 1) !=
+		    sizeof(u32))
+			ret = -EIO;
+		break;
+
+	case PTRACE_PEEKUSR:
+		ret = getreg32(child, addr, &val);
+		if (ret == 0)
+			ret = put_user(val, (__u32 __user *)datap);
+		break;
+
+	case PTRACE_POKEUSR:
+		ret = putreg32(child, addr, data);
+		break;
+
+	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+		int i;
+
+		if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
+			ret = -EIO;
+			break;
+		}
+		ret = 0;
+		for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(__u32)) {
+			getreg32(child, i, &val);
+			ret |= __put_user(val, (u32 __user *)datap);
+			datap += sizeof(u32);
+		}
+		break;
+	}
+
+	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+		unsigned long tmp;
+		int i;
+
+		if (!access_ok(VERIFY_READ, datap, 16*4)) {
+			ret = -EIO;
+			break;
+		}
+		ret = 0;
+		for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(u32)) {
+			ret |= __get_user(tmp, (u32 __user *)datap);
+			putreg32(child, i, tmp);
+			datap += sizeof(u32);
+		}
+		break;
+	}
+
+	case PTRACE_GETFPREGS:
+		ret = -EIO;
+		if (!access_ok(VERIFY_READ, compat_ptr(data),
+			       sizeof(struct user_i387_struct)))
+			break;
+		save_i387_ia32(child, datap, childregs, 1);
+		ret = 0;
+			break;
+
+	case PTRACE_SETFPREGS:
+		ret = -EIO;
+		if (!access_ok(VERIFY_WRITE, datap,
+			       sizeof(struct user_i387_struct)))
+			break;
+		ret = 0;
+		/* don't check EFAULT to be bug-to-bug compatible to i386 */
+		restore_i387_ia32(child, datap, 1);
+		break;
+
+	case PTRACE_GETFPXREGS: {
+		struct user32_fxsr_struct __user *u = datap;
+
+		init_fpu(child);
+		ret = -EIO;
+		if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
+			break;
+			ret = -EFAULT;
+		if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
+			break;
+		ret = __put_user(childregs->cs, &u->fcs);
+		ret |= __put_user(child->thread.ds, &u->fos);
+		break;
+	}
+	case PTRACE_SETFPXREGS: {
+		struct user32_fxsr_struct __user *u = datap;
+
+		unlazy_fpu(child);
+		ret = -EIO;
+		if (!access_ok(VERIFY_READ, u, sizeof(*u)))
+			break;
+		/*
+		 * no checking to be bug-to-bug compatible with i386.
+		 * but silence warning
+		 */
+		if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
+			;
+		set_stopped_child_used_math(child);
+		child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user(child->ptrace_message,
+			       (unsigned int __user *)compat_ptr(data));
+		break;
+
+	default:
+		BUG();
+	}
+
+ out:
+	put_task_struct(child);
+	return ret;
+}
+
 #endif	/* CONFIG_IA32_EMULATION */
 
 #ifdef CONFIG_X86_32

From cbc9d9d98215f08ed998228e7bce88502d1ce360 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 230/890] x86: x86 ptrace merge complete

This switches over the 64-bit build to use the shared ptrace code,
instead of the old ptrace_64.c and arch/x86/ia32/ptrace32.c code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/Makefile      | 2 +-
 arch/x86/kernel/Makefile_64 | 3 ++-
 include/asm-x86/ptrace.h    | 3 ---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index ea6088640847..ec71cfeac87e 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \
-	ia32_binfmt.o fpu32.o ptrace32.o
+	ia32_binfmt.o fpu32.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index d908f0175e76..b41e838dc788 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -7,12 +7,13 @@ CPPFLAGS_vmlinux.lds += -Ux86_64
 EXTRA_AFLAGS	:= -traditional
 
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
-		ptrace_64.o time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \
+		time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \
 		x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
 		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
 		i8253.o io_delay.o rtc.o
 
+obj-y				+= ptrace.o
 obj-y				+= step.o
 
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 1b7a8b8d6f60..9228870f6157 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -164,9 +164,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
 struct task_struct;
 
-extern unsigned long ptrace_get_debugreg(struct task_struct *child, int n);
-extern int ptrace_set_debugreg(struct task_struct *child, int n, unsigned long);
-
 extern unsigned long
 convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
 

From 25149b62d3e6a3e737af39bd4a0b4e97de0811b7 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 231/890] x86: x86 ptrace merge removals

This removes the old separate 64-bit and ia32 ptrace source files.
They are no longer used.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ptrace32.c    | 411 -------------------------------
 arch/x86/kernel/ptrace_64.c | 470 ------------------------------------
 2 files changed, 881 deletions(-)
 delete mode 100644 arch/x86/ia32/ptrace32.c
 delete mode 100644 arch/x86/kernel/ptrace_64.c

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
deleted file mode 100644
index d5663e295330..000000000000
--- a/arch/x86/ia32/ptrace32.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * 32bit ptrace for x86-64.
- *
- * Copyright 2001,2002 Andi Kleen, SuSE Labs.
- * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier
- * copyright.
- *
- * This allows to access 64bit processes too; but there is no way to
- * see the extended register contents.
- */
-
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#include <asm/compat.h>
-#include <asm/uaccess.h>
-#include <asm/user32.h>
-#include <asm/user.h>
-#include <asm/errno.h>
-#include <asm/debugreg.h>
-#include <asm/i387.h>
-#include <asm/fpu32.h>
-#include <asm/ia32.h>
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (31-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-#define R32(l,q)							\
-	case offsetof(struct user32, regs.l):				\
-		regs->q = val; break;
-
-static int putreg32(struct task_struct *child, unsigned regno, u32 val)
-{
-	struct pt_regs *regs = task_pt_regs(child);
-
-	switch (regno) {
-	case offsetof(struct user32, regs.fs):
-		if (val && (val & 3) != 3)
-			return -EIO;
-		child->thread.fsindex = val & 0xffff;
-		if (child == current)
-			loadsegment(fs, child->thread.fsindex);
-		break;
-	case offsetof(struct user32, regs.gs):
-		if (val && (val & 3) != 3)
-			return -EIO;
-		child->thread.gsindex = val & 0xffff;
-		if (child == current)
-			load_gs_index(child->thread.gsindex);
-		break;
-	case offsetof(struct user32, regs.ds):
-		if (val && (val & 3) != 3)
-			return -EIO;
-		child->thread.ds = val & 0xffff;
-		if (child == current)
-			loadsegment(ds, child->thread.ds);
-		break;
-	case offsetof(struct user32, regs.es):
-		child->thread.es = val & 0xffff;
-		if (child == current)
-			loadsegment(es, child->thread.ds);
-		break;
-	case offsetof(struct user32, regs.ss):
-		if ((val & 3) != 3)
-			return -EIO;
-		regs->ss = val & 0xffff;
-		break;
-	case offsetof(struct user32, regs.cs):
-		if ((val & 3) != 3)
-			return -EIO;
-		regs->cs = val & 0xffff;
-		break;
-
-	R32(ebx, bx);
-	R32(ecx, cx);
-	R32(edx, dx);
-	R32(edi, di);
-	R32(esi, si);
-	R32(ebp, bp);
-	R32(eax, ax);
-	R32(orig_eax, orig_ax);
-	R32(eip, ip);
-	R32(esp, sp);
-
-	case offsetof(struct user32, regs.eflags):
-		val &= FLAG_MASK;
-		/*
-		 * If the user value contains TF, mark that
-		 * it was not "us" (the debugger) that set it.
-		 * If not, make sure it stays set if we had.
-		 */
-		if (val & X86_EFLAGS_TF)
-			clear_tsk_thread_flag(child, TIF_FORCED_TF);
-		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			val |= X86_EFLAGS_TF;
-		regs->flags = val | (regs->flags & ~FLAG_MASK);
-		break;
-
-	case offsetof(struct user32, u_debugreg[0]) ...
-		offsetof(struct user32, u_debugreg[7]):
-		regno -= offsetof(struct user32, u_debugreg[0]);
-		return ptrace_set_debugreg(child, regno / 4, val);
-
-	default:
-		if (regno > sizeof(struct user32) || (regno & 3))
-			return -EIO;
-
-		/*
-		 * Other dummy fields in the virtual user structure
-		 * are ignored
-		 */
-		break;
-	}
-	return 0;
-}
-
-#undef R32
-
-#define R32(l,q)							\
-	case offsetof(struct user32, regs.l):				\
-		*val = regs->q; break
-
-static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
-{
-	struct pt_regs *regs = task_pt_regs(child);
-
-	switch (regno) {
-	case offsetof(struct user32, regs.fs):
-		*val = child->thread.fsindex;
-		if (child == current)
-			asm("movl %%fs,%0" : "=r" (*val));
-		break;
-	case offsetof(struct user32, regs.gs):
-		*val = child->thread.gsindex;
-		if (child == current)
-			asm("movl %%gs,%0" : "=r" (*val));
-		break;
-	case offsetof(struct user32, regs.ds):
-		*val = child->thread.ds;
-		if (child == current)
-			asm("movl %%ds,%0" : "=r" (*val));
-		break;
-	case offsetof(struct user32, regs.es):
-		*val = child->thread.es;
-		if (child == current)
-			asm("movl %%es,%0" : "=r" (*val));
-		break;
-
-	R32(cs, cs);
-	R32(ss, ss);
-	R32(ebx, bx);
-	R32(ecx, cx);
-	R32(edx, dx);
-	R32(edi, di);
-	R32(esi, si);
-	R32(ebp, bp);
-	R32(eax, ax);
-	R32(orig_eax, orig_ax);
-	R32(eip, ip);
-	R32(esp, sp);
-
-	case offsetof(struct user32, regs.eflags):
-		/*
-		 * If the debugger set TF, hide it from the readout.
-		 */
-		*val = regs->flags;
-		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			*val &= ~X86_EFLAGS_TF;
-		break;
-
-	case offsetof(struct user32, u_debugreg[0]) ...
-		offsetof(struct user32, u_debugreg[7]):
-		regno -= offsetof(struct user32, u_debugreg[0]);
-		*val = ptrace_get_debugreg(child, regno / 4);
-		break;
-
-	default:
-		if (regno > sizeof(struct user32) || (regno & 3))
-			return -EIO;
-
-		/*
-		 * Other dummy fields in the virtual user structure
-		 * are ignored
-		 */
-		*val = 0;
-		break;
-	}
-	return 0;
-}
-
-#undef R32
-
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
-{
-	siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
-	compat_siginfo_t __user *si32 = compat_ptr(data);
-	siginfo_t ssi;
-	int ret;
-
-	if (request == PTRACE_SETSIGINFO) {
-		memset(&ssi, 0, sizeof(siginfo_t));
-		ret = copy_siginfo_from_user32(&ssi, si32);
-		if (ret)
-			return ret;
-		if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
-			return -EFAULT;
-	}
-	ret = sys_ptrace(request, pid, addr, (unsigned long)si);
-	if (ret)
-		return ret;
-	if (request == PTRACE_GETSIGINFO) {
-		if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
-			return -EFAULT;
-		ret = copy_siginfo_to_user32(si32, &ssi);
-	}
-	return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
-	struct task_struct *child;
-	struct pt_regs *childregs;
-	void __user *datap = compat_ptr(data);
-	int ret;
-	__u32 val;
-
-	switch (request) {
-	case PTRACE_TRACEME:
-	case PTRACE_ATTACH:
-	case PTRACE_KILL:
-	case PTRACE_CONT:
-	case PTRACE_SINGLESTEP:
-	case PTRACE_SINGLEBLOCK:
-	case PTRACE_DETACH:
-	case PTRACE_SYSCALL:
-	case PTRACE_OLDSETOPTIONS:
-	case PTRACE_SETOPTIONS:
-	case PTRACE_SET_THREAD_AREA:
-	case PTRACE_GET_THREAD_AREA:
-		return sys_ptrace(request, pid, addr, data);
-
-	default:
-		return -EINVAL;
-
-	case PTRACE_PEEKTEXT:
-	case PTRACE_PEEKDATA:
-	case PTRACE_POKEDATA:
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEUSR:
-	case PTRACE_PEEKUSR:
-	case PTRACE_GETREGS:
-	case PTRACE_SETREGS:
-	case PTRACE_SETFPREGS:
-	case PTRACE_GETFPREGS:
-	case PTRACE_SETFPXREGS:
-	case PTRACE_GETFPXREGS:
-	case PTRACE_GETEVENTMSG:
-		break;
-
-	case PTRACE_SETSIGINFO:
-	case PTRACE_GETSIGINFO:
-		return ptrace32_siginfo(request, pid, addr, data);
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child))
-		return PTR_ERR(child);
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out;
-
-	childregs = task_pt_regs(child);
-
-	switch (request) {
-	case PTRACE_PEEKDATA:
-	case PTRACE_PEEKTEXT:
-		ret = 0;
-		if (access_process_vm(child, addr, &val, sizeof(u32), 0) !=
-		    sizeof(u32))
-			ret = -EIO;
-		else
-			ret = put_user(val, (unsigned int __user *)datap);
-		break;
-
-	case PTRACE_POKEDATA:
-	case PTRACE_POKETEXT:
-		ret = 0;
-		if (access_process_vm(child, addr, &data, sizeof(u32), 1) !=
-		    sizeof(u32))
-			ret = -EIO;
-		break;
-
-	case PTRACE_PEEKUSR:
-		ret = getreg32(child, addr, &val);
-		if (ret == 0)
-			ret = put_user(val, (__u32 __user *)datap);
-		break;
-
-	case PTRACE_POKEUSR:
-		ret = putreg32(child, addr, data);
-		break;
-
-	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-		int i;
-
-		if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (i = 0; i <= 16*4; i += sizeof(__u32)) {
-			getreg32(child, i, &val);
-			ret |= __put_user(val, (u32 __user *)datap);
-			datap += sizeof(u32);
-		}
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-		int i;
-
-		if (!access_ok(VERIFY_READ, datap, 16*4)) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (i = 0; i <= 16*4; i += sizeof(u32)) {
-			ret |= __get_user(tmp, (u32 __user *)datap);
-			putreg32(child, i, tmp);
-			datap += sizeof(u32);
-		}
-		break;
-	}
-
-	case PTRACE_GETFPREGS:
-		ret = -EIO;
-		if (!access_ok(VERIFY_READ, compat_ptr(data),
-			       sizeof(struct user_i387_struct)))
-			break;
-		save_i387_ia32(child, datap, childregs, 1);
-		ret = 0;
-			break;
-
-	case PTRACE_SETFPREGS:
-		ret = -EIO;
-		if (!access_ok(VERIFY_WRITE, datap,
-			       sizeof(struct user_i387_struct)))
-			break;
-		ret = 0;
-		/* don't check EFAULT to be bug-to-bug compatible to i386 */
-		restore_i387_ia32(child, datap, 1);
-		break;
-
-	case PTRACE_GETFPXREGS: {
-		struct user32_fxsr_struct __user *u = datap;
-
-		init_fpu(child);
-		ret = -EIO;
-		if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
-			break;
-			ret = -EFAULT;
-		if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
-			break;
-		ret = __put_user(childregs->cs, &u->fcs);
-		ret |= __put_user(child->thread.ds, &u->fos);
-		break;
-	}
-	case PTRACE_SETFPXREGS: {
-		struct user32_fxsr_struct __user *u = datap;
-
-		unlazy_fpu(child);
-		ret = -EIO;
-		if (!access_ok(VERIFY_READ, u, sizeof(*u)))
-			break;
-		/*
-		 * no checking to be bug-to-bug compatible with i386.
-		 * but silence warning
-		 */
-		if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
-			;
-		set_stopped_child_used_math(child);
-		child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETEVENTMSG:
-		ret = put_user(child->ptrace_message,
-			       (unsigned int __user *)compat_ptr(data));
-		break;
-
-	default:
-		BUG();
-	}
-
- out:
-	put_task_struct(child);
-	return ret;
-}
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
deleted file mode 100644
index 5979dbe8e0a2..000000000000
--- a/arch/x86/kernel/ptrace_64.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/* By Ross Biro 1/23/92 */
-/*
- * Pentium III FXSR, SSE support
- *	Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * x86-64 port 2000-2002 Andi Kleen
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/prctl.h>
-#include <asm/i387.h>
-#include <asm/debugreg.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/ia32.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (63-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
-	user_disable_single_step(child);
-}
-
-static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
-{
-	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
-	return &regs->r15 + (offset / sizeof(regs->r15));
-}
-
-static int putreg(struct task_struct *child,
-	unsigned long regno, unsigned long value)
-{
-	struct pt_regs *regs = task_pt_regs(child);
-	switch (regno) {
-	case offsetof(struct user_regs_struct,fs):
-		if (value && (value & 3) != 3)
-			return -EIO;
-		child->thread.fsindex = value & 0xffff;
-		if (child == current)
-			loadsegment(fs, child->thread.fsindex);
-		return 0;
-	case offsetof(struct user_regs_struct,gs):
-		if (value && (value & 3) != 3)
-			return -EIO;
-		child->thread.gsindex = value & 0xffff;
-		if (child == current)
-			load_gs_index(child->thread.gsindex);
-		return 0;
-	case offsetof(struct user_regs_struct,ds):
-		if (value && (value & 3) != 3)
-			return -EIO;
-		child->thread.ds = value & 0xffff;
-		if (child == current)
-			loadsegment(ds, child->thread.ds);
-		return 0;
-	case offsetof(struct user_regs_struct,es):
-		if (value && (value & 3) != 3)
-			return -EIO;
-		child->thread.es = value & 0xffff;
-		if (child == current)
-			loadsegment(es, child->thread.es);
-		return 0;
-	case offsetof(struct user_regs_struct,ss):
-		if ((value & 3) != 3)
-			return -EIO;
-		value &= 0xffff;
-		return 0;
-	case offsetof(struct user_regs_struct,fs_base):
-		if (value >= TASK_SIZE_OF(child))
-			return -EIO;
-		/*
-		 * When changing the segment base, use do_arch_prctl
-		 * to set either thread.fs or thread.fsindex and the
-		 * corresponding GDT slot.
-		 */
-		if (child->thread.fs != value)
-			return do_arch_prctl(child, ARCH_SET_FS, value);
-		return 0;
-	case offsetof(struct user_regs_struct,gs_base):
-		/*
-		 * Exactly the same here as the %fs handling above.
-		 */
-		if (value >= TASK_SIZE_OF(child))
-			return -EIO;
-		if (child->thread.gs != value)
-			return do_arch_prctl(child, ARCH_SET_GS, value);
-		return 0;
-	case offsetof(struct user_regs_struct,flags):
-		value &= FLAG_MASK;
-		/*
-		 * If the user value contains TF, mark that
-		 * it was not "us" (the debugger) that set it.
-		 * If not, make sure it stays set if we had.
-		 */
-		if (value & X86_EFLAGS_TF)
-			clear_tsk_thread_flag(child, TIF_FORCED_TF);
-		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			value |= X86_EFLAGS_TF;
-		value |= regs->flags & ~FLAG_MASK;
-		break;
-	case offsetof(struct user_regs_struct,cs):
-		if ((value & 3) != 3)
-			return -EIO;
-		value &= 0xffff;
-		break;
-	}
-	*pt_regs_access(regs, regno) = value;
-	return 0;
-}
-
-static unsigned long getreg(struct task_struct *child, unsigned long regno)
-{
-	struct pt_regs *regs = task_pt_regs(child);
-	unsigned long val;
-	unsigned int seg;
-	switch (regno) {
-	case offsetof(struct user_regs_struct, fs):
-		if (child == current) {
-			/* Older gas can't assemble movq %?s,%r?? */
-			asm("movl %%fs,%0" : "=r" (seg));
-			return seg;
-		}
-		return child->thread.fsindex;
-	case offsetof(struct user_regs_struct, gs):
-		if (child == current) {
-			asm("movl %%gs,%0" : "=r" (seg));
-			return seg;
-		}
-		return child->thread.gsindex;
-	case offsetof(struct user_regs_struct, ds):
-		if (child == current) {
-			asm("movl %%ds,%0" : "=r" (seg));
-			return seg;
-		}
-		return child->thread.ds;
-	case offsetof(struct user_regs_struct, es):
-		if (child == current) {
-			asm("movl %%es,%0" : "=r" (seg));
-			return seg;
-		}
-		return child->thread.es;
-	case offsetof(struct user_regs_struct, fs_base):
-		/*
-		 * do_arch_prctl may have used a GDT slot instead of
-		 * the MSR.  To userland, it appears the same either
-		 * way, except the %fs segment selector might not be 0.
-		 */
-		if (child->thread.fs != 0)
-			return child->thread.fs;
-		seg = child->thread.fsindex;
-		if (child == current)
-			asm("movl %%fs,%0" : "=r" (seg));
-		if (seg != FS_TLS_SEL)
-			return 0;
-		return get_desc_base(&child->thread.tls_array[FS_TLS]);
-	case offsetof(struct user_regs_struct, gs_base):
-		/*
-		 * Exactly the same here as the %fs handling above.
-		 */
-		if (child->thread.gs != 0)
-			return child->thread.gs;
-		seg = child->thread.gsindex;
-		if (child == current)
-			asm("movl %%gs,%0" : "=r" (seg));
-		if (seg != GS_TLS_SEL)
-			return 0;
-		return get_desc_base(&child->thread.tls_array[GS_TLS]);
-	case offsetof(struct user_regs_struct, flags):
-		/*
-		 * If the debugger set TF, hide it from the readout.
-		 */
-		val = regs->flags;
-		if (test_tsk_thread_flag(child, TIF_IA32))
-			val &= 0xffffffff;
-		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-			val &= ~X86_EFLAGS_TF;
-		return val;
-	default:
-		val = *pt_regs_access(regs, regno);
-		if (test_tsk_thread_flag(child, TIF_IA32))
-			val &= 0xffffffff;
-		return val;
-	}
-
-}
-
-unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
-{
-	switch (n) {
-	case 0:		return child->thread.debugreg0;
-	case 1:		return child->thread.debugreg1;
-	case 2:		return child->thread.debugreg2;
-	case 3:		return child->thread.debugreg3;
-	case 6:		return child->thread.debugreg6;
-	case 7:		return child->thread.debugreg7;
-	}
-	return 0;
-}
-
-int ptrace_set_debugreg(struct task_struct *child, int n, unsigned long data)
-{
-	int i;
-
-	if (n < 4) {
-		int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
-		if (unlikely(data >= TASK_SIZE_OF(child) - dsize))
-			return -EIO;
-	}
-
-	switch (n) {
-	case 0:		child->thread.debugreg0 = data; break;
-	case 1:		child->thread.debugreg1 = data; break;
-	case 2:		child->thread.debugreg2 = data; break;
-	case 3:		child->thread.debugreg3 = data; break;
-
-	case 6:
-		if (data >> 32)
-			return -EIO;
-		child->thread.debugreg6 = data;
-		break;
-
-	case 7:
-		/*
-		 * See ptrace_32.c for an explanation of this awkward check.
-		 */
-		data &= ~DR_CONTROL_RESERVED;
-		for (i = 0; i < 4; i++)
-			if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-				return -EIO;
-		child->thread.debugreg7 = data;
-		if (data)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
-	}
-
-	return 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	long ret;
-	unsigned ui;
-
-	switch (request) {
-	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		ret = generic_ptrace_peekdata(child, addr, data);
-		break;
-
-	/* read the word at location addr in the USER area. */
-	case PTRACE_PEEKUSR: {
-		unsigned long tmp;
-
-		ret = -EIO;
-		if ((addr & 7) ||
-		    addr > sizeof(struct user) - 7)
-			break;
-
-		tmp = 0;
-		if (addr < sizeof(struct user_regs_struct))
-			tmp = getreg(child, addr);
-		else if (addr >= offsetof(struct user, u_debugreg[0])) {
-			addr -= offsetof(struct user, u_debugreg[0]);
-			tmp = ptrace_get_debugreg(child, addr / sizeof(long));
-		}
-
-		ret = put_user(tmp,(unsigned long __user *) data);
-		break;
-	}
-
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = generic_ptrace_pokedata(child, addr, data);
-		break;
-
-	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
-		ret = -EIO;
-		if ((addr & 7) ||
-		    addr > sizeof(struct user) - 7)
-			break;
-
-		if (addr < sizeof(struct user_regs_struct))
-			ret = putreg(child, addr, data);
-		else if (addr >= offsetof(struct user, u_debugreg[0])) {
-			addr -= offsetof(struct user, u_debugreg[0]);
-			ret = ptrace_set_debugreg(child,
-						  addr / sizeof(long), data);
-		}
-		break;
-
-#ifdef CONFIG_IA32_EMULATION
-		/* This makes only sense with 32bit programs. Allow a
-		   64bit debugger to fully examine them too. Better
-		   don't use it against 64bit processes, use
-		   PTRACE_ARCH_PRCTL instead. */
-	case PTRACE_GET_THREAD_AREA:
-		if (addr < 0)
-			return -EIO;
-		ret = do_get_thread_area(child, addr,
-					 (struct user_desc __user *) data);
-
-		break;
-	case PTRACE_SET_THREAD_AREA:
-		if (addr < 0)
-			return -EIO;
-		ret = do_set_thread_area(child, addr,
-					 (struct user_desc __user *) data, 0);
-		break;
-#endif
-		/* normal 64bit interface to access TLS data.
-		   Works just like arch_prctl, except that the arguments
-		   are reversed. */
-	case PTRACE_ARCH_PRCTL:
-		ret = do_arch_prctl(child, data, addr);
-		break;
-
-	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-	  	if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
-			       sizeof(struct user_regs_struct))) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
-			ret |= __put_user(getreg(child, ui),(unsigned long __user *) data);
-			data += sizeof(long);
-		}
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-	  	if (!access_ok(VERIFY_READ, (unsigned __user *)data,
-			       sizeof(struct user_regs_struct))) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
-			ret = __get_user(tmp, (unsigned long __user *) data);
-			if (ret)
-				break;
-			ret = putreg(child, ui, tmp);
-			if (ret)
-				break;
-			data += sizeof(long);
-		}
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */
-		if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
-			       sizeof(struct user_i387_struct))) {
-			ret = -EIO;
-			break;
-		}
-		ret = get_fpregs((struct user_i387_struct __user *)data, child);
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */
-		if (!access_ok(VERIFY_READ, (unsigned __user *)data,
-			       sizeof(struct user_i387_struct))) {
-			ret = -EIO;
-			break;
-		}
-		set_stopped_child_used_math(child);
-		ret = set_fpregs(child, (struct user_i387_struct __user *)data);
-		break;
-	}
-
-	default:
-		ret = ptrace_request(child, request, addr, data);
-		break;
-	}
-	return ret;
-}
-
-static void syscall_trace(struct pt_regs *regs)
-{
-
-#if 0
-	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-	       current->comm,
-	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace);
-#endif
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
-
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
-{
-	/* do the secure computing check first */
-	secure_computing(regs->orig_ax);
-
-	if (test_thread_flag(TIF_SYSCALL_TRACE)
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace(regs);
-
-	if (unlikely(current->audit_context)) {
-		if (test_thread_flag(TIF_IA32)) {
-			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_ax,
-					    regs->bx, regs->cx,
-					    regs->dx, regs->si);
-		} else {
-			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_ax,
-					    regs->di, regs->si,
-					    regs->dx, regs->r10);
-		}
-	}
-}
-
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
-{
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
-
-	if ((test_thread_flag(TIF_SYSCALL_TRACE)
-	     || test_thread_flag(TIF_SINGLESTEP))
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace(regs);
-}

From faca62273b602ab482fb7d3d940dbf41ef08b00e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 232/890] x86: use generic register name in the thread and tss
 structures

This changes size-specific register names (eip/rip, esp/rsp, etc.) to
generic names in the thread and tss structures.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_32.c |  4 ++--
 arch/x86/kernel/cpu/common.c     |  2 +-
 arch/x86/kernel/doublefault_32.c | 15 +++++++------
 arch/x86/kernel/entry_32.S       |  6 +++---
 arch/x86/kernel/paravirt_32.c    |  2 +-
 arch/x86/kernel/process_32.c     | 12 +++++------
 arch/x86/kernel/process_64.c     | 16 +++++++-------
 arch/x86/kernel/smpboot_32.c     |  8 +++----
 arch/x86/kernel/smpboot_64.c     |  6 +++---
 arch/x86/kernel/traps_32.c       |  6 +++---
 arch/x86/kernel/traps_64.c       |  4 ++--
 arch/x86/kernel/vm86_32.c        | 16 +++++++-------
 arch/x86/kernel/vmi_32.c         | 10 ++++-----
 arch/x86/lguest/boot.c           |  6 +++---
 arch/x86/vdso/vdso32-setup.c     |  4 ++--
 arch/x86/xen/enlighten.c         |  6 +++---
 arch/x86/xen/smp.c               |  4 ++--
 drivers/lguest/x86/core.c        |  4 ++--
 include/asm-x86/paravirt.h       |  6 +++---
 include/asm-x86/processor_32.h   | 37 +++++++++++++++-----------------
 include/asm-x86/processor_64.h   | 20 ++++++++---------
 include/asm-x86/system_32.h      |  4 ++--
 include/asm-x86/system_64.h      |  2 +-
 23 files changed, 99 insertions(+), 101 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 4fc24a61f431..415313556708 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -101,8 +101,8 @@ void foo(void)
 	OFFSET(pbe_orig_address, pbe, orig_address);
 	OFFSET(pbe_next, pbe, next);
 
-	/* Offset from the sysenter stack to tss.esp0 */
-	DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) -
+	/* Offset from the sysenter stack to tss.sp0 */
+	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
 		 sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5db2a163bf4b..235cd615b89d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -691,7 +691,7 @@ void __cpuinit cpu_init(void)
 		BUG();
 	enter_lazy_tlb(&init_mm, curr);
 
-	load_esp0(t, thread);
+	load_sp0(t, thread);
 	set_tss_desc(cpu,t);
 	load_TR_desc();
 	load_LDT(&init_mm.context);
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 40978af630e7..cc19a3ea403a 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -35,12 +35,13 @@ static void doublefault_fn(void)
 		if (ptr_ok(tss)) {
 			struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
 
-			printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+			printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
+			       t->ip, t->sp);
 
 			printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
-				t->eax, t->ebx, t->ecx, t->edx);
+				t->ax, t->bx, t->cx, t->dx);
 			printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
-				t->esi, t->edi);
+				t->si, t->di);
 		}
 	}
 
@@ -50,15 +51,15 @@ static void doublefault_fn(void)
 
 struct tss_struct doublefault_tss __cacheline_aligned = {
 	.x86_tss = {
-		.esp0		= STACK_START,
+		.sp0		= STACK_START,
 		.ss0		= __KERNEL_DS,
 		.ldt		= 0,
 		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
 
-		.eip		= (unsigned long) doublefault_fn,
+		.ip		= (unsigned long) doublefault_fn,
 		/* 0x2 bit is always set */
-		.eflags		= X86_EFLAGS_SF | 0x2,
-		.esp		= STACK_START,
+		.flags		= X86_EFLAGS_SF | 0x2,
+		.sp		= STACK_START,
 		.es		= __USER_DS,
 		.cs		= __KERNEL_CS,
 		.ss		= __KERNEL_DS,
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 153bb87a4eea..6a474e1028c7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -288,7 +288,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_SIGNAL_FRAME
 	CFI_DEF_CFA esp, 0
 	CFI_REGISTER esp, ebp
-	movl TSS_sysenter_esp0(%esp),%esp
+	movl TSS_sysenter_sp0(%esp),%esp
 sysenter_past_esp:
 	/*
 	 * No need to follow this irqs on/off section: the syscall
@@ -743,7 +743,7 @@ END(device_not_available)
  * that sets up the real kernel stack. Check here, since we can't
  * allow the wrong stack to be used.
  *
- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
  * already pushed 3 words if it hits on the sysenter instruction:
  * eflags, cs and eip.
  *
@@ -755,7 +755,7 @@ END(device_not_available)
 	cmpw $__KERNEL_CS,4(%esp);		\
 	jne ok;					\
 label:						\
-	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
+	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
 	CFI_DEF_CFA esp, 0;			\
 	CFI_UNDEFINED eip;			\
 	pushfl;					\
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 706b0562ea40..f4e3a8e01cf2 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -382,7 +382,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_ldt_entry = write_dt_entry,
 	.write_gdt_entry = write_dt_entry,
 	.write_idt_entry = write_dt_entry,
-	.load_esp0 = native_load_esp0,
+	.load_sp0 = native_load_sp0,
 
 	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
 	.iret = native_iret,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3744cf63682c..add3bf34e205 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -75,7 +75,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
  */
 unsigned long thread_saved_pc(struct task_struct *tsk)
 {
-	return ((unsigned long *)tsk->thread.esp)[3];
+	return ((unsigned long *)tsk->thread.sp)[3];
 }
 
 /*
@@ -488,10 +488,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	childregs->ax = 0;
 	childregs->sp = sp;
 
-	p->thread.esp = (unsigned long) childregs;
-	p->thread.esp0 = (unsigned long) (childregs+1);
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.sp0 = (unsigned long) (childregs+1);
 
-	p->thread.eip = (unsigned long) ret_from_fork;
+	p->thread.ip = (unsigned long) ret_from_fork;
 
 	savesegment(gs,p->thread.gs);
 
@@ -718,7 +718,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	/*
 	 * Reload esp0.
 	 */
-	load_esp0(tss, next);
+	load_sp0(tss, next);
 
 	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
@@ -851,7 +851,7 @@ unsigned long get_wchan(struct task_struct *p)
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 	stack_page = (unsigned long)task_stack_page(p);
-	sp = p->thread.esp;
+	sp = p->thread.sp;
 	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
 		return 0;
 	/* include/asm-i386/system.h:switch_to() pushes bp last. */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index efbb1a2eab97..238193822e23 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -493,9 +493,9 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	if (sp == ~0UL)
 		childregs->sp = (unsigned long)childregs;
 
-	p->thread.rsp = (unsigned long) childregs;
-	p->thread.rsp0 = (unsigned long) (childregs+1);
-	p->thread.userrsp = me->thread.userrsp; 
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.sp0 = (unsigned long) (childregs+1);
+	p->thread.usersp = me->thread.usersp;
 
 	set_tsk_thread_flag(p, TIF_FORK);
 
@@ -607,7 +607,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
-	tss->rsp0 = next->rsp0;
+	tss->sp0 = next->sp0;
 
 	/* 
 	 * Switch DS and ES.
@@ -666,8 +666,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* 
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->userrsp = read_pda(oldrsp); 
-	write_pda(oldrsp, next->userrsp); 
+	prev->usersp = read_pda(oldrsp);
+	write_pda(oldrsp, next->usersp);
 	write_pda(pcurrent, next_p); 
 
 	write_pda(kernelstack,
@@ -769,9 +769,9 @@ unsigned long get_wchan(struct task_struct *p)
 	if (!p || p == current || p->state==TASK_RUNNING)
 		return 0; 
 	stack = (unsigned long)task_stack_page(p);
-	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
+	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 		return 0;
-	fp = *(u64 *)(p->thread.rsp);
+	fp = *(u64 *)(p->thread.sp);
 	do { 
 		if (fp < (unsigned long)stack ||
 		    fp > (unsigned long)stack+THREAD_SIZE)
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 3566191832b3..0f294d6e22cf 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -454,7 +454,7 @@ void __devinit initialize_secondary(void)
 		"movl %0,%%esp\n\t"
 		"jmp *%1"
 		:
-		:"m" (current->thread.esp),"m" (current->thread.eip));
+		:"m" (current->thread.sp),"m" (current->thread.ip));
 }
 
 /* Static state in head.S used to set up a CPU */
@@ -753,7 +753,7 @@ static inline struct task_struct * __cpuinit alloc_idle_task(int cpu)
 		/* initialize thread_struct.  we really want to avoid destroy
 		 * idle tread
 		 */
-		idle->thread.esp = (unsigned long)task_pt_regs(idle);
+		idle->thread.sp = (unsigned long)task_pt_regs(idle);
 		init_idle(idle, cpu);
 		return idle;
 	}
@@ -798,7 +798,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
  	per_cpu(current_task, cpu) = idle;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 
-	idle->thread.eip = (unsigned long) start_secondary;
+	idle->thread.ip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
@@ -808,7 +808,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	/* So we see what's up   */
 	printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
-	stack_start.sp = (void *) idle->thread.esp;
+	stack_start.sp = (void *) idle->thread.sp;
 
 	irq_ctx_init(cpu);
 
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index ac1089f2b917..c3f2736ba530 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -577,7 +577,7 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	c_idle.idle = get_idle_for_cpu(cpu);
 
 	if (c_idle.idle) {
-		c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
+		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
 			(THREAD_SIZE +  task_stack_page(c_idle.idle))) - 1);
 		init_idle(c_idle.idle, cpu);
 		goto do_rest;
@@ -613,8 +613,8 @@ do_rest:
 
 	start_rip = setup_trampoline();
 
-	init_rsp = c_idle.idle->thread.rsp;
-	per_cpu(init_tss,cpu).rsp0 = init_rsp;
+	init_rsp = c_idle.idle->thread.sp;
+	per_cpu(init_tss,cpu).sp0 = init_rsp;
 	initial_code = start_secondary;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 27713553cc59..57491942cc4e 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -163,7 +163,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long dummy;
 		stack = &dummy;
 		if (task != current)
-			stack = (unsigned long *)task->thread.esp;
+			stack = (unsigned long *)task->thread.sp;
 	}
 
 #ifdef CONFIG_FRAME_POINTER
@@ -173,7 +173,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			asm ("movl %%ebp, %0" : "=r" (bp) : );
 		} else {
 			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.esp;
+			bp = *(unsigned long *) task->thread.sp;
 		}
 	}
 #endif
@@ -253,7 +253,7 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 	if (sp == NULL) {
 		if (task)
-			sp = (unsigned long*)task->thread.esp;
+			sp = (unsigned long*)task->thread.sp;
 		else
 			sp = (unsigned long *)&sp;
 	}
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index f7fecf9d47c3..965f2cc3a013 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -230,7 +230,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		unsigned long dummy;
 		stack = &dummy;
 		if (tsk && tsk != current)
-			stack = (unsigned long *)tsk->thread.rsp;
+			stack = (unsigned long *)tsk->thread.sp;
 	}
 
 	/*
@@ -366,7 +366,7 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp)
 
 	if (sp == NULL) {
 		if (tsk)
-			sp = (unsigned long *)tsk->thread.rsp;
+			sp = (unsigned long *)tsk->thread.sp;
 		else
 			sp = (unsigned long *)&sp;
 	}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 980e85b90091..e85bb44265cb 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -147,10 +147,10 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 	}
 
 	tss = &per_cpu(init_tss, get_cpu());
-	current->thread.esp0 = current->thread.saved_esp0;
+	current->thread.sp0 = current->thread.saved_sp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
-	load_esp0(tss, &current->thread);
-	current->thread.saved_esp0 = 0;
+	load_sp0(tss, &current->thread);
+	current->thread.saved_sp0 = 0;
 	put_cpu();
 
 	ret = KVM86->regs32;
@@ -207,7 +207,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
 	int tmp, ret = -EPERM;
 
 	tsk = current;
-	if (tsk->thread.saved_esp0)
+	if (tsk->thread.saved_sp0)
 		goto out;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
 				       offsetof(struct kernel_vm86_struct, vm86plus) -
@@ -256,7 +256,7 @@ asmlinkage int sys_vm86(struct pt_regs regs)
 
 	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
 	ret = -EPERM;
-	if (tsk->thread.saved_esp0)
+	if (tsk->thread.saved_sp0)
 		goto out;
 	v86 = (struct vm86plus_struct __user *)regs.cx;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
@@ -318,15 +318,15 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
  * Save old state, set default return value (%ax) to 0
  */
 	info->regs32->ax = 0;
-	tsk->thread.saved_esp0 = tsk->thread.esp0;
+	tsk->thread.saved_sp0 = tsk->thread.sp0;
 	tsk->thread.saved_fs = info->regs32->fs;
 	savesegment(gs, tsk->thread.saved_gs);
 
 	tss = &per_cpu(init_tss, get_cpu());
-	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
-	load_esp0(tss, &tsk->thread);
+	load_sp0(tss, &tsk->thread);
 	put_cpu();
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 599b6f2ed562..4cfda7dbe90f 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -62,7 +62,7 @@ static struct {
 	void (*cpuid)(void /* non-c */);
 	void (*_set_ldt)(u32 selector);
 	void (*set_tr)(u32 selector);
-	void (*set_kernel_stack)(u32 selector, u32 esp0);
+	void (*set_kernel_stack)(u32 selector, u32 sp0);
 	void (*allocate_page)(u32, u32, u32, u32, u32);
 	void (*release_page)(u32, u32);
 	void (*set_pte)(pte_t, pte_t *, unsigned);
@@ -214,17 +214,17 @@ static void vmi_set_tr(void)
 	vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
 }
 
-static void vmi_load_esp0(struct tss_struct *tss,
+static void vmi_load_sp0(struct tss_struct *tss,
 				   struct thread_struct *thread)
 {
-	tss->x86_tss.esp0 = thread->esp0;
+	tss->x86_tss.sp0 = thread->sp0;
 
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
 	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
 		tss->x86_tss.ss1 = thread->sysenter_cs;
 		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 	}
-	vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
+	vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
 }
 
 static void vmi_flush_tlb_user(void)
@@ -793,7 +793,7 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
 	para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
 	para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
-	para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
+	para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ea46d05853bb..c751e3c03e85 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -755,10 +755,10 @@ static void lguest_time_init(void)
  * segment), the privilege level (we're privilege level 1, the Host is 0 and
  * will not tolerate us trying to use that), the stack pointer, and the number
  * of pages in the stack. */
-static void lguest_load_esp0(struct tss_struct *tss,
+static void lguest_load_sp0(struct tss_struct *tss,
 				     struct thread_struct *thread)
 {
-	lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,
+	lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0,
 		   THREAD_SIZE/PAGE_SIZE);
 }
 
@@ -957,7 +957,7 @@ __init void lguest_init(void)
 	pv_cpu_ops.cpuid = lguest_cpuid;
 	pv_cpu_ops.load_idt = lguest_load_idt;
 	pv_cpu_ops.iret = lguest_iret;
-	pv_cpu_ops.load_esp0 = lguest_load_esp0;
+	pv_cpu_ops.load_sp0 = lguest_load_sp0;
 	pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
 	pv_cpu_ops.set_ldt = lguest_set_ldt;
 	pv_cpu_ops.load_tls = lguest_load_tls;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d97a6d7d062b..e0feb66a2408 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -243,9 +243,9 @@ void enable_sep_cpu(void)
 	}
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
-	tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
 	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
+	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
 	put_cpu();	
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 29517faaa735..d81e8d709102 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -499,11 +499,11 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 	preempt_enable();
 }
 
-static void xen_load_esp0(struct tss_struct *tss,
+static void xen_load_sp0(struct tss_struct *tss,
 			  struct thread_struct *thread)
 {
 	struct multicall_space mcs = xen_mc_entry(0);
-	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
+	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
@@ -968,7 +968,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.write_ldt_entry = xen_write_ldt_entry,
 	.write_gdt_entry = xen_write_gdt_entry,
 	.write_idt_entry = xen_write_idt_entry,
-	.load_esp0 = xen_load_esp0,
+	.load_sp0 = xen_load_sp0,
 
 	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8e1234e14559..aafc54437403 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -239,10 +239,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);
 
 	ctxt->user_regs.cs = __KERNEL_CS;
-	ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 
 	ctxt->kernel_ss = __KERNEL_DS;
-	ctxt->kernel_sp = idle->thread.esp0;
+	ctxt->kernel_sp = idle->thread.sp0;
 
 	ctxt->event_callback_cs     = __KERNEL_CS;
 	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 96d0fd07c57d..44adb00e1490 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -94,7 +94,7 @@ static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
 	/* Set up the two "TSS" members which tell the CPU what stack to use
 	 * for traps which do directly into the Guest (ie. traps at privilege
 	 * level 1). */
-	pages->state.guest_tss.esp1 = lg->esp1;
+	pages->state.guest_tss.sp1 = lg->esp1;
 	pages->state.guest_tss.ss1 = lg->ss1;
 
 	/* Copy direct-to-Guest trap entries. */
@@ -416,7 +416,7 @@ void __init lguest_arch_host_init(void)
 		/* We know where we want the stack to be when the Guest enters
 		 * the switcher: in pages->regs.  The stack grows upwards, so
 		 * we start it at the end of that structure. */
-		state->guest_tss.esp0 = (long)(&pages->regs + 1);
+		state->guest_tss.sp0 = (long)(&pages->regs + 1);
 		/* And this is the GDT entry to use for the stack: we keep a
 		 * couple of special LGUEST entries. */
 		state->guest_tss.ss0 = LGUEST_DS;
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index be7b934f6c54..d1780e32722e 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -101,7 +101,7 @@ struct pv_cpu_ops {
 				int entrynum, u32 low, u32 high);
 	void (*write_idt_entry)(struct desc_struct *,
 				int entrynum, u32 low, u32 high);
-	void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
+	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);
 
@@ -449,10 +449,10 @@ static inline int paravirt_enabled(void)
 	return pv_info.paravirt_enabled;
 }
 
-static inline void load_esp0(struct tss_struct *tss,
+static inline void load_sp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
 {
-	PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
+	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
 }
 
 #define ARCH_SETUP			pv_init_ops.arch_setup();
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index d50a4b48d441..6846cc346400 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -292,20 +292,17 @@ struct thread_struct;
 /* This is the TSS defined by the hardware. */
 struct i386_hw_tss {
 	unsigned short	back_link,__blh;
-	unsigned long	esp0;
+	unsigned long	sp0;
 	unsigned short	ss0,__ss0h;
-	unsigned long	esp1;
+	unsigned long	sp1;
 	unsigned short	ss1,__ss1h;	/* ss1 is used to cache MSR_IA32_SYSENTER_CS */
-	unsigned long	esp2;
+	unsigned long	sp2;
 	unsigned short	ss2,__ss2h;
 	unsigned long	__cr3;
-	unsigned long	eip;
-	unsigned long	eflags;
-	unsigned long	eax,ecx,edx,ebx;
-	unsigned long	esp;
-	unsigned long	ebp;
-	unsigned long	esi;
-	unsigned long	edi;
+	unsigned long	ip;
+	unsigned long	flags;
+	unsigned long	ax, cx, dx, bx;
+	unsigned long	sp, bp, si, di;
 	unsigned short	es, __esh;
 	unsigned short	cs, __csh;
 	unsigned short	ss, __ssh;
@@ -346,10 +343,10 @@ struct tss_struct {
 struct thread_struct {
 /* cached TLS descriptors. */
 	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
-	unsigned long	esp0;
+	unsigned long	sp0;
 	unsigned long	sysenter_cs;
-	unsigned long	eip;
-	unsigned long	esp;
+	unsigned long	ip;
+	unsigned long	sp;
 	unsigned long	fs;
 	unsigned long	gs;
 /* Hardware debugging registers */
@@ -366,7 +363,7 @@ struct thread_struct {
 /* virtual 86 mode info */
 	struct vm86_struct __user * vm86_info;
 	unsigned long		screen_bitmap;
-	unsigned long		v86flags, v86mask, saved_esp0;
+	unsigned long		v86flags, v86mask, saved_sp0;
 	unsigned int		saved_fs, saved_gs;
 /* IO permissions */
 	unsigned long	*io_bitmap_ptr;
@@ -378,7 +375,7 @@ struct thread_struct {
 };
 
 #define INIT_THREAD  {							\
-	.esp0 = sizeof(init_stack) + (long)&init_stack,			\
+	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
 	.vm86_info = NULL,						\
 	.sysenter_cs = __KERNEL_CS,					\
 	.io_bitmap_ptr = NULL,						\
@@ -393,7 +390,7 @@ struct thread_struct {
  */
 #define INIT_TSS  {							\
 	.x86_tss = {							\
-		.esp0		= sizeof(init_stack) + (long)&init_stack, \
+		.sp0		= sizeof(init_stack) + (long)&init_stack, \
 		.ss0		= __KERNEL_DS,				\
 		.ss1		= __KERNEL_CS,				\
 		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		\
@@ -503,9 +500,9 @@ static inline void rep_nop(void)
 
 #define cpu_relax()	rep_nop()
 
-static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
-	tss->x86_tss.esp0 = thread->esp0;
+	tss->x86_tss.sp0 = thread->sp0;
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
 	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
 		tss->x86_tss.ss1 = thread->sysenter_cs;
@@ -585,9 +582,9 @@ static inline void native_set_iopl_mask(unsigned mask)
 #define paravirt_enabled() 0
 #define __cpuid native_cpuid
 
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
-	native_load_esp0(tss, thread);
+	native_load_sp0(tss, thread);
 }
 
 /*
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 797770113e6d..0780f3e3fdfe 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -177,9 +177,9 @@ union i387_union {
 
 struct tss_struct {
 	u32 reserved1;
-	u64 rsp0;	
-	u64 rsp1;
-	u64 rsp2;
+	u64 sp0;
+	u64 sp1;
+	u64 sp2;
 	u64 reserved2;
 	u64 ist[7];
 	u32 reserved3;
@@ -216,9 +216,9 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
 struct thread_struct {
-	unsigned long	rsp0;
-	unsigned long	rsp;
-	unsigned long 	userrsp;	/* Copy from PDA */ 
+	unsigned long	sp0;
+	unsigned long	sp;
+	unsigned long 	usersp;	/* Copy from PDA */
 	unsigned long	fs;
 	unsigned long	gs;
 	unsigned short	es, ds, fsindex, gsindex;	
@@ -245,11 +245,11 @@ struct thread_struct {
 } __attribute__((aligned(16)));
 
 #define INIT_THREAD  { \
-	.rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }
 
 #define INIT_TSS  { \
-	.rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }
 
 #define INIT_MMAP \
@@ -293,10 +293,10 @@ extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
  * Return saved PC of a blocked thread.
  * What is this good for? it will be always the scheduler or ret_from_fork.
  */
-#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8))
+#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
 
 extern unsigned long get_wchan(struct task_struct *p);
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.rsp0 - 1)
+#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 #define KSTK_EIP(tsk) (task_pt_regs(tsk)->ip)
 #define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
 
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index db6283eb5e46..f5b3f77f5310 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -28,9 +28,9 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 		     "1:\t"						\
 		     "popl %%ebp\n\t"					\
 		     "popfl"						\
-		     :"=m" (prev->thread.esp),"=m" (prev->thread.eip),	\
+		     :"=m" (prev->thread.sp),"=m" (prev->thread.ip),	\
 		      "=a" (last),"=S" (esi),"=D" (edi)			\
-		     :"m" (next->thread.esp),"m" (next->thread.eip),	\
+		     :"m" (next->thread.sp),"m" (next->thread.ip),	\
 		      "2" (prev), "d" (next));				\
 } while (0)
 
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 6e9e4841a2da..3dcb217a7202 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -40,7 +40,7 @@
 		     RESTORE_CONTEXT						    \
 		     : "=a" (last)					  	  \
 		     : [next] "S" (next), [prev] "D" (prev),			  \
-		       [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
+		       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
 		       [ti_flags] "i" (offsetof(struct thread_info, flags)),\
 		       [tif_fork] "i" (TIF_FORK),			  \
 		       [thread_info] "i" (offsetof(struct task_struct, stack)), \

From 2ba7deef09dad6662dc4fa8b275af8d0794fd9fc Mon Sep 17 00:00:00 2001
From: Len Brown <lenb@kernel.org>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 233/890] x86: 32-bit IOAPIC: de-fang IRQ compression

commit c434b7a6aedfe428ad17cd61b21b125a7b7a29ce
(x86: avoid wasting IRQs for PCI devices)
created a concept of "IRQ compression" on i386
to conserve IRQ numbers on systems with many
sparsely populated IO APICs.

The same scheme was also added to x86_64,
but later removed when x86_64 recieved an IRQ over-haul
that made it unnecessary -- including per-CPU
IRQ vectors that greatly increased the IRQ capacity
on the machine.

i386 has not received the analogous over-haul,
and thus a previous attempt to delete IRQ compression
from i386 was rejected on the theory that there may
exist machines that actually need it.  The fact is
that the author of IRQ compression patch was unable
to confirm the actual existence of such a system.

As a result, all i386 kernels with IOAPIC support
pay the following:

1. confusion

IRQ compression re-names the traditional IOAPIC
pin numbers (aka ACPI GSI's) into sequential IRQ #s:

ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 20 (level, low) -> IRQ 16
ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 21 (level, low) -> IRQ 17
ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 22 (level, low) -> IRQ 18
ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 23 (level, low) -> IRQ 19
ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 20 (level, low) -> IRQ 16

This makes /proc/interrupts look different
depending on system configuration and device probe order.
It is also different than the x86_64 kernel running
on the exact same system.  As a result, programmers
get confused when comparing systems.

2. complexity

The IRQ code in Linux is already overly complex,
and IRQ compression makes it worse.  There have
already been two bug workarounds related to IRQ
compression -- the IRQ0 timer workaround and
the VIA PCI IRQ workaround.

3. size

All i386 kernels with IOAPIC support contain an int[4096] --
a 4 page array to contain the renamed IRQs.

So while the irq compression code on i386 should really
be deleted -- even before merging the x86_64 irq-overhaul,
this patch simply disables it on all high volume systems
to avoid problems #1 and #2 on most all i386 systems.

A large system with pin numbers >=64 will still have compression
to conserve limited IRQ numbers for sparse IOAPICS.  However,
the vast majority of the planet, those with only pin numbers < 64
will use an identity GSI -> IRQ mapping.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/x86/kernel/mpparse_32.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 22fc8d7dec11..bfcfc41f5607 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -1041,13 +1041,14 @@ void __init mp_config_acpi_legacy_irqs (void)
 }
 
 #define MAX_GSI_NUM	4096
+#define IRQ_COMPRESSION_START	64
 
 int mp_register_gsi(u32 gsi, int triggering, int polarity)
 {
 	int ioapic = -1;
 	int ioapic_pin = 0;
 	int idx, bit = 0;
-	static int pci_irq = 16;
+	static int pci_irq = IRQ_COMPRESSION_START;
 	/*
 	 * Mapping between Global System Interrups, which
 	 * represent all possible interrupts, and IRQs
@@ -1086,12 +1087,16 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 	if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
 		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
 			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-		return gsi_to_irq[gsi];
+		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
 	}
 
 	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
 
-	if (triggering == ACPI_LEVEL_SENSITIVE) {
+	/*
+	 * For GSI >= 64, use IRQ compression
+	 */
+	if ((gsi >= IRQ_COMPRESSION_START)
+		&& (triggering == ACPI_LEVEL_SENSITIVE)) {
 		/*
 		 * For PCI devices assign IRQs in order, avoiding gaps
 		 * due to unused I/O APIC pins.

From 1c6f70309900c47eeaef12c97b206fcccde1ebe2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 234/890] x86: define all _PAGE_* in terms of _PAGE_BIT_* on
 64-bit

This patch defines the _PAGE_* paging attributes in pgtable_64.h in terms of
the former defined _PAGE_BIT_* values.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_64.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9b0ff477b39e..9d4f11dd566f 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -150,19 +150,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define _PAGE_BIT_ACCESSED	5
 #define _PAGE_BIT_DIRTY		6
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
+#define _PAGE_BIT_FILE		6
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
-#define _PAGE_PRESENT	0x001
-#define _PAGE_RW	0x002
-#define _PAGE_USER	0x004
-#define _PAGE_PWT	0x008
-#define _PAGE_PCD	0x010
-#define _PAGE_ACCESSED	0x020
-#define _PAGE_DIRTY	0x040
-#define _PAGE_PSE	0x080	/* 2MB page */
-#define _PAGE_FILE	0x040	/* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_GLOBAL	0x100	/* Global TLB entry */
+#define _PAGE_PRESENT	(_AC(1,UL)<<_PAGE_BIT_PRESENT)
+#define _PAGE_RW	(_AC(1,UL)<<_PAGE_BIT_RW)
+#define _PAGE_USER	(_AC(1,UL)<<_PAGE_BIT_USER)
+#define _PAGE_PWT	(_AC(1,UL)<<_PAGE_BIT_PWT)
+#define _PAGE_PCD	(_AC(1,UL)<<_PAGE_BIT_PCD)
+#define _PAGE_ACCESSED	(_AC(1,UL)<<_PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY	(_AC(1,UL)<<_PAGE_BIT_DIRTY)
+/* 2MB page */
+#define _PAGE_PSE	(_AC(1,UL)<<_PAGE_BIT_PSE)
+/* nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_FILE	(_AC(1,UL)<<_PAGE_BIT_FILE)
+/* Global TLB entry */
+#define _PAGE_GLOBAL	(_AC(1,UL)<<_PAGE_BIT_GLOBAL)
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
 #define _PAGE_NX        (_AC(1,UL)<<_PAGE_BIT_NX)
@@ -248,8 +252,7 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 #define pte_present(x)	(pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this
-						   right? */
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this right? */
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
 #define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 

From 40842bf50e0719bcfe817fec2fe8b0b98dcdb244 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Jan 2008 13:31:02 +0100
Subject: [PATCH 235/890] x86: use __PAGE_KERNEL* instead of _KERNPG_TABLE

This minor cleanup replaces _KERNPG_TABLE with the __PAGE_KERNEL* for 2MB PTEs
in the 64-bit memory initialization code. The __PAGE_KERNEL* defines are more
appropriate for PTEs.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 251eeb325ae3..3f8bf298dbb8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -226,7 +226,7 @@ __meminit void *early_ioremap(unsigned long addr, unsigned long size)
 		vaddr += addr & ~PMD_MASK;
 		addr &= PMD_MASK;
 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
-			set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
+			set_pmd(pmd + i,__pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
 		__flush_tlb();
 		return (void *)vaddr;
 	next:
@@ -270,7 +270,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 		if (pmd_val(*pmd))
 			continue;
 
-		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
 		entry &= __supported_pte_mask;
 		set_pmd(pmd, __pmd(entry));
 	}

From 3c2362e629cdc16ea7472c5f14b6c2076aba58db Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 236/890] x86: use def_bool where possible

Change occurances of:
	bool
	default X

to:
	def_bool X

Change ocurances of:
	bool "Foo"
	default X

to:
	def_bool X
	prompt "Foo"

Shows no difference in generated config for allmodconfig/allyesconfig.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 198 ++++++++++++++++++-----------------------------
 1 file changed, 77 insertions(+), 121 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index da98368f66af..fef944bb920e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,81 +17,63 @@ config X86_64
 
 ### Arch settings
 config X86
-	bool
-	default y
+	def_bool y
 
 config GENERIC_TIME
-	bool
-	default y
+	def_bool y
 
 config GENERIC_CMOS_UPDATE
-	bool
-	default y
+	def_bool y
 
 config CLOCKSOURCE_WATCHDOG
-	bool
-	default y
+	def_bool y
 
 config GENERIC_CLOCKEVENTS
-	bool
-	default y
+	def_bool y
 
 config GENERIC_CLOCKEVENTS_BROADCAST
-	bool
-	default y
+	def_bool y
 	depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
 
 config LOCKDEP_SUPPORT
-	bool
-	default y
+	def_bool y
 
 config STACKTRACE_SUPPORT
-	bool
-	default y
+	def_bool y
 
 config SEMAPHORE_SLEEPERS
-	bool
-	default y
+	def_bool y
 
 config MMU
-	bool
-	default y
+	def_bool y
 
 config ZONE_DMA
-	bool
-	default y
+	def_bool y
 
 config QUICKLIST
-	bool
-	default X86_32
+	def_bool X86_32
 
 config SBUS
 	bool
 
 config GENERIC_ISA_DMA
-	bool
-	default y
+	def_bool y
 
 config GENERIC_IOMAP
-	bool
-	default y
+	def_bool y
 
 config GENERIC_BUG
-	bool
-	default y
+	def_bool y
 	depends on BUG
 
 config GENERIC_HWEIGHT
-	bool
-	default y
+	def_bool y
 
 config ARCH_MAY_HAVE_PC_FDC
-	bool
-	default y
+	def_bool y
 
 config DMI
-	bool
-	default y
+	def_bool y
 
 config RWSEM_GENERIC_SPINLOCK
 	def_bool !X86_XADD
@@ -303,8 +285,8 @@ config X86_VSMP
 endchoice
 
 config SCHED_NO_NO_OMIT_FRAME_POINTER
-	bool "Single-depth WCHAN output"
-	default y
+	def_bool y
+	prompt "Single-depth WCHAN output"
 	depends on X86_32
 	help
 	  Calculate simpler /proc/<PID>/wchan values. If this option
@@ -351,37 +333,31 @@ source "arch/x86/lguest/Kconfig"
 endif
 
 config ACPI_SRAT
-	bool
-	default y
+	def_bool y
 	depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
 	select ACPI_NUMA
 
 config HAVE_ARCH_PARSE_SRAT
-       bool
-       default y
-       depends on ACPI_SRAT
+	def_bool y
+	depends on ACPI_SRAT
 
 config X86_SUMMIT_NUMA
-	bool
-	default y
+	def_bool y
 	depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
 
 config X86_CYCLONE_TIMER
-	bool
-	default y
+	def_bool y
 	depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
 
 config ES7000_CLUSTERED_APIC
-	bool
-	default y
+	def_bool y
 	depends on SMP && X86_ES7000 && MPENTIUMIII
 
 source "arch/x86/Kconfig.cpu"
 
 config HPET_TIMER
-	bool
+	def_bool X86_64
 	prompt "HPET Timer Support" if X86_32
-	default X86_64
 	help
          Use the IA-PC HPET (High Precision Event Timer) to manage
          time in preference to the PIT and RTC, if a HPET is
@@ -399,9 +375,8 @@ config HPET_TIMER
          Choose N to continue using the legacy 8254 timer.
 
 config HPET_EMULATE_RTC
-	bool
+	def_bool y
 	depends on HPET_TIMER && RTC=y
-	default y
 
 # Mark as embedded because too many people got it wrong.
 # The code disables itself when not needed.
@@ -441,8 +416,8 @@ config CALGARY_IOMMU
 	  If unsure, say Y.
 
 config CALGARY_IOMMU_ENABLED_BY_DEFAULT
-	bool "Should Calgary be enabled by default?"
-	default y
+	def_bool y
+	prompt "Should Calgary be enabled by default?"
 	depends on CALGARY_IOMMU
 	help
 	  Should Calgary be enabled by default? if you choose 'y', Calgary
@@ -486,9 +461,9 @@ config SCHED_SMT
 	  N here.
 
 config SCHED_MC
-	bool "Multi-core scheduler support"
+	def_bool y
+	prompt "Multi-core scheduler support"
 	depends on (X86_64 && SMP) || (X86_32 && X86_HT)
-	default y
 	help
 	  Multi-core scheduler support improves the CPU scheduler's decision
 	  making when dealing with multi-core CPU chips at a cost of slightly
@@ -522,19 +497,16 @@ config X86_UP_IOAPIC
 	  an IO-APIC, then the kernel will still run with no slowdown at all.
 
 config X86_LOCAL_APIC
-	bool
+	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
-	default y
 
 config X86_IO_APIC
-	bool
+	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
-	default y
 
 config X86_VISWS_APIC
-	bool
+	def_bool y
 	depends on X86_32 && X86_VISWS
-	default y
 
 config X86_MCE
 	bool "Machine Check Exception"
@@ -554,17 +526,17 @@ config X86_MCE
 	  the 386 and 486, so nearly everyone can say Y here.
 
 config X86_MCE_INTEL
-	bool "Intel MCE features"
+	def_bool y
+	prompt "Intel MCE features"
 	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
-	default y
 	help
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
 
 config X86_MCE_AMD
-	bool "AMD MCE features"
+	def_bool y
+	prompt "AMD MCE features"
 	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
-	default y
 	help
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
@@ -637,9 +609,9 @@ config I8K
 	  Say N otherwise.
 
 config X86_REBOOTFIXUPS
-	bool "Enable X86 board specific fixups for reboot"
+	def_bool n
+	prompt "Enable X86 board specific fixups for reboot"
 	depends on X86_32 && X86
-	default n
 	---help---
 	  This enables chipset and/or board specific fixups to be done
 	  in order to get reboot to work correctly. This is only needed on
@@ -672,9 +644,8 @@ config MICROCODE
 	  module will be called microcode.
 
 config MICROCODE_OLD_INTERFACE
-	bool
+	def_bool y
 	depends on MICROCODE
-	default y
 
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -798,13 +769,12 @@ config PAGE_OFFSET
 	depends on X86_32
 
 config HIGHMEM
-	bool
+	def_bool y
 	depends on X86_32 && (HIGHMEM64G || HIGHMEM4G)
-	default y
 
 config X86_PAE
-	bool "PAE (Physical Address Extension) Support"
-	default n
+	def_bool n
+	prompt "PAE (Physical Address Extension) Support"
 	depends on X86_32 && !HIGHMEM4G
 	select RESOURCES_64BIT
 	help
@@ -836,10 +806,10 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 	depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
 config K8_NUMA
-       bool "Old style AMD Opteron NUMA detection"
-       depends on X86_64 && NUMA && PCI
-       default y
-       help
+	def_bool y
+	prompt "Old style AMD Opteron NUMA detection"
+	depends on X86_64 && NUMA && PCI
+	help
 	 Enable K8 NUMA node topology detection.  You should say Y here if
 	 you have a multi processor AMD K8 system. This uses an old
 	 method to read the NUMA configuration directly from the builtin
@@ -847,10 +817,10 @@ config K8_NUMA
 	 instead, which also takes priority if both are compiled in.
 
 config X86_64_ACPI_NUMA
-	bool "ACPI NUMA detection"
+	def_bool y
+	prompt "ACPI NUMA detection"
 	depends on X86_64 && NUMA && ACPI && PCI
 	select ACPI_NUMA
-	default y
 	help
 	  Enable ACPI SRAT based node topology detection.
 
@@ -870,24 +840,20 @@ config NODES_SHIFT
 	depends on NEED_MULTIPLE_NODES
 
 config HAVE_ARCH_BOOTMEM_NODE
-	bool
+	def_bool y
 	depends on X86_32 && NUMA
-	default y
 
 config ARCH_HAVE_MEMORY_PRESENT
-	bool
+	def_bool y
 	depends on X86_32 && DISCONTIGMEM
-	default y
 
 config NEED_NODE_MEMMAP_SIZE
-	bool
+	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
-	default y
 
 config HAVE_ARCH_ALLOC_REMAP
-	bool
+	def_bool y
 	depends on X86_32 && NUMA
-	default y
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
@@ -991,9 +957,9 @@ config MTRR
 	  See <file:Documentation/mtrr.txt> for more information.
 
 config EFI
-	bool "Boot from EFI support"
+	def_bool n
+	prompt "Boot from EFI support"
 	depends on X86_32 && ACPI
-	default n
 	---help---
 	This enables the kernel to boot on EFI platforms using
 	system configuration information passed to it from the firmware.
@@ -1009,9 +975,9 @@ config EFI
 	kernel should continue to boot on existing non-EFI platforms.
 
 config IRQBALANCE
-	bool "Enable kernel irq balancing"
+	def_bool y
+	prompt "Enable kernel irq balancing"
 	depends on X86_32 && SMP && X86_IO_APIC
-	default y
 	help
 	  The default yes will allow the kernel to do irq load balancing.
 	  Saying no will keep the kernel from doing irq load balancing.
@@ -1019,14 +985,13 @@ config IRQBALANCE
 # turning this on wastes a bunch of space.
 # Summit needs it only when NUMA is on
 config BOOT_IOREMAP
-	bool
+	def_bool y
 	depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
-	default y
 
 config SECCOMP
-	bool "Enable seccomp to safely compute untrusted bytecode"
+	def_bool y
+	prompt "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
-	default y
 	help
 	  This kernel feature is useful for number crunching applications
 	  that may need to compute untrusted bytecode during their
@@ -1193,8 +1158,8 @@ config HOTPLUG_CPU
 	  suspend.
 
 config COMPAT_VDSO
-	bool "Compat VDSO support"
-	default y
+	def_bool y
+	prompt "Compat VDSO support"
 	depends on X86_32 || IA32_EMULATION
 	help
 	  Map the 32-bit VDSO to the predictable old-style address too.
@@ -1219,9 +1184,8 @@ menu "Power management options"
 	depends on !X86_VOYAGER
 
 config ARCH_HIBERNATION_HEADER
-	bool
+	def_bool y
 	depends on X86_64 && HIBERNATION
-	default y
 
 source "kernel/power/Kconfig"
 
@@ -1414,25 +1378,21 @@ config PCI_GOANY
 endchoice
 
 config PCI_BIOS
-	bool
+	def_bool y
 	depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
-	default y
 
 # x86-64 doesn't support PCI BIOS access from long mode so always go direct.
 config PCI_DIRECT
-	bool
+	def_bool y
 	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
-	default y
 
 config PCI_MMCONFIG
-	bool
+	def_bool y
 	depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
-	default y
 
 config PCI_DOMAINS
-	bool
+	def_bool y
 	depends on PCI
-	default y
 
 config PCI_MMCONFIG
 	bool "Support mmconfig PCI config space access"
@@ -1449,9 +1409,9 @@ config DMAR
 	  remapping devices.
 
 config DMAR_GFX_WA
-	bool "Support for Graphics workaround"
+	def_bool y
+	prompt "Support for Graphics workaround"
 	depends on DMAR
-	default y
 	help
 	 Current Graphics drivers tend to use physical address
 	 for DMA and avoid using DMA APIs. Setting this config
@@ -1460,9 +1420,8 @@ config DMAR_GFX_WA
 	 to use physical addresses for DMA.
 
 config DMAR_FLOPPY_WA
-	bool
+	def_bool y
 	depends on DMAR
-	default y
 	help
 	 Floppy disk drivers are know to bypass DMA API calls
 	 thereby failing to work when IOMMU is enabled. This
@@ -1475,8 +1434,7 @@ source "drivers/pci/Kconfig"
 
 # x86_64 have no ISA slots, but do have ISA-style DMA.
 config ISA_DMA_API
-	bool
-	default y
+	def_bool y
 
 if X86_32
 
@@ -1542,9 +1500,9 @@ config SCx200HR_TIMER
 	  other workaround is idle=poll boot option.
 
 config GEODE_MFGPT_TIMER
-	bool "Geode Multi-Function General Purpose Timer (MFGPT) events"
+	def_bool y
+	prompt "Geode Multi-Function General Purpose Timer (MFGPT) events"
 	depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS
-	default y
 	help
 	  This driver provides a clock event source based on the MFGPT
 	  timer(s) in the CS5535 and CS5536 companion chip for the geode.
@@ -1583,18 +1541,16 @@ config IA32_AOUT
          Support old a.out binaries in the 32bit emulation.
 
 config COMPAT
-	bool
+	def_bool y
 	depends on IA32_EMULATION
-	default y
 
 config COMPAT_FOR_U64_ALIGNMENT
 	def_bool COMPAT
 	depends on X86_64
 
 config SYSVIPC_COMPAT
-	bool
+	def_bool y
 	depends on X86_64 && COMPAT && SYSVIPC
-	default y
 
 endmenu
 

From 6612538ca9b38f0f45d0aec2aae8992c43313705 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 237/890] x86: clean up process_32/64.c

White space and coding style clean up.
Make process_32/64.c similar.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c |   6 +-
 arch/x86/kernel/process_64.c | 199 +++++++++++++++++------------------
 2 files changed, 102 insertions(+), 103 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index add3bf34e205..5350763a2d03 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,7 @@ EXPORT_SYMBOL(default_idle);
  * to poll the ->work.need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+static void poll_idle(void)
 {
 	cpu_relax();
 }
@@ -493,7 +493,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 
 	p->thread.ip = (unsigned long) ret_from_fork;
 
-	savesegment(gs,p->thread.gs);
+	savesegment(gs, p->thread.gs);
 
 	tsk = current;
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -571,7 +571,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 }
 EXPORT_SYMBOL(dump_thread);
 
-/* 
+/*
  * Capture the user space registers if the task is not running (in user space)
  */
 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 238193822e23..4c4d8b3f046e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -3,7 +3,7 @@
  *
  *  Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
- * 
+ *
  *  X86-64 port
  *	Andi Kleen.
  *
@@ -19,19 +19,19 @@
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/fs.h>
 #include <linux/elfcore.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/module.h>
 #include <linux/a.out.h>
 #include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ptrace.h>
 #include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
@@ -129,54 +129,12 @@ static void default_idle(void)
  * to poll the ->need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+static void poll_idle(void)
 {
 	local_irq_enable();
 	cpu_relax();
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-void cpu_idle_wait(void)
-{
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map, tmp = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
-
-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) &&
-					!per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-		/*
-		 * We waited 1 sec, if a CPU still did not call idle
-		 * it may be because it is in idle and not waking up
-		 * because it has nothing to do.
-		 * Give all the remaining CPUS a kick.
-		 */
-		smp_call_function_mask(map, do_nothing, 0, 0);
-	} while (!cpus_empty(map));
-
-	set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
@@ -247,6 +205,47 @@ void cpu_idle(void)
 	}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+void cpu_idle_wait(void)
+{
+	unsigned int cpu, this_cpu = get_cpu();
+	cpumask_t map, tmp = current->cpus_allowed;
+
+	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+	put_cpu();
+
+	cpus_clear(map);
+	for_each_online_cpu(cpu) {
+		per_cpu(cpu_idle_state, cpu) = 1;
+		cpu_set(cpu, map);
+	}
+
+	__get_cpu_var(cpu_idle_state) = 0;
+
+	wmb();
+	do {
+		ssleep(1);
+		for_each_online_cpu(cpu) {
+			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+				cpu_clear(cpu, map);
+		}
+		cpus_and(map, map, cpu_online_map);
+		/*
+		 * We waited 1 sec, if a CPU still did not call idle
+		 * it may be because it is in idle and not waking up
+		 * because it has nothing to do.
+		 * Give all the remaining CPUS a kick.
+		 */
+		smp_call_function_mask(map, do_nothing, 0, 0);
+	} while (!cpus_empty(map));
+
+	set_cpus_allowed(current, tmp);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
@@ -300,7 +299,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 	}
 }
 
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
 {
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
@@ -315,13 +314,13 @@ static int __init idle_setup (char *str)
 }
 early_param("idle", idle_setup);
 
-/* Prints also some state that isn't saved in the pt_regs */ 
+/* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
-	unsigned int fsindex,gsindex;
-	unsigned int ds,cs,es; 
+	unsigned int fsindex, gsindex;
+	unsigned int ds, cs, es;
 
 	printk("\n");
 	print_modules();
@@ -390,7 +389,7 @@ void exit_thread(void)
 	struct task_struct *me = current;
 	struct thread_struct *t = &me->thread;
 
-	if (me->thread.io_bitmap_ptr) { 
+	if (me->thread.io_bitmap_ptr) {
 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 
 		kfree(t->io_bitmap_ptr);
@@ -426,7 +425,7 @@ void flush_thread(void)
 	tsk->thread.debugreg3 = 0;
 	tsk->thread.debugreg6 = 0;
 	tsk->thread.debugreg7 = 0;
-	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
+	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
 	 */
@@ -449,7 +448,7 @@ void release_thread(struct task_struct *dead_task)
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 {
-	struct user_desc ud = { 
+	struct user_desc ud = {
 		.base_addr = addr,
 		.limit = 0xfffff,
 		.seg_32bit = 1,
@@ -458,8 +457,8 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 	};
 	struct n_desc_struct *desc = (void *)t->thread.tls_array;
 	desc += tls;
-	desc->a = LDT_entry_a(&ud); 
-	desc->b = LDT_entry_b(&ud); 
+	desc->a = LDT_entry_a(&ud);
+	desc->b = LDT_entry_b(&ud);
 }
 
 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
@@ -516,7 +515,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 				IO_BITMAP_BYTES);
 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
-	} 
+	}
 
 	/*
 	 * Set a new TLS for the child thread?
@@ -544,11 +543,29 @@ out:
 /*
  * This special macro can be used to load a debugging register
  */
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+	struct pt_regs *pp, ptregs;
+
+	pp = task_pt_regs(tsk);
+
+	ptregs = *pp;
+	ptregs.cs &= 0xffff;
+	ptregs.ss &= 0xffff;
+
+	elf_core_copy_regs(regs, &ptregs);
+
+	return 1;
+}
 
 static inline void __switch_to_xtra(struct task_struct *prev_p,
-			     	    struct task_struct *next_p,
-			     	    struct tss_struct *tss)
+				    struct task_struct *next_p,
+				    struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
 
@@ -586,7 +603,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 /*
  *	switch_to(x,y) should switch tasks from x to y.
  *
- * This could still be optimized: 
+ * This could still be optimized:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  *
@@ -597,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
-	int cpu = smp_processor_id();  
+	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
 	/* we're going to use this soon, after a few expensive things */
@@ -700,7 +717,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage 
+asmlinkage
 long sys_execve(char __user *name, char __user * __user *argv,
 		char __user * __user *envp, struct pt_regs regs)
 {
@@ -721,12 +738,12 @@ void set_personality_64bit(void)
 	/* inherit personality from parent */
 
 	/* Make sure to be in 64bit mode */
-	clear_thread_flag(TIF_IA32); 
+	clear_thread_flag(TIF_IA32);
 
 	/* TBD: overwrites user setup. Should have two bits.
 	   But 64bit processes have always behaved this way,
 	   so it's not too bad. The main problem is just that
-   	   32bit childs are affected again. */
+	   32bit childs are affected again. */
 	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
@@ -819,19 +836,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		/* Not strictly needed for fs, but do it for symmetry
 		   with gs */
 		if (addr >= TASK_SIZE_OF(task))
-			return -EPERM; 
+			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to 
+		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) { 
+		if (addr <= 0xffffffff) {
 			set_32bit_tls(task, FS_TLS, addr);
-			if (doit) { 
-				load_TLS(&task->thread, cpu); 
+			if (doit) {
+				load_TLS(&task->thread, cpu);
 				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 			}
 			task->thread.fsindex = FS_TLS_SEL;
 			task->thread.fs = 0;
-		} else { 
+		} else {
 			task->thread.fsindex = 0;
 			task->thread.fs = addr;
 			if (doit) {
@@ -843,24 +860,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		}
 		put_cpu();
 		break;
-	case ARCH_GET_FS: { 
-		unsigned long base; 
+	case ARCH_GET_FS: {
+		unsigned long base;
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
 		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
 			base = task->thread.fs;
-		ret = put_user(base, (unsigned long __user *)addr); 
-		break; 
+		ret = put_user(base, (unsigned long __user *)addr);
+		break;
 	}
-	case ARCH_GET_GS: { 
+	case ARCH_GET_GS: {
 		unsigned long base;
 		unsigned gsindex;
 		if (task->thread.gsindex == GS_TLS_SEL)
 			base = read_32bit_tls(task, GS_TLS);
 		else if (doit) {
- 			asm("movl %%gs,%0" : "=r" (gsindex));
+			asm("movl %%gs,%0" : "=r" (gsindex));
 			if (gsindex)
 				rdmsrl(MSR_KERNEL_GS_BASE, base);
 			else
@@ -868,39 +885,21 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		}
 		else
 			base = task->thread.gs;
-		ret = put_user(base, (unsigned long __user *)addr); 
+		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
 
 	default:
 		ret = -EINVAL;
 		break;
-	} 
+	}
 
-	return ret;	
-} 
+	return ret;
+}
 
 long sys_arch_prctl(int code, unsigned long addr)
 {
 	return do_arch_prctl(current, code, addr);
-} 
-
-/* 
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs *pp, ptregs;
-
-	pp = task_pt_regs(tsk);
-
-	ptregs = *pp; 
-	ptregs.cs &= 0xffff;
-	ptregs.ss &= 0xffff;
-
-	elf_core_copy_regs(regs, &ptregs);
- 
-	return 1;
 }
 
 unsigned long arch_align_stack(unsigned long sp)

From 96daa8cd53945a1220d2b2f4a44bc57f0cc46760 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 238/890] x86: use def_bool where possible in Kconfig.cpu

x86: Use def_bool where possible in Kconfig.cpu

Change occurances of:
	bool
	default X

to:
	def_bool X

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.cpu | 56 ++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 36 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 69e2ee44fdff..018d68e3184e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -219,10 +219,10 @@ config MGEODEGX1
 	  Select this for a Geode GX1 (Cyrix MediaGX) chip.
 
 config MGEODE_LX
-       bool "Geode GX/LX"
+	bool "Geode GX/LX"
 	depends on X86_32
-       help
-         Select this for AMD Geode GX and LX processors.
+	help
+	  Select this for AMD Geode GX and LX processors.
 
 config MCYRIXIII
 	bool "CyrixIII/VIA-C3"
@@ -258,7 +258,7 @@ config MPSC
 	  Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
 	  Xeon CPUs with Intel 64bit which is compatible with x86-64.
 	  Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
-          Netburst core and shouldn't use this option. You can distinguish them
+	  Netburst core and shouldn't use this option. You can distinguish them
 	  using the cpu family field
 	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
 
@@ -317,81 +317,66 @@ config X86_L1_CACHE_SHIFT
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
 
 config X86_XADD
-	bool
+	def_bool y
 	depends on X86_32 && !M386
-	default y
 
 config X86_PPRO_FENCE
-	bool
+	def_bool y
 	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
-	default y
 
 config X86_F00F_BUG
-	bool
+	def_bool y
 	depends on M586MMX || M586TSC || M586 || M486 || M386
-	default y
 
 config X86_WP_WORKS_OK
-	bool
+	def_bool y
 	depends on X86_32 && !M386
-	default y
 
 config X86_INVLPG
-	bool
+	def_bool y
 	depends on X86_32 && !M386
-	default y
 
 config X86_BSWAP
-	bool
+	def_bool y
 	depends on X86_32 && !M386
-	default y
 
 config X86_POPAD_OK
-	bool
+	def_bool y
 	depends on X86_32 && !M386
-	default y
 
 config X86_ALIGNMENT_16
-	bool
+	def_bool y
 	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
-	default y
 
 config X86_GOOD_APIC
-	bool
+	def_bool y
 	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64
-	default y
 
 config X86_INTEL_USERCOPY
-	bool
+	def_bool y
 	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
-	default y
 
 config X86_USE_PPRO_CHECKSUM
-	bool
+	def_bool y
 	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
-	default y
 
 config X86_USE_3DNOW
-	bool
+	def_bool y
 	depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
-	default y
 
 config X86_OOSTORE
-	bool
+	def_bool y
 	depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
-	default y
 
 config X86_TSC
-	bool
+	def_bool y
 	depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
-	default y
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
 config X86_CMOV
-	bool
+	def_bool y
 	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
-	default y
 
 config X86_MINIMUM_CPU_FAMILY
 	int
@@ -400,6 +385,5 @@ config X86_MINIMUM_CPU_FAMILY
 	default "3"
 
 config X86_DEBUGCTLMSR
-	bool
+	def_bool y
 	depends on !(M586MMX || M586TSC || M586 || M486 || M386)
-	default y

From fe58fc8f40257948c2f9fc5a56863077ce3138f0 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 239/890] x86: wipe out traditional opt from x86_64 Makefile

Among other things, using -traditional as a gcc option stops us from
using macro token pasting, which is a feature we heavily rely on.

There was still a use of -traditional in arch/x86/kernel/Makefile_64,
which this patch removes.

I don't see any problems building kernels in my x86_64 box without
-traditional.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_64 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index b41e838dc788..19af64e1a3fc 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -4,7 +4,6 @@
 
 extra-y 	:= head_64.o head64.o init_task.o vmlinux.lds
 CPPFLAGS_vmlinux.lds += -Ux86_64
-EXTRA_AFLAGS	:= -traditional
 
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \

From 751de83c0c94a5235f14cff8549d3b39e745eb2b Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 240/890] x86: unify msr smp funcs

The functions under #ifdef CONFIG_SMP in msr.h are the same
for both x86_64 and i386, and this patches removes one of them,
putting them in a single location

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 80b027081b3c..5c95d0be1132 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -139,29 +139,6 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
 	} while(0)
 #endif	/* !CONFIG_PARAVIRT */
 
-#ifdef CONFIG_SMP
-void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-#else  /*  CONFIG_SMP  */
-static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	rdmsr(msr_no, *l, *h);
-}
-static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	wrmsr(msr_no, l, h);
-}
-static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return rdmsr_safe(msr_no, l, h);
-}
-static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return wrmsr_safe(msr_no, l, h);
-}
-#endif  /*  CONFIG_SMP  */
 #endif  /* ! __ASSEMBLY__ */
 #endif  /* __KERNEL__ */
 
@@ -327,6 +304,12 @@ static inline unsigned int cpuid_edx(unsigned int op)
 			:"c"(msr), "i"(-EIO), "0"(0));			\
 	  ret__; })
 
+#endif  /* __ASSEMBLY__ */
+
+#endif  /* !__i386__ */
+
+#ifndef __ASSEMBLY__
+
 #ifdef CONFIG_SMP
 void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -351,8 +334,6 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 }
 #endif  /* CONFIG_SMP */
 #endif  /* __KERNEL__ */
-#endif  /* __ASSEMBLY__ */
-
-#endif  /* !__i386__ */
+#endif /* __ASSEMBLY__ */
 
 #endif

From 16e2011be67b8625c1c600f9742c2279be3c0c68 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 241/890] x86: allow sched clock to be overridden by paravirt

This patch turns the sched_clock into native_sched_clock.
sched clock becomes a weak symbol, which can then give its
place to a paravirt definition.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_64.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 32edd2c50e94..204a9080af04 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -63,7 +63,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	local_irq_restore(flags);
 }
 
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
 {
 	unsigned long a = 0;
 
@@ -77,6 +77,19 @@ unsigned long long sched_clock(void)
 	return cycles_2_ns(a);
 }
 
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+	return paravirt_sched_clock();
+}
+#else
+unsigned long long
+sched_clock(void) __attribute__((alias("native_sched_clock")));
+#endif
+
+
 static int tsc_unstable;
 
 inline int check_tsc_unstable(void)

From 4e87173eacfd0d798aeeba14026893797826bc93 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 242/890] x86: split get_cycles_sync

This patch splits get_cycles_sync() into  __get_cycles_sync(),
and the rdtscll part. Paravirt guests cannot issue rdtscl directly,
as it involves a function call in vdso area.

So, using the __get_cycles_sync() base, we introduce vget_cycles_sync,
which then calls the native version of rdtscll. Ideally, however, a guest
should define its own clocksource, together with a vread function

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_64.c |  2 +-
 include/asm-x86/tsc.h    | 37 +++++++++++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 204a9080af04..3723401c4593 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -306,7 +306,7 @@ static cycle_t read_tsc(void)
 
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)get_cycles_sync();
+	cycle_t ret = (cycle_t)vget_cycles_sync();
 	return ret;
 }
 
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index d7b1c4e7a108..9b7d264897fa 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -33,14 +33,14 @@ static inline cycles_t get_cycles(void)
 }
 
 /* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
+static __always_inline cycles_t __get_cycles_sync(void)
 {
 	unsigned long long ret;
 	unsigned eax, edx;
 
 	/*
-  	 * Use RDTSCP if possible; it is guaranteed to be synchronous
- 	 * and doesn't cause a VMEXIT on Hypervisors
+	 * Use RDTSCP if possible; it is guaranteed to be synchronous
+	 * and doesn't cause a VMEXIT on Hypervisors
 	 */
 	alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP,
 		       ASM_OUTPUT2("=a" (eax), "=d" (edx)),
@@ -55,11 +55,40 @@ static __always_inline cycles_t get_cycles_sync(void)
 	 */
 	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
 			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
-	rdtscll(ret);
 
+	return 0;
+}
+
+static __always_inline cycles_t get_cycles_sync(void)
+{
+	unsigned long long ret;
+	ret = __get_cycles_sync();
+	if (!ret)
+		rdtscll(ret);
 	return ret;
 }
 
+#ifdef CONFIG_PARAVIRT
+/*
+ * For paravirt guests, some functionalities are executed through function
+ * pointers in the various pvops structures.
+ * These function pointers exist inside the kernel and can not
+ * be accessed by user space. To avoid this, we make a copy of the
+ * get_cycles_sync (called in kernel) but force the use of native_read_tsc.
+ * Ideally, the guest should set up it's own clock and vread
+ */
+static __always_inline long long vget_cycles_sync(void)
+{
+	unsigned long long ret;
+	ret = __get_cycles_sync();
+	if (!ret)
+		ret = native_read_tsc();
+	return ret;
+}
+#else
+# define vget_cycles_sync() get_cycles_sync()
+#endif
+
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);

From c758ecf62ad94ddfeb4e7d8a5498bdcb2e3c85db Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:03 +0100
Subject: [PATCH 243/890] x86: unify cpuid functions

cpuid is not very different between i386 and x86_64.
We move away the x86_64 version from msr.h, and
unify them at processor.h, where they belong.

cpuid() paravirt then comes for free.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h          | 67 -----------------------------
 include/asm-x86/processor.h    | 78 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 69 ------------------------------
 3 files changed, 78 insertions(+), 136 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 5c95d0be1132..0648b2d57a38 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -203,73 +203,6 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
 			  : "c" (counter))
 
 
-static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
-			 unsigned int *ecx, unsigned int *edx)
-{
-	__asm__("cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (op));
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
-			       int *edx)
-{
-	__asm__("cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (op), "c" (count));
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
-	unsigned int eax;
-
-	__asm__("cpuid"
-		: "=a" (eax)
-		: "0" (op)
-		: "bx", "cx", "dx");
-	return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
-	unsigned int eax, ebx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=b" (ebx)
-		: "0" (op)
-		: "cx", "dx" );
-	return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
-	unsigned int eax, ecx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=c" (ecx)
-		: "0" (op)
-		: "bx", "dx" );
-	return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
-	unsigned int eax, edx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=d" (edx)
-		: "0" (op)
-		: "bx", "cx");
-	return edx;
-}
-
 #ifdef __KERNEL__
 
 /* wrmsr with exception handling */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 46e1c04e309c..dea81b70895d 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,5 +1,83 @@
+#ifndef __ASM_X86_PROCESSOR_H
+#define __ASM_X86_PROCESSOR_H
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+					 unsigned int *ecx, unsigned int *edx)
+{
+	/* ecx is often an input as well as an output. */
+	__asm__("cpuid"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+
 #ifdef CONFIG_X86_32
 # include "processor_32.h"
 #else
 # include "processor_64.h"
 #endif
+
+#ifndef CONFIG_PARAVIRT
+#define __cpuid native_cpuid
+#endif
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(unsigned int op,
+			 unsigned int *eax, unsigned int *ebx,
+			 unsigned int *ecx, unsigned int *edx)
+{
+	*eax = op;
+	*ecx = 0;
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(unsigned int op, int count,
+			       unsigned int *eax, unsigned int *ebx,
+			       unsigned int *ecx, unsigned int *edx)
+{
+	*eax = op;
+	*ecx = count;
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return edx;
+}
+
+#endif
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 6846cc346400..0d83da198127 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -133,18 +133,6 @@ extern void detect_ht(struct cpuinfo_x86 *c);
 static inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
-					 unsigned int *ecx, unsigned int *edx)
-{
-	/* ecx is often an input as well as an output. */
-	__asm__("cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
-
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
@@ -580,7 +568,6 @@ static inline void native_set_iopl_mask(unsigned mask)
 #include <asm/paravirt.h>
 #else
 #define paravirt_enabled() 0
-#define __cpuid native_cpuid
 
 static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
@@ -598,62 +585,6 @@ static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
-			 unsigned int *eax, unsigned int *ebx,
-			 unsigned int *ecx, unsigned int *edx)
-{
-	*eax = op;
-	*ecx = 0;
-	__cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
-			       unsigned int *eax, unsigned int *ebx,
-			       unsigned int *ecx, unsigned int *edx)
-{
-	*eax = op;
-	*ecx = count;
-	__cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return edx;
-}
-
 /* generic versions from gas */
 #define GENERIC_NOP1	".byte 0x90\n"
 #define GENERIC_NOP2    	".byte 0x89,0xf6\n"

From 8f12dea6135d0a55b151dcb4c6bbe211f5f8d35d Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:06 +0100
Subject: [PATCH 244/890] x86: introduce native_read_tscp

Targetting paravirt, this patch introduces native_read_tscp, in
place of rdtscp() macro. When in a paravirt guest, this will
involve a function call, and thus, cannot be done in the vdso area.
These users then have to call the native version directly

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vsyscall_64.c |  4 ++--
 arch/x86/vdso/vgetcpu.c       |  4 ++--
 include/asm-x86/msr.h         | 29 +++++++++++++++++++++--------
 3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 018f7cf33790..c4c5e765cd2c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -190,7 +190,7 @@ time_t __vsyscall(1) vtime(time_t *t)
 long __vsyscall(2)
 vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
-	unsigned int dummy, p;
+	unsigned int p;
 	unsigned long j = 0;
 
 	/* Fast cache - only recompute value once per jiffies and avoid
@@ -205,7 +205,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 		p = tcache->blob[1];
 	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
-		rdtscp(dummy, dummy, p);
+		native_read_tscp(&p);
 	} else {
 		/* Load per CPU data from GDT */
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 3b1ae1abfba9..c8097f17f8a9 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -15,11 +15,11 @@
 
 long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
-	unsigned int dummy, p;
+	unsigned int p;
 
 	if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
-		rdtscp(dummy, dummy, p);
+		native_read_tscp(&p);
 	} else {
 		/* Load per CPU data from GDT */
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 0648b2d57a38..b6262e99fc8e 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -7,6 +7,27 @@
 # include <linux/types.h>
 #endif
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+static inline unsigned long long native_read_tscp(int *aux)
+{
+	unsigned long low, high;
+	asm volatile (".byte 0x0f,0x01,0xf9"
+		      : "=a" (low), "=d" (high), "=c" (*aux));
+	return low | ((u64)high >> 32);
+}
+
+#define rdtscp(low, high, aux)						\
+       do {                                                            \
+		unsigned long long _val = native_read_tscp(&(aux));     \
+		(low) = (u32)_val;                                      \
+		(high) = (u32)(_val >> 32);                             \
+       } while (0)
+
+#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
+#endif
+#endif
+
 #ifdef __i386__
 
 #ifdef __KERNEL__
@@ -178,8 +199,6 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
 #define rdtscl(low) \
      __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
 
-#define rdtscp(low,high,aux) \
-     __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
 
 #define rdtscll(val) do { \
      unsigned int __a,__d; \
@@ -187,12 +206,6 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
      (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
 } while(0)
 
-#define rdtscpll(val, aux) do { \
-     unsigned long __a, __d; \
-     __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
-     (val) = (__d << 32) | __a; \
-} while (0)
-
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 #define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)

From b8d1fae7dbde6a1227fa142acecb48dc3dd63817 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 245/890] x86: change rdpmc interface

the rdpmc instruction gets a counter argument in rcx. However,
the i386 version was ignoring it. To make both x86_64 and i386 versions
the same, as well as to comply with the instruction semantics, this
parameter is added in the i386 version

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h      | 6 +++---
 include/asm-x86/paravirt.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index b6262e99fc8e..effb7319c0a5 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -94,10 +94,10 @@ static inline unsigned long long native_read_tsc(void)
 	return val;
 }
 
-static inline unsigned long long native_read_pmc(void)
+static inline unsigned long long native_read_pmc(int counter)
 {
 	unsigned long long val;
-	asm volatile("rdpmc" : "=A" (val));
+	asm volatile("rdpmc" : "=A" (val) : "c" (counter));
 	return val;
 }
 
@@ -154,7 +154,7 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
 
 #define rdpmc(counter,low,high)					\
 	do {							\
-		u64 _l = native_read_pmc();			\
+		u64 _l = native_read_pmc(counter);		\
 		(low)  = (u32)_l;				\
 		(high) = (u32)(_l >> 32);			\
 	} while(0)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index d1780e32722e..4782be68d0fa 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -118,7 +118,7 @@ struct pv_cpu_ops {
 	int (*write_msr)(unsigned int msr, u64 val);
 
 	u64 (*read_tsc)(void);
-	u64 (*read_pmc)(void);
+	u64 (*read_pmc)(int counter);
 
 	/* These two are jmp to, not actually called. */
 	void (*irq_enable_syscall_ret)(void);

From c9dcda5ce46c395c5c99003e259e1973dface640 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 246/890] x86: change write msr functions interface

This patche changes the native_write_msr() and friends interface
to explicitly take 2 32-bit registers instead of a 64-bit value.
The change will ease the merge with 64-bit code. As the 64-bit
value will be passed as two registers anyway in i386,
the PVOP_CALL interface has to account for that and use low/high parameters
It would force the x86_64 version to be different.

The change does not make i386 generated code less efficient. As said above,
it would get the values from two registers anyway.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h      | 19 ++++++++++---------
 include/asm-x86/paravirt.h |  2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index effb7319c0a5..cb7222358897 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -63,13 +63,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 	return val;
 }
 
-static inline void native_write_msr(unsigned int msr, unsigned long long val)
+static inline void native_write_msr(unsigned int msr,
+				    unsigned low, unsigned high)
 {
-	asm volatile("wrmsr" : : "c" (msr), "A"(val));
+	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high));
 }
 
 static inline int native_write_msr_safe(unsigned int msr,
-					unsigned long long val)
+					unsigned low, unsigned high)
 {
 	int err;
 	asm volatile("2: wrmsr ; xorl %0,%0\n"
@@ -82,7 +83,7 @@ static inline int native_write_msr_safe(unsigned int msr,
 		     "   .long	2b,3b\n\t"
 		     ".previous"
 		     : "=a" (err)
-		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
+		     : "c" (msr), "0" (low), "d" (high),
 		       "i" (-EFAULT));
 	return err;
 }
@@ -118,20 +119,20 @@ static inline unsigned long long native_read_pmc(int counter)
 		(val2) = (u32)(__val >> 32);				\
 	} while(0)
 
-static inline void wrmsr(u32 __msr, u32 __low, u32 __high)
+static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
 {
-	native_write_msr(__msr, ((u64)__high << 32) | __low);
+	native_write_msr(msr, low, high);
 }
 
 #define rdmsrl(msr,val)							\
 	((val) = native_read_msr(msr))
 
-#define wrmsrl(msr,val)	native_write_msr(msr, val)
+#define wrmsrl(msr, val) native_write_msr(msr, (u32)val, (u32)(val >> 32))
 
 /* wrmsr with exception handling */
-static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
+static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
 {
-	return native_write_msr_safe(__msr, ((u64)__high << 32) | __low);
+	return native_write_msr_safe(msr, low, high);
 }
 
 /* rdmsr with exception handling */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 4782be68d0fa..e95c2a655165 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -115,7 +115,7 @@ struct pv_cpu_ops {
 	/* MSR, PMC and TSR operations.
 	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
 	u64 (*read_msr)(unsigned int msr, int *err);
-	int (*write_msr)(unsigned int msr, u64 val);
+	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
 
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(int counter);

From 56ec1ddcff967e51d98427e4efcbfc90de67efe3 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 247/890] x86: make fixups wordsize agnostic

This patch uses the _ASM_ALIGN and _ASM_PTR macros
to make the fixups in native_read/write_msr_safe look the same
for x86_64 and i386. Besides using this macros, we also have to
take the explicit instruction suffixes out. It's okay
because all this instructions uses registers, and can be sized by
them.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index cb7222358897..792fde2e8908 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -33,6 +33,7 @@ static inline unsigned long long native_read_tscp(int *aux)
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+#include <asm/asm.h>
 #include <asm/errno.h>
 
 static inline unsigned long long native_read_msr(unsigned int msr)
@@ -48,14 +49,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 {
 	unsigned long long val;
 
-	asm volatile("2: rdmsr ; xorl %0,%0\n"
+	asm volatile("2: rdmsr ; xor %0,%0\n"
 		     "1:\n\t"
 		     ".section .fixup,\"ax\"\n\t"
-		     "3:  movl %3,%0 ; jmp 1b\n\t"
+		     "3:  mov %3,%0 ; jmp 1b\n\t"
 		     ".previous\n\t"
 		     ".section __ex_table,\"a\"\n"
-		     "   .align 4\n\t"
-		     "   .long	2b,3b\n\t"
+		     _ASM_ALIGN "\n\t"
+		     _ASM_PTR " 2b,3b\n\t"
 		     ".previous"
 		     : "=r" (*err), "=A" (val)
 		     : "c" (msr), "i" (-EFAULT));
@@ -73,14 +74,14 @@ static inline int native_write_msr_safe(unsigned int msr,
 					unsigned low, unsigned high)
 {
 	int err;
-	asm volatile("2: wrmsr ; xorl %0,%0\n"
+	asm volatile("2: wrmsr ; xor %0,%0\n"
 		     "1:\n\t"
 		     ".section .fixup,\"ax\"\n\t"
-		     "3:  movl %4,%0 ; jmp 1b\n\t"
+		     "3:  mov %4,%0 ; jmp 1b\n\t"
 		     ".previous\n\t"
 		     ".section __ex_table,\"a\"\n"
-		     "   .align 4\n\t"
-		     "   .long	2b,3b\n\t"
+		     _ASM_ALIGN "\n\t"
+		     _ASM_PTR " 2b,3b\n\t"
 		     ".previous"
 		     : "=a" (err)
 		     : "c" (msr), "0" (low), "d" (high),

From c210d24986dc19e387c10508c0bc2faadadc1a2e Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 248/890] x86: integrate 32-bit and 64-bit code in msr.h

This patches proceeds with the integration of msr.h, making
the code unified, instead of having a version for each architecture.
We stick with the native_* functions, and then paravirt comes for free.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h | 171 ++++++++++++------------------------------
 1 file changed, 49 insertions(+), 122 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 792fde2e8908..040d3910d891 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -9,6 +9,10 @@
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
+
+#include <asm/asm.h>
+#include <asm/errno.h>
+
 static inline unsigned long long native_read_tscp(int *aux)
 {
 	unsigned long low, high;
@@ -17,37 +21,36 @@ static inline unsigned long long native_read_tscp(int *aux)
 	return low | ((u64)high >> 32);
 }
 
-#define rdtscp(low, high, aux)						\
-       do {                                                            \
-		unsigned long long _val = native_read_tscp(&(aux));     \
-		(low) = (u32)_val;                                      \
-		(high) = (u32)(_val >> 32);                             \
-       } while (0)
-
-#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
+/*
+ * i386 calling convention returns 64-bit value in edx:eax, while
+ * x86_64 returns at rax. Also, the "A" constraint does not really
+ * mean rdx:rax in x86_64, so we need specialized behaviour for each
+ * architecture
+ */
+#ifdef CONFIG_X86_64
+#define DECLARE_ARGS(val, low, high)	unsigned low, high
+#define EAX_EDX_VAL(val, low, high)	(low | ((u64)(high) << 32))
+#define EAX_EDX_ARGS(val, low, high)	"a" (low), "d" (high)
+#define EAX_EDX_RET(val, low, high)	"=a" (low), "=d" (high)
+#else
+#define DECLARE_ARGS(val, low, high)	unsigned long long val
+#define EAX_EDX_VAL(val, low, high)	(val)
+#define EAX_EDX_ARGS(val, low, high)	"A" (val)
+#define EAX_EDX_RET(val, low, high)	"=A" (val)
 #endif
-#endif
-
-#ifdef __i386__
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#include <asm/asm.h>
-#include <asm/errno.h>
 
 static inline unsigned long long native_read_msr(unsigned int msr)
 {
-	unsigned long long val;
+	DECLARE_ARGS(val, low, high);
 
-	asm volatile("rdmsr" : "=A" (val) : "c" (msr));
-	return val;
+	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+	return EAX_EDX_VAL(val, low, high);
 }
 
 static inline unsigned long long native_read_msr_safe(unsigned int msr,
 						      int *err)
 {
-	unsigned long long val;
+	DECLARE_ARGS(val, low, high);
 
 	asm volatile("2: rdmsr ; xor %0,%0\n"
 		     "1:\n\t"
@@ -58,10 +61,9 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 		     _ASM_ALIGN "\n\t"
 		     _ASM_PTR " 2b,3b\n\t"
 		     ".previous"
-		     : "=r" (*err), "=A" (val)
+		     : "=r" (*err), EAX_EDX_RET(val, low, high)
 		     : "c" (msr), "i" (-EFAULT));
-
-	return val;
+	return EAX_EDX_VAL(val, low, high);
 }
 
 static inline void native_write_msr(unsigned int msr,
@@ -91,16 +93,18 @@ static inline int native_write_msr_safe(unsigned int msr,
 
 static inline unsigned long long native_read_tsc(void)
 {
-	unsigned long long val;
-	asm volatile("rdtsc" : "=A" (val));
-	return val;
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+	return EAX_EDX_VAL(val, low, high);
 }
 
 static inline unsigned long long native_read_pmc(int counter)
 {
-	unsigned long long val;
-	asm volatile("rdpmc" : "=A" (val) : "c" (counter));
-	return val;
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
+	return EAX_EDX_VAL(val, low, high);
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -128,7 +132,8 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
 #define rdmsrl(msr,val)							\
 	((val) = native_read_msr(msr))
 
-#define wrmsrl(msr, val) native_write_msr(msr, (u32)val, (u32)(val >> 32))
+#define wrmsrl(msr, val)						\
+	native_write_msr(msr, (u32)((u64)(val)), (u32)((u64)(val) >> 32))
 
 /* wrmsr with exception handling */
 static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
@@ -160,104 +165,25 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
 		(low)  = (u32)_l;				\
 		(high) = (u32)(_l >> 32);			\
 	} while(0)
+
+#define rdtscp(low, high, aux)						\
+       do {                                                            \
+		unsigned long long _val = native_read_tscp(&(aux));     \
+		(low) = (u32)_val;                                      \
+		(high) = (u32)(_val >> 32);                             \
+       } while (0)
+
+#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
+
 #endif	/* !CONFIG_PARAVIRT */
 
-#endif  /* ! __ASSEMBLY__ */
-#endif  /* __KERNEL__ */
 
-#else   /* __i386__ */
-
-#ifndef __ASSEMBLY__
-#include <linux/errno.h>
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
-#define rdmsr(msr,val1,val2) \
-       __asm__ __volatile__("rdmsr" \
-			    : "=a" (val1), "=d" (val2) \
-			    : "c" (msr))
-
-
-#define rdmsrl(msr,val) do { unsigned long a__,b__; \
-       __asm__ __volatile__("rdmsr" \
-			    : "=a" (a__), "=d" (b__) \
-			    : "c" (msr)); \
-       val = a__ | (b__<<32); \
-} while(0)
-
-#define wrmsr(msr,val1,val2) \
-     __asm__ __volatile__("wrmsr" \
-			  : /* no outputs */ \
-			  : "c" (msr), "a" (val1), "d" (val2))
-
-#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32)
-
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
-     __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
-
-
-#define rdtscll(val) do { \
-     unsigned int __a,__d; \
-     __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
-     (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
-} while(0)
+#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 #define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
 
-#define rdpmc(counter,low,high) \
-     __asm__ __volatile__("rdpmc" \
-			  : "=a" (low), "=d" (high) \
-			  : "c" (counter))
-
-
-#ifdef __KERNEL__
-
-/* wrmsr with exception handling */
-#define wrmsr_safe(msr,a,b) ({ int ret__;			\
-	asm volatile("2: wrmsr ; xorl %0,%0\n"			\
-		     "1:\n\t"					\
-		     ".section .fixup,\"ax\"\n\t"		\
-		     "3:  movl %4,%0 ; jmp 1b\n\t"		\
-		     ".previous\n\t"				\
-		     ".section __ex_table,\"a\"\n"		\
-		     "   .align 8\n\t"				\
-		     "   .quad	2b,3b\n\t"			\
-		     ".previous"				\
-		     : "=a" (ret__)				\
-		     : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \
-	ret__; })
-
-#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
-
-#define rdmsr_safe(msr,a,b) \
-	({ int ret__;						\
-	  asm volatile ("1:       rdmsr\n"			\
-			"2:\n"					\
-			".section .fixup,\"ax\"\n"		\
-			"3:       movl %4,%0\n"			\
-			" jmp 2b\n"				\
-			".previous\n"				\
-			".section __ex_table,\"a\"\n"		\
-			" .align 8\n"				\
-			" .quad 1b,3b\n"				\
-			".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b)) \
-			:"c"(msr), "i"(-EIO), "0"(0));			\
-	  ret__; })
-
-#endif  /* __ASSEMBLY__ */
-
-#endif  /* !__i386__ */
-
-#ifndef __ASSEMBLY__
-
 #ifdef CONFIG_SMP
 void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -281,7 +207,8 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 	return wrmsr_safe(msr_no, l, h);
 }
 #endif  /* CONFIG_SMP */
-#endif  /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
 
 #endif

From 82f74e7159749cc511ebf5954a7b9ea6ad634949 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 249/890] x86: unify include/asm-x86/linkage_[32|64].h

Remove definitions of FASTCALL/fastcall from linkage_32 as compiled with
-regparm=3 by default since 2.6.20 and should no longer be needed.

CONFIG X86_64 and CONFIG_X86_ALIGNMENT_16 are mutually exclusive as found
in Kconfig.cpu so it should be fine to test them separately.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/linkage.h    | 23 +++++++++++++++++++----
 include/asm-x86/linkage_32.h | 15 ---------------
 include/asm-x86/linkage_64.h |  6 ------
 3 files changed, 19 insertions(+), 25 deletions(-)
 delete mode 100644 include/asm-x86/linkage_32.h
 delete mode 100644 include/asm-x86/linkage_64.h

diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 94b257fa8701..5a4c95905420 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h
@@ -1,5 +1,20 @@
-#ifdef CONFIG_X86_32
-# include "linkage_32.h"
-#else
-# include "linkage_64.h"
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#ifdef CONFIG_X86_64
+#define __ALIGN .p2align 4,,15
+#define __ALIGN_STR ".p2align 4,,15"
 #endif
+
+#ifdef CONFIG_X86_32
+#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
+#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))
+#endif
+
+#ifdef CONFIG_X86_ALIGNMENT_16
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#endif
+
+#endif
+
diff --git a/include/asm-x86/linkage_32.h b/include/asm-x86/linkage_32.h
deleted file mode 100644
index f4a6ebac0247..000000000000
--- a/include/asm-x86/linkage_32.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-#define FASTCALL(x)	x __attribute__((regparm(3)))
-#define fastcall	__attribute__((regparm(3)))
-
-#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))
-
-#ifdef CONFIG_X86_ALIGNMENT_16
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
-#endif
-
-#endif
diff --git a/include/asm-x86/linkage_64.h b/include/asm-x86/linkage_64.h
deleted file mode 100644
index b5f39d0189ce..000000000000
--- a/include/asm-x86/linkage_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#define __ALIGN .p2align 4,,15
-
-#endif

From cc503c1b43e002e3f1fed70f46d947e2bf349bb6 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 250/890] x86: PIE executable randomization

main executable of (specially compiled/linked -pie/-fpie) ET_DYN binaries
onto a random address (in cases in which mmap() is allowed to perform a
randomization).

The code has been extraced from Ingo's exec-shield patch
http://people.redhat.com/mingo/exec-shield/

[akpm@linux-foundation.org: fix used-uninitialsied warning]
[kamezawa.hiroyu@jp.fujitsu.com: fixed ia32 ELF on x86_64 handling]

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/ia32/binfmt_elf32.c |   2 +-
 arch/x86/kernel/sys_x86_64.c  |  98 ++++++++++++++++++++++++++
 arch/x86/mm/mmap_64.c         | 129 ++++++++++++++++++++++++++++------
 fs/binfmt_elf.c               | 107 ++++++++++++++++++++++------
 include/asm-x86/pgtable_64.h  |   1 +
 5 files changed, 292 insertions(+), 45 deletions(-)

diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 3e35987af458..2a662215359c 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -222,7 +222,7 @@ elf32_set_personality (void)
 }
 
 static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
 {
 	unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
 
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 907942ee6e76..95485e63fd2f 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -12,6 +12,7 @@
 #include <linux/file.h>
 #include <linux/utsname.h>
 #include <linux/personality.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/ia32.h>
@@ -65,6 +66,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 			   unsigned long *end)
 {
 	if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) {
+		unsigned long new_begin;
 		/* This is usually used needed to map code in small
 		   model, so it needs to be in the first 31bit. Limit
 		   it to that.  This means we need to move the
@@ -74,6 +76,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 		   of playground for now. -AK */ 
 		*begin = 0x40000000; 
 		*end = 0x80000000;		
+		if (current->flags & PF_RANDOMIZE) {
+			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
+			if (new_begin)
+				*begin = new_begin;
+		}
 	} else {
 		*begin = TASK_UNMAPPED_BASE;
 		*end = TASK_SIZE; 
@@ -143,6 +150,97 @@ full_search:
 	}
 }
 
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	/* for MAP_32BIT mappings we force the legact mmap base */
+	if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT))
+		goto bottomup;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	/* check if free_area_cache is useful for us */
+	if (len <= mm->cached_hole_size) {
+ 	        mm->cached_hole_size = 0;
+ 		mm->free_area_cache = mm->mmap_base;
+ 	}
+
+	/* either no address requested or can't fit in requested address hole */
+	addr = mm->free_area_cache;
+
+	/* make sure it can fit in the remaining address space */
+	if (addr > len) {
+		vma = find_vma(mm, addr-len);
+		if (!vma || addr <= vma->vm_start)
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr-len);
+	}
+
+	if (mm->mmap_base < len)
+		goto bottomup;
+
+	addr = mm->mmap_base-len;
+
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * else if new region fits below vma->vm_start,
+		 * return with success:
+		 */
+		vma = find_vma(mm, addr);
+		if (!vma || addr+len <= vma->vm_start)
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+
+ 		/* remember the largest hole we saw so far */
+ 		if (addr + mm->cached_hole_size < vma->vm_start)
+ 		        mm->cached_hole_size = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start-len;
+	} while (len < vma->vm_start);
+
+bottomup:
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->cached_hole_size = ~0UL;
+  	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = mm->mmap_base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
+
+
 asmlinkage long sys_uname(struct new_utsname __user * name)
 {
 	int err;
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
index ffb71a31bb6e..8cf03ea651f8 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap_64.c
@@ -1,32 +1,117 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
+/*
+ *  linux/arch/x86-64/mm/mmap.c
+ *
+ *  flexible mmap layout support
+ *
+ * Based on code by Ingo Molnar and Andi Kleen, copyrighted
+ * as follows:
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ * Copyright 2005 Andi Kleen, SUSE Labs.
+ * Copyright 2007 Jiri Kosina, SUSE Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
  */
+
+#include <linux/personality.h>
 #include <linux/mm.h>
-#include <linux/sched.h>
 #include <linux/random.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
 #include <asm/ia32.h>
 
-/* Notebook: move the mmap code from sys_x86_64.c over here. */
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+static inline unsigned long mmap_base(void)
 {
-#ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->flags & _TIF_IA32)
-		return ia32_pick_mmap_layout(mm);
-#endif
-	mm->mmap_base = TASK_UNMAPPED_BASE;
-	if (current->flags & PF_RANDOMIZE) {
-		/*
-		 * Add 28bit randomness which is about 40bits of
-		 * address space because mmap base has to be page
-		 * aligned.  or ~1/128 of the total user VM (total
-		 * user address space is 47bits)
-		 */
-		unsigned rnd = get_random_int() & 0xfffffff;
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
 
-		mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
-	}
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return TASK_SIZE - (gap & PAGE_MASK);
 }
 
+static inline int mmap_is_32(void)
+{
+#ifdef CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline int mmap_is_legacy(void)
+{
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+
+	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+		return 1;
+
+	return sysctl_legacy_va_layout;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	int rnd = 0;
+	if (current->flags & PF_RANDOMIZE) {
+		/*
+		 * Add 28bit randomness which is about 40bits of address space
+		 * because mmap base has to be page aligned.
+		 * or ~1/128 of the total user VM
+		 * (total user address space is 47bits)
+		 */
+		rnd = get_random_int() & 0xfffffff;
+	}
+
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	if (mmap_is_32()) {
+#ifdef CONFIG_IA32_EMULATION
+		/* ia32_pick_mmap_layout has its own. */
+		return ia32_pick_mmap_layout(mm);
+#endif
+	} else if(mmap_is_legacy()) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+		mm->unmap_area = arch_unmap_area;
+	} else {
+		mm->mmap_base = mmap_base();
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
+		if (current->flags & PF_RANDOMIZE)
+			rnd = -rnd;
+	}
+	if (current->flags & PF_RANDOMIZE) {
+		mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+	}
+}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 043a800c8f71..8193d24be159 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
 
 /*
  * If we don't support core dumping, then supply a NULL so we
@@ -298,33 +298,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 #ifndef elf_map
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
-		struct elf_phdr *eppnt, int prot, int type)
+		struct elf_phdr *eppnt, int prot, int type,
+		unsigned long total_size)
 {
 	unsigned long map_addr;
-	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(size);
 
-	down_write(&current->mm->mmap_sem);
 	/* mmap() will return -EINVAL if given a zero size, but a
 	 * segment with zero filesize is perfectly valid */
-	if (eppnt->p_filesz + pageoffset)
-		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-				   eppnt->p_filesz + pageoffset, prot, type,
-				   eppnt->p_offset - pageoffset);
-	else
-		map_addr = ELF_PAGESTART(addr);
+	if (!size)
+		return addr;
+
+	down_write(&current->mm->mmap_sem);
+	/*
+	* total_size is the size of the ELF (interpreter) image.
+	* The _first_ mmap needs to know the full size, otherwise
+	* randomization might put this image into an overlapping
+	* position with the ELF binary image. (since size < total_size)
+	* So we first map the 'big' image - and unmap the remainder at
+	* the end. (which unmap is needed for ELF images with holes.)
+	*/
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			do_munmap(current->mm, map_addr+size, total_size-size);
+	} else
+		map_addr = do_mmap(filep, addr, size, prot, type, off);
+
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
 
 #endif /* !elf_map */
 
+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+	int i, first_idx = -1, last_idx = -1;
+
+	for (i = 0; i < nr; i++) {
+		if (cmds[i].p_type == PT_LOAD) {
+			last_idx = i;
+			if (first_idx == -1)
+				first_idx = i;
+		}
+	}
+	if (first_idx == -1)
+		return 0;
+
+	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+				ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
    is only provided so that we can read a.out libraries that have
    an ELF header */
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
-		struct file *interpreter, unsigned long *interp_load_addr)
+		struct file *interpreter, unsigned long *interp_map_addr,
+		unsigned long no_base)
 {
 	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
@@ -332,6 +369,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	int load_addr_set = 0;
 	unsigned long last_bss = 0, elf_bss = 0;
 	unsigned long error = ~0UL;
+	unsigned long total_size;
 	int retval, i, size;
 
 	/* First of all, some simple consistency checks */
@@ -370,6 +408,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		goto out_close;
 	}
 
+	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+	if (!total_size) {
+		error = -EINVAL;
+		goto out_close;
+	}
+
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
@@ -387,9 +431,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 				elf_type |= MAP_FIXED;
+			else if (no_base && interp_elf_ex->e_type == ET_DYN)
+				load_addr = -vaddr;
 
 			map_addr = elf_map(interpreter, load_addr + vaddr,
-					   eppnt, elf_prot, elf_type);
+					   eppnt, elf_prot, elf_type, total_size);
+			total_size = 0;
+			if (!*interp_map_addr)
+				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
 				goto out_close;
@@ -455,8 +504,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			goto out_close;
 	}
 
-	*interp_load_addr = load_addr;
-	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+	error = load_addr;
 
 out_close:
 	kfree(elf_phdata);
@@ -553,7 +601,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	int elf_exec_fileno;
 	int retval, i;
 	unsigned int size;
-	unsigned long elf_entry, interp_load_addr = 0;
+	unsigned long elf_entry;
+	unsigned long interp_load_addr = 0;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
@@ -825,9 +874,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack = bprm->p;
 
 	/* Now we do a little grungy work by mmaping the ELF image into
-	   the correct location in memory.  At this point, we assume that
-	   the image should be loaded at fixed address, not at a variable
-	   address. */
+	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		int elf_prot = 0, elf_flags;
@@ -881,11 +928,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
+#ifdef CONFIG_X86
+			load_bias = 0;
+#else
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags);
+				elf_prot, elf_flags,0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?
@@ -961,13 +1012,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	}
 
 	if (elf_interpreter) {
-		if (interpreter_type == INTERPRETER_AOUT)
+		if (interpreter_type == INTERPRETER_AOUT) {
 			elf_entry = load_aout_interp(&loc->interp_ex,
 						     interpreter);
-		else
+		} else {
+			unsigned long uninitialized_var(interp_map_addr);
+
 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
 						    interpreter,
-						    &interp_load_addr);
+						    &interp_map_addr,
+						    load_bias);
+			if (!IS_ERR((void *)elf_entry)) {
+				/*
+				 * load_elf_interp() returns relocation
+				 * adjustment
+				 */
+				interp_load_addr = elf_entry;
+				elf_entry += loc->interp_elf_ex.e_entry;
+			}
+		}
 		if (BAD_ADDR(elf_entry)) {
 			force_sig(SIGSEGV, current);
 			retval = IS_ERR((void *)elf_entry) ?
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9d4f11dd566f..6cf40dec0932 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -413,6 +413,7 @@ pte_t *lookup_address(unsigned long addr);
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 
 #define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
 #define pgtable_cache_init()   do { } while (0)
 #define check_pgt_cache()      do { } while (0)

From bb1ad8205be4cb95e3286d7442596da6fd70409f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 251/890] x86: PIE executable randomization, checkpatch fixes

#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: no space between function name and open parenthesis '('
#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: line over 80 characters
#67: FILE: arch/x86/kernel/sys_x86_64.c:80:
+			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);

ERROR: use tabs not spaces
#110: FILE: arch/x86/kernel/sys_x86_64.c:185:
+ ^I        mm->cached_hole_size = 0;$

ERROR: use tabs not spaces
#111: FILE: arch/x86/kernel/sys_x86_64.c:186:
+ ^I^Imm->free_area_cache = mm->mmap_base;$

ERROR: use tabs not spaces
#112: FILE: arch/x86/kernel/sys_x86_64.c:187:
+ ^I}$

ERROR: use tabs not spaces
#141: FILE: arch/x86/kernel/sys_x86_64.c:216:
+ ^I^I/* remember the largest hole we saw so far */$

ERROR: use tabs not spaces
#142: FILE: arch/x86/kernel/sys_x86_64.c:217:
+ ^I^Iif (addr + mm->cached_hole_size < vma->vm_start)$

ERROR: use tabs not spaces
#143: FILE: arch/x86/kernel/sys_x86_64.c:218:
+ ^I^I        mm->cached_hole_size = vma->vm_start - addr;$

ERROR: use tabs not spaces
#157: FILE: arch/x86/kernel/sys_x86_64.c:232:
+  ^Imm->free_area_cache = TASK_UNMAPPED_BASE;$

ERROR: need a space before the open parenthesis '('
#291: FILE: arch/x86/mm/mmap_64.c:101:
+	} else if(mmap_is_legacy()) {

WARNING: braces {} are not necessary for single statement blocks
#302: FILE: arch/x86/mm/mmap_64.c:112:
+	if (current->flags & PF_RANDOMIZE) {
+		mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+	}

WARNING: line over 80 characters
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: no space between function name and open parenthesis '('
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: line over 80 characters
#429: FILE: fs/binfmt_elf.c:438:
+					   eppnt, elf_prot, elf_type, total_size);

ERROR: need space after that ',' (ctx:VxV)
#480: FILE: fs/binfmt_elf.c:939:
+				elf_prot, elf_flags,0);
 				                   ^

total: 9 errors, 7 warnings, 461 lines checked
Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches

Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/ia32/binfmt_elf32.c |  3 ++-
 arch/x86/kernel/sys_x86_64.c  | 14 +++++++-------
 arch/x86/mm/mmap_64.c         |  5 ++---
 fs/binfmt_elf.c               |  7 ++++---
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 2a662215359c..4f0c30c38e99 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -222,7 +222,8 @@ elf32_set_personality (void)
 }
 
 static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
+elf32_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt,
+		int prot, int type, unsigned long unused)
 {
 	unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
 
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 95485e63fd2f..bd802a5e1aa3 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -182,9 +182,9 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* check if free_area_cache is useful for us */
 	if (len <= mm->cached_hole_size) {
- 	        mm->cached_hole_size = 0;
- 		mm->free_area_cache = mm->mmap_base;
- 	}
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = mm->mmap_base;
+	}
 
 	/* either no address requested or can't fit in requested address hole */
 	addr = mm->free_area_cache;
@@ -213,9 +213,9 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			/* remember the address as a hint for next time */
 			return (mm->free_area_cache = addr);
 
- 		/* remember the largest hole we saw so far */
- 		if (addr + mm->cached_hole_size < vma->vm_start)
- 		        mm->cached_hole_size = vma->vm_start - addr;
+		/* remember the largest hole we saw so far */
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 
 		/* try just below the current vma->vm_start */
 		addr = vma->vm_start-len;
@@ -229,7 +229,7 @@ bottomup:
 	 * allocations.
 	 */
 	mm->cached_hole_size = ~0UL;
-  	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 	/*
 	 * Restore the topdown base:
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
index 8cf03ea651f8..65b34f226f14 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap_64.c
@@ -100,7 +100,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		/* ia32_pick_mmap_layout has its own. */
 		return ia32_pick_mmap_layout(mm);
 #endif
-	} else if(mmap_is_legacy()) {
+	} else if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;
@@ -111,7 +111,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		if (current->flags & PF_RANDOMIZE)
 			rnd = -rnd;
 	}
-	if (current->flags & PF_RANDOMIZE) {
+	if (current->flags & PF_RANDOMIZE)
 		mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
-	}
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8193d24be159..b8bca1ebc1a0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,8 @@
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
+static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
+				int, int, unsigned long);
 
 /*
  * If we don't support core dumping, then supply a NULL so we
@@ -435,7 +436,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 				load_addr = -vaddr;
 
 			map_addr = elf_map(interpreter, load_addr + vaddr,
-					   eppnt, elf_prot, elf_type, total_size);
+					eppnt, elf_prot, elf_type, total_size);
 			total_size = 0;
 			if (!*interp_map_addr)
 				*interp_map_addr = map_addr;
@@ -936,7 +937,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags,0);
+				elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?

From 954683a2c19696114894384e34f858bb9f455b11 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:31:07 +0100
Subject: [PATCH 252/890] x86: PIE executable randomization, uninlining

Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/mmap_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
index 65b34f226f14..119bf34ec529 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap_64.c
@@ -42,7 +42,7 @@
 #define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static inline unsigned long mmap_base(void)
+static unsigned long mmap_base(void)
 {
 	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
 
@@ -54,7 +54,7 @@ static inline unsigned long mmap_base(void)
 	return TASK_SIZE - (gap & PAGE_MASK);
 }
 
-static inline int mmap_is_32(void)
+static int mmap_is_32(void)
 {
 #ifdef CONFIG_IA32_EMULATION
 	if (test_thread_flag(TIF_IA32))
@@ -63,7 +63,7 @@ static inline int mmap_is_32(void)
 	return 0;
 }
 
-static inline int mmap_is_legacy(void)
+static int mmap_is_legacy(void)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;

From 929fd589135ed417663f1b75b8031c9cf6687700 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 253/890] x86: some whitespace cleanups in paging code

This patch does some whitespace cleanups in the paging code to fix some
checkpatch.pl warnings of my formerly merged cleanup patches.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c        |  2 +-
 include/asm-x86/pgtable_64.h | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3f8bf298dbb8..5cadbb40da3e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -226,7 +226,7 @@ __meminit void *early_ioremap(unsigned long addr, unsigned long size)
 		vaddr += addr & ~PMD_MASK;
 		addr &= PMD_MASK;
 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
-			set_pmd(pmd + i,__pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+			set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
 		__flush_tlb();
 		return (void *)vaddr;
 	next:
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 6cf40dec0932..3072619365a8 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -154,22 +154,22 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
-#define _PAGE_PRESENT	(_AC(1,UL)<<_PAGE_BIT_PRESENT)
-#define _PAGE_RW	(_AC(1,UL)<<_PAGE_BIT_RW)
-#define _PAGE_USER	(_AC(1,UL)<<_PAGE_BIT_USER)
-#define _PAGE_PWT	(_AC(1,UL)<<_PAGE_BIT_PWT)
-#define _PAGE_PCD	(_AC(1,UL)<<_PAGE_BIT_PCD)
-#define _PAGE_ACCESSED	(_AC(1,UL)<<_PAGE_BIT_ACCESSED)
-#define _PAGE_DIRTY	(_AC(1,UL)<<_PAGE_BIT_DIRTY)
+#define _PAGE_PRESENT	(_AC(1, UL)<<_PAGE_BIT_PRESENT)
+#define _PAGE_RW	(_AC(1, UL)<<_PAGE_BIT_RW)
+#define _PAGE_USER	(_AC(1, UL)<<_PAGE_BIT_USER)
+#define _PAGE_PWT	(_AC(1, UL)<<_PAGE_BIT_PWT)
+#define _PAGE_PCD	(_AC(1, UL)<<_PAGE_BIT_PCD)
+#define _PAGE_ACCESSED	(_AC(1, UL)<<_PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY	(_AC(1, UL)<<_PAGE_BIT_DIRTY)
 /* 2MB page */
-#define _PAGE_PSE	(_AC(1,UL)<<_PAGE_BIT_PSE)
+#define _PAGE_PSE	(_AC(1, UL)<<_PAGE_BIT_PSE)
 /* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_FILE	(_AC(1,UL)<<_PAGE_BIT_FILE)
+#define _PAGE_FILE	(_AC(1, UL)<<_PAGE_BIT_FILE)
 /* Global TLB entry */
-#define _PAGE_GLOBAL	(_AC(1,UL)<<_PAGE_BIT_GLOBAL)
+#define _PAGE_GLOBAL	(_AC(1, UL)<<_PAGE_BIT_GLOBAL)
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
-#define _PAGE_NX        (_AC(1,UL)<<_PAGE_BIT_NX)
+#define _PAGE_NX        (_AC(1, UL)<<_PAGE_BIT_NX)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

From e34907ae180f4fe6c28bb4516c679c2f81b0c9ed Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 254/890] x86: remove volatile keyword from clflush.

the p parameter is an explicit memory reference, and is
enough to prevent gcc to being nasty here. The volatile
seems completely not needed.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index f5b3f77f5310..28978b17b07a 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -161,7 +161,7 @@ static inline void native_wbinvd(void)
 	asm volatile("wbinvd": : :"memory");
 }
 
-static inline void clflush(volatile void *__p)
+static inline void clflush(void *__p)
 {
 	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
 }

From d89542229b657bdcce6a6f76168f9098ee3e9344 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 255/890] x86: put together equal pieces of system.h

This patch puts together pieces of system_{32,64}.h that
looks like the same. It's the first step towards integration
of this file.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c |  2 +-
 include/asm-x86/system.h     | 69 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/system_32.h  | 58 ------------------------------
 include/asm-x86/system_64.h  | 12 -------
 4 files changed, 70 insertions(+), 71 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4c4d8b3f046e..057b5442ffda 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -99,7 +99,7 @@ void exit_idle(void)
  * We use this if we don't have any better
  * idle routine..
  */
-static void default_idle(void)
+void default_idle(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
 	/*
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 692562b48f2a..d0803f8c70c4 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -1,5 +1,74 @@
+#ifndef _ASM_X86_SYSTEM_H_
+#define _ASM_X86_SYSTEM_H_
+
+#include <asm/asm.h>
+
 #ifdef CONFIG_X86_32
 # include "system_32.h"
 #else
 # include "system_64.h"
 #endif
+
+#ifdef __KERNEL__
+#define _set_base(addr, base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+	"rorl $16,%%edx\n\t" \
+	"movb %%dl,%2\n\t" \
+	"movb %%dh,%3" \
+	:"=&d" (__pr) \
+	:"m" (*((addr)+2)), \
+	 "m" (*((addr)+4)), \
+	 "m" (*((addr)+7)), \
+	 "0" (base) \
+	); } while (0)
+
+#define _set_limit(addr, limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+	"rorl $16,%%edx\n\t" \
+	"movb %2,%%dh\n\t" \
+	"andb $0xf0,%%dh\n\t" \
+	"orb %%dh,%%dl\n\t" \
+	"movb %%dl,%2" \
+	:"=&d" (__lr) \
+	:"m" (*(addr)), \
+	 "m" (*((addr)+6)), \
+	 "0" (limit) \
+	); } while (0)
+
+#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
+#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+	asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+	unsigned long __limit;
+	__asm__("lsll %1,%0"
+		:"=r" (__limit):"r" (segment));
+	return __limit+1;
+}
+#endif /* __KERNEL__ */
+
+static inline void clflush(void *__p)
+{
+	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+void disable_hlt(void);
+void enable_hlt(void);
+
+extern int es7000_plat;
+void cpu_idle_wait(void);
+
+extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+
+void default_idle(void);
+
+#endif
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index 28978b17b07a..fb457642ac58 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -34,34 +34,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 		      "2" (prev), "d" (next));				\
 } while (0)
 
-#define _set_base(addr,base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %%dl,%2\n\t" \
-	"movb %%dh,%3" \
-	:"=&d" (__pr) \
-	:"m" (*((addr)+2)), \
-	 "m" (*((addr)+4)), \
-	 "m" (*((addr)+7)), \
-         "0" (base) \
-        ); } while(0)
-
-#define _set_limit(addr,limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %2,%%dh\n\t" \
-	"andb $0xf0,%%dh\n\t" \
-	"orb %%dh,%%dl\n\t" \
-	"movb %%dl,%2" \
-	:"=&d" (__lr) \
-	:"m" (*(addr)), \
-	 "m" (*((addr)+6)), \
-	 "0" (limit) \
-        ); } while(0)
-
-#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
-#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) )
-
 /*
  * Load a segment. Fall back on loading the zero
  * segment if something goes wrong..
@@ -83,12 +55,6 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
 		".previous"			\
 		: :"rm" (value))
 
-/*
- * Save a segment register away
- */
-#define savesegment(seg, value) \
-	asm volatile("mov %%" #seg ",%0":"=rm" (value))
-
 
 static inline void native_clts(void)
 {
@@ -161,11 +127,6 @@ static inline void native_wbinvd(void)
 	asm volatile("wbinvd": : :"memory");
 }
 
-static inline void clflush(void *__p)
-{
-	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
-}
-
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -190,15 +151,6 @@ static inline void clflush(void *__p)
 
 #endif	/* __KERNEL__ */
 
-static inline unsigned long get_limit(unsigned long segment)
-{
-	unsigned long __limit;
-	__asm__("lsll %1,%0"
-		:"=r" (__limit):"r" (segment));
-	return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
 
 /*
  * Force strict CPU ordering.
@@ -305,15 +257,5 @@ static inline unsigned long get_limit(unsigned long segment)
  * disable hlt during certain critical i/o operations
  */
 #define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-extern int es7000_plat;
-void cpu_idle_wait(void);
-
-extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-
-void default_idle(void);
 
 #endif
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 3dcb217a7202..cc5b2666a044 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -141,13 +141,6 @@ static inline void write_cr8(unsigned long val)
 
 #endif	/* __KERNEL__ */
 
-static inline void clflush(volatile void *__p)
-{
-	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	barrier()
@@ -177,9 +170,4 @@ static inline void clflush(volatile void *__p)
 
 #include <linux/irqflags.h>
 
-void cpu_idle_wait(void);
-
-extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-
 #endif

From a6b4655258efd39b590e519815ed43bb74cd7188 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 256/890] x86: unify load_segment macro

This patch unifies the load_segment() macro, making them equal in both
x86_64 and i386 architectures. The common version goes to system.h,
and the old are deleted.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h    | 21 +++++++++++++++++++++
 include/asm-x86/system_32.h | 22 ----------------------
 include/asm-x86/system_64.h | 20 --------------------
 3 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index d0803f8c70c4..3740bada097c 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -38,6 +38,27 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
 
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg, value)			\
+	asm volatile("\n"			\
+		"1:\t"				\
+		"movl %k0,%%" #seg "\n"		\
+		"2:\n"				\
+		".section .fixup,\"ax\"\n"	\
+		"3:\t"				\
+		"movl %k1, %%" #seg "\n\t"	\
+		"jmp 2b\n"			\
+		".previous\n"			\
+		".section __ex_table,\"a\"\n\t"	\
+		_ASM_ALIGN "\n\t"		\
+		_ASM_PTR " 1b,3b\n"		\
+		".previous"			\
+		: :"r" (value), "r" (0))
+
+
 /*
  * Save a segment register away
  */
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index fb457642ac58..8db478984ed1 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -34,28 +34,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 		      "2" (prev), "d" (next));				\
 } while (0)
 
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value)			\
-	asm volatile("\n"			\
-		"1:\t"				\
-		"mov %0,%%" #seg "\n"		\
-		"2:\n"				\
-		".section .fixup,\"ax\"\n"	\
-		"3:\t"				\
-		"pushl $0\n\t"			\
-		"popl %%" #seg "\n\t"		\
-		"jmp 2b\n"			\
-		".previous\n"			\
-		".section __ex_table,\"a\"\n\t"	\
-		".align 4\n\t"			\
-		".long 1b,3b\n"			\
-		".previous"			\
-		: :"rm" (value))
-
-
 static inline void native_clts(void)
 {
 	asm volatile ("clts");
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index cc5b2666a044..0885caace5d4 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -49,26 +49,6 @@
     
 extern void load_gs_index(unsigned); 
 
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value)	\
-	asm volatile("\n"			\
-		"1:\t"				\
-		"movl %k0,%%" #seg "\n"		\
-		"2:\n"				\
-		".section .fixup,\"ax\"\n"	\
-		"3:\t"				\
-		"movl %1,%%" #seg "\n\t" 	\
-		"jmp 2b\n"			\
-		".previous\n"			\
-		".section __ex_table,\"a\"\n\t"	\
-		".align 8\n\t"			\
-		".quad 1b,3b\n"			\
-		".previous"			\
-		: :"r" (value), "r" (0))
-
 /*
  * Clear and set 'TS' bit respectively
  */

From d3ca901f94b3299dfe3a814770d751844608419f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 257/890] x86: unify paravirt parts of system.h

This patch moves the i386 control registers manipulation functions,
wbinvd, and clts functions to system.h. They are essentially the same
as in x86_64.

With this, system.h paravirt comes for free in x86_64.

[ mingo@elte.hu: reintroduced the cr8 bits - needed for resume images ]

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h    | 110 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/system_32.h |  94 ------------------------------
 include/asm-x86/system_64.h |  85 ++++------------------------
 3 files changed, 122 insertions(+), 167 deletions(-)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 3740bada097c..01ba1f8e64d1 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -3,6 +3,8 @@
 
 #include <asm/asm.h>
 
+#include <linux/kernel.h>
+
 #ifdef CONFIG_X86_32
 # include "system_32.h"
 #else
@@ -38,6 +40,8 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
 
+extern void load_gs_index(unsigned);
+
 /*
  * Load a segment. Fall back on loading the zero
  * segment if something goes wrong..
@@ -72,6 +76,112 @@ static inline unsigned long get_limit(unsigned long segment)
 		:"=r" (__limit):"r" (segment));
 	return __limit+1;
 }
+
+static inline void native_clts(void)
+{
+	asm volatile ("clts");
+}
+
+/*
+ * Volatile isn't enough to prevent the compiler from reordering the
+ * read/write functions for the control registers and messing everything up.
+ * A memory clobber would solve the problem, but would prevent reordering of
+ * all loads stores around it, which can hurt performance. Solution is to
+ * use a variable and mimic reads and writes to it to enforce serialization
+ */
+static unsigned long __force_order;
+
+static inline unsigned long native_read_cr0(void)
+{
+	unsigned long val;
+	asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order));
+	return val;
+}
+
+static inline void native_write_cr0(unsigned long val)
+{
+	asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr2(void)
+{
+	unsigned long val;
+	asm volatile("mov %%cr2,%0\n\t" :"=r" (val), "=m" (__force_order));
+	return val;
+}
+
+static inline void native_write_cr2(unsigned long val)
+{
+	asm volatile("mov %0,%%cr2": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr3(void)
+{
+	unsigned long val;
+	asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order));
+	return val;
+}
+
+static inline void native_write_cr3(unsigned long val)
+{
+	asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr4(void)
+{
+	unsigned long val;
+	asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order));
+	return val;
+}
+
+static inline unsigned long native_read_cr4_safe(void)
+{
+	unsigned long val;
+	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
+	 * exists, so it will never fail. */
+#ifdef CONFIG_X86_32
+	asm volatile("1: mov %%cr4, %0		\n"
+		"2:				\n"
+		".section __ex_table,\"a\"	\n"
+		".long 1b,2b			\n"
+		".previous			\n"
+		: "=r" (val), "=m" (__force_order) : "0" (0));
+#else
+	val = native_read_cr4();
+#endif
+	return val;
+}
+
+static inline void native_write_cr4(unsigned long val)
+{
+	asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
+}
+
+static inline void native_wbinvd(void)
+{
+	asm volatile("wbinvd": : :"memory");
+}
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define read_cr0()	(native_read_cr0())
+#define write_cr0(x)	(native_write_cr0(x))
+#define read_cr2()	(native_read_cr2())
+#define write_cr2(x)	(native_write_cr2(x))
+#define read_cr3()	(native_read_cr3())
+#define write_cr3(x)	(native_write_cr3(x))
+#define read_cr4()	(native_read_cr4())
+#define read_cr4_safe()	(native_read_cr4_safe())
+#define write_cr4(x)	(native_write_cr4(x))
+#define wbinvd()	(native_wbinvd())
+
+/* Clear the 'TS' bit */
+#define clts()		(native_clts())
+
+#endif/* CONFIG_PARAVIRT */
+
+#define stts() write_cr0(8 | read_cr0())
+
 #endif /* __KERNEL__ */
 
 static inline void clflush(void *__p)
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index 8db478984ed1..c05568290add 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -1,7 +1,6 @@
 #ifndef __ASM_SYSTEM_H
 #define __ASM_SYSTEM_H
 
-#include <linux/kernel.h>
 #include <asm/segment.h>
 #include <asm/cpufeature.h>
 #include <asm/cmpxchg.h>
@@ -34,99 +33,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 		      "2" (prev), "d" (next));				\
 } while (0)
 
-static inline void native_clts(void)
-{
-	asm volatile ("clts");
-}
-
-static inline unsigned long native_read_cr0(void)
-{
-	unsigned long val;
-	asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
-	return val;
-}
-
-static inline void native_write_cr0(unsigned long val)
-{
-	asm volatile("movl %0,%%cr0": :"r" (val));
-}
-
-static inline unsigned long native_read_cr2(void)
-{
-	unsigned long val;
-	asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
-	return val;
-}
-
-static inline void native_write_cr2(unsigned long val)
-{
-	asm volatile("movl %0,%%cr2": :"r" (val));
-}
-
-static inline unsigned long native_read_cr3(void)
-{
-	unsigned long val;
-	asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
-	return val;
-}
-
-static inline void native_write_cr3(unsigned long val)
-{
-	asm volatile("movl %0,%%cr3": :"r" (val));
-}
-
-static inline unsigned long native_read_cr4(void)
-{
-	unsigned long val;
-	asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
-	return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
-	unsigned long val;
-	/* This could fault if %cr4 does not exist */
-	asm volatile("1: movl %%cr4, %0		\n"
-		"2:				\n"
-		".section __ex_table,\"a\"	\n"
-		".long 1b,2b			\n"
-		".previous			\n"
-		: "=r" (val): "0" (0));
-	return val;
-}
-
-static inline void native_write_cr4(unsigned long val)
-{
-	asm volatile("movl %0,%%cr4": :"r" (val));
-}
-
-static inline void native_wbinvd(void)
-{
-	asm volatile("wbinvd": : :"memory");
-}
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define read_cr0()	(native_read_cr0())
-#define write_cr0(x)	(native_write_cr0(x))
-#define read_cr2()	(native_read_cr2())
-#define write_cr2(x)	(native_write_cr2(x))
-#define read_cr3()	(native_read_cr3())
-#define write_cr3(x)	(native_write_cr3(x))
-#define read_cr4()	(native_read_cr4())
-#define read_cr4_safe()	(native_read_cr4_safe())
-#define write_cr4(x)	(native_write_cr4(x))
-#define wbinvd()	(native_wbinvd())
-
-/* Clear the 'TS' bit */
-#define clts()		(native_clts())
-
-#endif/* CONFIG_PARAVIRT */
-
-/* Set the 'TS' bit */
-#define stts() write_cr0(8 | read_cr0())
-
 #endif	/* __KERNEL__ */
 
 
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 0885caace5d4..14ad6138439b 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -1,7 +1,6 @@
 #ifndef __ASM_SYSTEM_H
 #define __ASM_SYSTEM_H
 
-#include <linux/kernel.h>
 #include <asm/segment.h>
 #include <asm/cmpxchg.h>
 
@@ -47,78 +46,6 @@
 		       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))   \
 		     : "memory", "cc" __EXTRA_CLOBBER)
     
-extern void load_gs_index(unsigned); 
-
-/*
- * Clear and set 'TS' bit respectively
- */
-#define clts() __asm__ __volatile__ ("clts")
-
-static inline unsigned long read_cr0(void)
-{ 
-	unsigned long cr0;
-	asm volatile("movq %%cr0,%0" : "=r" (cr0));
-	return cr0;
-}
-
-static inline void write_cr0(unsigned long val) 
-{ 
-	asm volatile("movq %0,%%cr0" :: "r" (val));
-}
-
-static inline unsigned long read_cr2(void)
-{
-	unsigned long cr2;
-	asm volatile("movq %%cr2,%0" : "=r" (cr2));
-	return cr2;
-}
-
-static inline void write_cr2(unsigned long val)
-{
-	asm volatile("movq %0,%%cr2" :: "r" (val));
-}
-
-static inline unsigned long read_cr3(void)
-{ 
-	unsigned long cr3;
-	asm volatile("movq %%cr3,%0" : "=r" (cr3));
-	return cr3;
-}
-
-static inline void write_cr3(unsigned long val)
-{
-	asm volatile("movq %0,%%cr3" :: "r" (val) : "memory");
-}
-
-static inline unsigned long read_cr4(void)
-{ 
-	unsigned long cr4;
-	asm volatile("movq %%cr4,%0" : "=r" (cr4));
-	return cr4;
-}
-
-static inline void write_cr4(unsigned long val)
-{ 
-	asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
-}
-
-static inline unsigned long read_cr8(void)
-{
-	unsigned long cr8;
-	asm volatile("movq %%cr8,%0" : "=r" (cr8));
-	return cr8;
-}
-
-static inline void write_cr8(unsigned long val)
-{
-	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-
-#define stts() write_cr0(8 | read_cr0())
-
-#define wbinvd() \
-	__asm__ __volatile__ ("wbinvd": : :"memory")
-
 #endif	/* __KERNEL__ */
 
 #ifdef CONFIG_SMP
@@ -148,6 +75,18 @@ static inline void write_cr8(unsigned long val)
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
+static inline unsigned long read_cr8(void)
+{
+	unsigned long cr8;
+	asm volatile("movq %%cr8,%0" : "=r" (cr8));
+	return cr8;
+}
+
+static inline void write_cr8(unsigned long val)
+{
+	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+
 #include <linux/irqflags.h>
 
 #endif

From 62fe164c5b036f4bdb19fbfb8f18a75631e67eee Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 258/890] x86: remove unused macro

Mr. Grep says warn_if_not_ulong() is not used anymore anywhere
in the code. So, we remove it.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system_64.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 14ad6138439b..560470ea27c8 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -73,8 +73,6 @@
 #define read_barrier_depends()	do {} while(0)
 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
 
-#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
-
 static inline unsigned long read_cr8(void)
 {
 	unsigned long cr8;

From 833d8469b102365f427f7791e79ec1843ff5f164 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 259/890] x86: unify smp parts of system.h

The memory barrier parts of system.h are not very different between
i386 and x86_64, the main difference being the availability of
instructions, which we handle with the use of ifdefs.

They are consolidated in system.h file, and then removed from
the arch-specific headers.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h    | 105 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/system_32.h |  99 ----------------------------------
 include/asm-x86/system_64.h |  25 ---------
 3 files changed, 105 insertions(+), 124 deletions(-)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 01ba1f8e64d1..4c15eb11a917 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -202,4 +202,109 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
 
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#else
+#define mb() 	asm volatile("mfence":::"memory")
+#define rmb()	asm volatile("lfence":::"memory")
+#define wmb()	asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ **/
+
+#define read_barrier_depends()	do { } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb()	rmb()
+#else
+# define smp_rmb()	barrier()
+#endif
+#ifdef CONFIG_X86_OOSTORE
+# define smp_wmb() 	wmb()
+#else
+# define smp_wmb()	barrier()
+#endif
+#define smp_read_barrier_depends()	read_barrier_depends()
+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while (0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+
 #endif
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index c05568290add..7da0716fb317 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -36,105 +36,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 #endif	/* __KERNEL__ */
 
 
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPU's follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPU's to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- *
- * Some non intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
- 
-
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends()	do { } while(0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()	rmb()
-#else
-# define smp_rmb()	barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb() 	wmb()
-#else
-# define smp_wmb()	barrier()
-#endif
-#define smp_read_barrier_depends()	read_barrier_depends()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
 #include <linux/irqflags.h>
 
 /*
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 560470ea27c8..9def35eb75e3 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -48,31 +48,6 @@
     
 #endif	/* __KERNEL__ */
 
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do {} while(0)
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do {} while(0)
-#endif
-
-    
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#define mb() 	asm volatile("mfence":::"memory")
-#define rmb()	asm volatile("lfence":::"memory")
-#define wmb()	asm volatile("sfence" ::: "memory")
-
-#define read_barrier_depends()	do {} while(0)
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-
 static inline unsigned long read_cr8(void)
 {
 	unsigned long cr8;

From 0a3b4d151a8e57f50ce5a12c7ea5ec9965cf0b5a Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 260/890] x86: move switch_to macro to system.h

This patch moves the switch_to() macro to system.h

As those macros are fundamentally different between i386 and x86_64,
they are enclosed around an ifdef.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h    | 61 +++++++++++++++++++++++++++++++++++++
 include/asm-x86/system_32.h | 31 -------------------
 include/asm-x86/system_64.h | 43 --------------------------
 3 files changed, 61 insertions(+), 74 deletions(-)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 4c15eb11a917..ba3403f1d020 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -6,8 +6,69 @@
 #include <linux/kernel.h>
 
 #ifdef CONFIG_X86_32
+#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
+
+struct task_struct; /* one of the stranger aspects of C forward declarations */
+extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
+						struct task_struct *next));
+
+/*
+ * Saving eflags is important. It switches not only IOPL between tasks,
+ * it also protects other tasks from NT leaking through sysenter etc.
+ */
+#define switch_to(prev, next, last) do {				\
+	unsigned long esi, edi;						\
+	asm volatile("pushfl\n\t"		/* Save flags */	\
+		     "pushl %%ebp\n\t"					\
+		     "movl %%esp,%0\n\t"	/* save ESP */		\
+		     "movl %5,%%esp\n\t"	/* restore ESP */	\
+		     "movl $1f,%1\n\t"		/* save EIP */		\
+		     "pushl %6\n\t"		/* restore EIP */	\
+		     "jmp __switch_to\n"				\
+		     "1:\t"						\
+		     "popl %%ebp\n\t"					\
+		     "popfl"						\
+		     :"=m" (prev->thread.sp), "=m" (prev->thread.ip),	\
+		      "=a" (last), "=S" (esi), "=D" (edi)		\
+		     :"m" (next->thread.sp), "m" (next->thread.ip),	\
+		      "2" (prev), "d" (next));				\
+} while (0)
+
 # include "system_32.h"
 #else
+#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
+#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
+
+/* frame pointer must be last for get_wchan */
+#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+
+#define __EXTRA_CLOBBER  \
+	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
+	  "r12", "r13", "r14", "r15"
+
+/* Save restore flags to clear handle leaking NT */
+#define switch_to(prev, next, last) \
+	asm volatile(SAVE_CONTEXT					  \
+	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
+	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
+	     "call __switch_to\n\t"					  \
+	     ".globl thread_return\n"					  \
+	     "thread_return:\n\t"					  \
+	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
+	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
+	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
+	     "movq %%rax,%%rdi\n\t" 					  \
+	     "jc   ret_from_fork\n\t"					  \
+	     RESTORE_CONTEXT						  \
+	     : "=a" (last)					  	  \
+	     : [next] "S" (next), [prev] "D" (prev),			  \
+	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
+	       [tif_fork] "i" (TIF_FORK),			  	  \
+	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
+	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
+	     : "memory", "cc" __EXTRA_CLOBBER)
 # include "system_64.h"
 #endif
 
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index 7da0716fb317..83af46443bd0 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -5,37 +5,6 @@
 #include <asm/cpufeature.h>
 #include <asm/cmpxchg.h>
 
-#ifdef __KERNEL__
-#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
-
-struct task_struct;	/* one of the stranger aspects of C forward declarations.. */
-extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
-
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
-#define switch_to(prev,next,last) do {					\
-	unsigned long esi,edi;						\
-	asm volatile("pushfl\n\t"		/* Save flags */	\
-		     "pushl %%ebp\n\t"					\
-		     "movl %%esp,%0\n\t"	/* save ESP */		\
-		     "movl %5,%%esp\n\t"	/* restore ESP */	\
-		     "movl $1f,%1\n\t"		/* save EIP */		\
-		     "pushl %6\n\t"		/* restore EIP */	\
-		     "jmp __switch_to\n"				\
-		     "1:\t"						\
-		     "popl %%ebp\n\t"					\
-		     "popfl"						\
-		     :"=m" (prev->thread.sp),"=m" (prev->thread.ip),	\
-		      "=a" (last),"=S" (esi),"=D" (edi)			\
-		     :"m" (next->thread.sp),"m" (next->thread.ip),	\
-		      "2" (prev), "d" (next));				\
-} while (0)
-
-#endif	/* __KERNEL__ */
-
-
 #include <linux/irqflags.h>
 
 /*
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 9def35eb75e3..97fa251ccb2b 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -4,49 +4,6 @@
 #include <asm/segment.h>
 #include <asm/cmpxchg.h>
 
-#ifdef __KERNEL__
-
-/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
-# define AT_VECTOR_SIZE_ARCH 2
-#else
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
-
-#define __EXTRA_CLOBBER  \
-	,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
-
-/* Save restore flags to clear handle leaking NT */
-#define switch_to(prev,next,last) \
-	asm volatile(SAVE_CONTEXT						    \
-		     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
-		     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
-		     "call __switch_to\n\t"					  \
-		     ".globl thread_return\n"					\
-		     "thread_return:\n\t"					    \
-		     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
-		     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
-		     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
-		     "movq %%rax,%%rdi\n\t" 					  \
-		     "jc   ret_from_fork\n\t"					  \
-		     RESTORE_CONTEXT						    \
-		     : "=a" (last)					  	  \
-		     : [next] "S" (next), [prev] "D" (prev),			  \
-		       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-		       [ti_flags] "i" (offsetof(struct thread_info, flags)),\
-		       [tif_fork] "i" (TIF_FORK),			  \
-		       [thread_info] "i" (offsetof(struct task_struct, stack)), \
-		       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))   \
-		     : "memory", "cc" __EXTRA_CLOBBER)
-    
-#endif	/* __KERNEL__ */
 
 static inline unsigned long read_cr8(void)
 {

From d46d7d754014a299fa9b3400d080e09bfe4d629f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 261/890] x86: unify system.h

This patch finishes the unification of system.h file.
i386 needs a constant to be defined, and it is defined inside an ifdef.

Other than that, pretty much nothing but includes are left in the arch
specific headers, and they are deleted.

[ mingo@elte.hu: 64-bit needs the cr8 access inlines. ]

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h    | 26 ++++++++++++++++++++++++--
 include/asm-x86/system_32.h | 15 ---------------
 2 files changed, 24 insertions(+), 17 deletions(-)
 delete mode 100644 include/asm-x86/system_32.h

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index ba3403f1d020..8a37dad38bc0 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -2,8 +2,12 @@
 #define _ASM_X86_SYSTEM_H_
 
 #include <asm/asm.h>
+#include <asm/segment.h>
+#include <asm/cpufeature.h>
+#include <asm/cmpxchg.h>
 
 #include <linux/kernel.h>
+#include <linux/irqflags.h>
 
 #ifdef CONFIG_X86_32
 #define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
@@ -34,7 +38,10 @@ extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
 		      "2" (prev), "d" (next));				\
 } while (0)
 
-# include "system_32.h"
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
 #else
 #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
 #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
@@ -69,7 +76,6 @@ extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
 	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
-# include "system_64.h"
 #endif
 
 #ifdef __KERNEL__
@@ -236,6 +242,22 @@ static inline void native_wbinvd(void)
 #define write_cr4(x)	(native_write_cr4(x))
 #define wbinvd()	(native_wbinvd())
 
+#ifdef CONFIG_X86_64
+
+static inline unsigned long read_cr8(void)
+{
+	unsigned long cr8;
+	asm volatile("movq %%cr8,%0" : "=r" (cr8));
+	return cr8;
+}
+
+static inline void write_cr8(unsigned long val)
+{
+	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+
+#endif
+
 /* Clear the 'TS' bit */
 #define clts()		(native_clts())
 
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
deleted file mode 100644
index 83af46443bd0..000000000000
--- a/include/asm-x86/system_32.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-
-#include <linux/irqflags.h>
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-
-#endif

From 7796931f542518092d1fd2fb7cf1f1d96e0cd4b5 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Wed, 30 Jan 2008 13:31:08 +0100
Subject: [PATCH 262/890] UML: change sigcontext fields to match x86

git-x86, in commit 70aa1bd3839e3ec74ce65316528a82570e8de666, changed
a lot of the sigcontext field names.  This patch changes UML usage to
match.

I also changed includes of generic headers from "" to <>.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/um/sys-i386/signal.c   | 50 +++++++++++++-------------
 arch/um/sys-x86_64/signal.c | 70 ++++++++++++++++++-------------------
 2 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 0147227ce18d..19053d46cb60 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -3,10 +3,10 @@
  * Licensed under the GPL
  */
 
-#include "linux/ptrace.h"
-#include "asm/unistd.h"
-#include "asm/uaccess.h"
-#include "asm/ucontext.h"
+#include <linux/ptrace.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
 #include "frame_kern.h"
 #include "skas.h"
 
@@ -18,17 +18,17 @@ void copy_sc(struct uml_pt_regs *regs, void *from)
 	REGS_FS(regs->gp) = sc->fs;
 	REGS_ES(regs->gp) = sc->es;
 	REGS_DS(regs->gp) = sc->ds;
-	REGS_EDI(regs->gp) = sc->edi;
-	REGS_ESI(regs->gp) = sc->esi;
-	REGS_EBP(regs->gp) = sc->ebp;
-	REGS_SP(regs->gp) = sc->esp;
-	REGS_EBX(regs->gp) = sc->ebx;
-	REGS_EDX(regs->gp) = sc->edx;
-	REGS_ECX(regs->gp) = sc->ecx;
-	REGS_EAX(regs->gp) = sc->eax;
-	REGS_IP(regs->gp) = sc->eip;
+	REGS_EDI(regs->gp) = sc->di;
+	REGS_ESI(regs->gp) = sc->si;
+	REGS_EBP(regs->gp) = sc->bp;
+	REGS_SP(regs->gp) = sc->sp;
+	REGS_EBX(regs->gp) = sc->bx;
+	REGS_EDX(regs->gp) = sc->dx;
+	REGS_ECX(regs->gp) = sc->cx;
+	REGS_EAX(regs->gp) = sc->ax;
+	REGS_IP(regs->gp) = sc->ip;
 	REGS_CS(regs->gp) = sc->cs;
-	REGS_EFLAGS(regs->gp) = sc->eflags;
+	REGS_EFLAGS(regs->gp) = sc->flags;
 	REGS_SS(regs->gp) = sc->ss;
 }
 
@@ -229,18 +229,18 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 	sc.fs = REGS_FS(regs->regs.gp);
 	sc.es = REGS_ES(regs->regs.gp);
 	sc.ds = REGS_DS(regs->regs.gp);
-	sc.edi = REGS_EDI(regs->regs.gp);
-	sc.esi = REGS_ESI(regs->regs.gp);
-	sc.ebp = REGS_EBP(regs->regs.gp);
-	sc.esp = sp;
-	sc.ebx = REGS_EBX(regs->regs.gp);
-	sc.edx = REGS_EDX(regs->regs.gp);
-	sc.ecx = REGS_ECX(regs->regs.gp);
-	sc.eax = REGS_EAX(regs->regs.gp);
-	sc.eip = REGS_IP(regs->regs.gp);
+	sc.di = REGS_EDI(regs->regs.gp);
+	sc.si = REGS_ESI(regs->regs.gp);
+	sc.bp = REGS_EBP(regs->regs.gp);
+	sc.sp = sp;
+	sc.bx = REGS_EBX(regs->regs.gp);
+	sc.dx = REGS_EDX(regs->regs.gp);
+	sc.cx = REGS_ECX(regs->regs.gp);
+	sc.ax = REGS_EAX(regs->regs.gp);
+	sc.ip = REGS_IP(regs->regs.gp);
 	sc.cs = REGS_CS(regs->regs.gp);
-	sc.eflags = REGS_EFLAGS(regs->regs.gp);
-	sc.esp_at_signal = regs->regs.gp[UESP];
+	sc.flags = REGS_EFLAGS(regs->regs.gp);
+	sc.sp_at_signal = regs->regs.gp[UESP];
 	sc.ss = regs->regs.gp[SS];
 	sc.cr2 = fi->cr2;
 	sc.err = fi->error_code;
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index 1778d33808f4..7457436b433a 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -4,11 +4,11 @@
  * Licensed under the GPL
  */
 
-#include "linux/personality.h"
-#include "linux/ptrace.h"
-#include "asm/unistd.h"
-#include "asm/uaccess.h"
-#include "asm/ucontext.h"
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
 #include "frame_kern.h"
 #include "skas.h"
 
@@ -27,16 +27,16 @@ void copy_sc(struct uml_pt_regs *regs, void *from)
 	GETREG(regs, R13, sc, r13);
 	GETREG(regs, R14, sc, r14);
 	GETREG(regs, R15, sc, r15);
-	GETREG(regs, RDI, sc, rdi);
-	GETREG(regs, RSI, sc, rsi);
-	GETREG(regs, RBP, sc, rbp);
-	GETREG(regs, RBX, sc, rbx);
-	GETREG(regs, RDX, sc, rdx);
-	GETREG(regs, RAX, sc, rax);
-	GETREG(regs, RCX, sc, rcx);
-	GETREG(regs, RSP, sc, rsp);
-	GETREG(regs, RIP, sc, rip);
-	GETREG(regs, EFLAGS, sc, eflags);
+	GETREG(regs, RDI, sc, di);
+	GETREG(regs, RSI, sc, si);
+	GETREG(regs, RBP, sc, bp);
+	GETREG(regs, RBX, sc, bx);
+	GETREG(regs, RDX, sc, dx);
+	GETREG(regs, RAX, sc, ax);
+	GETREG(regs, RCX, sc, cx);
+	GETREG(regs, RSP, sc, sp);
+	GETREG(regs, RIP, sc, ip);
+	GETREG(regs, EFLAGS, sc, flags);
 	GETREG(regs, CS, sc, cs);
 
 #undef GETREG
@@ -61,16 +61,16 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	err |= GETREG(regs, R13, from, r13);
 	err |= GETREG(regs, R14, from, r14);
 	err |= GETREG(regs, R15, from, r15);
-	err |= GETREG(regs, RDI, from, rdi);
-	err |= GETREG(regs, RSI, from, rsi);
-	err |= GETREG(regs, RBP, from, rbp);
-	err |= GETREG(regs, RBX, from, rbx);
-	err |= GETREG(regs, RDX, from, rdx);
-	err |= GETREG(regs, RAX, from, rax);
-	err |= GETREG(regs, RCX, from, rcx);
-	err |= GETREG(regs, RSP, from, rsp);
-	err |= GETREG(regs, RIP, from, rip);
-	err |= GETREG(regs, EFLAGS, from, eflags);
+	err |= GETREG(regs, RDI, from, di);
+	err |= GETREG(regs, RSI, from, si);
+	err |= GETREG(regs, RBP, from, bp);
+	err |= GETREG(regs, RBX, from, bx);
+	err |= GETREG(regs, RDX, from, dx);
+	err |= GETREG(regs, RAX, from, ax);
+	err |= GETREG(regs, RCX, from, cx);
+	err |= GETREG(regs, RSP, from, sp);
+	err |= GETREG(regs, RIP, from, ip);
+	err |= GETREG(regs, EFLAGS, from, flags);
 	err |= GETREG(regs, CS, from, cs);
 	if (err)
 		return 1;
@@ -108,19 +108,19 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 	__put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],	\
 		   &(sc)->regname)
 
-	err |= PUTREG(regs, RDI, to, rdi);
-	err |= PUTREG(regs, RSI, to, rsi);
-	err |= PUTREG(regs, RBP, to, rbp);
+	err |= PUTREG(regs, RDI, to, di);
+	err |= PUTREG(regs, RSI, to, si);
+	err |= PUTREG(regs, RBP, to, bp);
 	/*
 	 * Must use orignal RSP, which is passed in, rather than what's in
 	 * the pt_regs, because that's already been updated to point at the
 	 * signal frame.
 	 */
-	err |= __put_user(sp, &to->rsp);
-	err |= PUTREG(regs, RBX, to, rbx);
-	err |= PUTREG(regs, RDX, to, rdx);
-	err |= PUTREG(regs, RCX, to, rcx);
-	err |= PUTREG(regs, RAX, to, rax);
+	err |= __put_user(sp, &to->sp);
+	err |= PUTREG(regs, RBX, to, bx);
+	err |= PUTREG(regs, RDX, to, dx);
+	err |= PUTREG(regs, RCX, to, cx);
+	err |= PUTREG(regs, RAX, to, ax);
 	err |= PUTREG(regs, R8, to, r8);
 	err |= PUTREG(regs, R9, to, r9);
 	err |= PUTREG(regs, R10, to, r10);
@@ -135,8 +135,8 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 	err |= __put_user(fi->error_code, &to->err);
 	err |= __put_user(fi->trap_no, &to->trapno);
 
-	err |= PUTREG(regs, RIP, to, rip);
-	err |= PUTREG(regs, EFLAGS, to, eflags);
+	err |= PUTREG(regs, RIP, to, ip);
+	err |= PUTREG(regs, EFLAGS, to, flags);
 #undef PUTREG
 
 	err |= __put_user(mask, &to->oldmask);

From eee3af4a2c83a97fff107ddc445d9df6fded9ce4 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 263/890] x86, ptrace: support for branch trace store(BTS)

Resend using different mail client

Changes to the last version:
- split implementation into two layers: ds/bts and ptrace
- renamed TIF's
- save/restore ds save area msr in __switch_to_xtra()
- make block-stepping only look at BTF bit

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32      |   1 +
 arch/x86/kernel/Makefile_64      |   1 +
 arch/x86/kernel/cpu/intel.c      |   5 +
 arch/x86/kernel/ds.c             | 429 +++++++++++++++++++++++++++++++
 arch/x86/kernel/process_32.c     |  19 +-
 arch/x86/kernel/process_64.c     |  26 +-
 arch/x86/kernel/ptrace.c         | 212 +++++++++++++++
 arch/x86/kernel/setup_64.c       |   5 +
 arch/x86/kernel/step.c           |  18 +-
 include/asm-x86/ds.h             |  65 +++++
 include/asm-x86/processor_32.h   |   3 +
 include/asm-x86/processor_64.h   |   3 +
 include/asm-x86/ptrace-abi.h     |  52 ++++
 include/asm-x86/ptrace.h         |  11 +
 include/asm-x86/thread_info_32.h |  12 +-
 include/asm-x86/thread_info_64.h |   9 +-
 16 files changed, 859 insertions(+), 12 deletions(-)
 create mode 100644 arch/x86/kernel/ds.c
 create mode 100644 include/asm-x86/ds.h

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index b2d7aea4c82d..cc2651bcc07f 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -11,6 +11,7 @@ obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
 obj-y				+= ptrace.o
+obj-y				+= ds.o
 obj-y				+= tls.o
 obj-y				+= step.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 19af64e1a3fc..2ec96acf6486 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -13,6 +13,7 @@ obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		i8253.o io_delay.o rtc.o
 
 obj-y				+= ptrace.o
+obj-y				+= ds.o
 obj-y				+= step.o
 
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 867ff94579be..e4b7e73e9024 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,6 +11,8 @@
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/ds.h>
 
 #include "cpu.h"
 
@@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		if (!(l1 & (1<<12)))
 			set_bit(X86_FEATURE_PEBS, c->x86_capability);
 	}
+
+	if (cpu_has_bts)
+		ds_init_intel(c);
 }
 
 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
new file mode 100644
index 000000000000..996a7c4f5963
--- /dev/null
+++ b/arch/x86/kernel/ds.c
@@ -0,0 +1,429 @@
+/*
+ * Debug Store support
+ *
+ * This provides a low-level interface to the hardware's Debug Store
+ * feature that is used for last branch recording (LBR) and
+ * precise-event based sampling (PEBS).
+ *
+ * Different architectures use a different DS layout/pointer size.
+ * The below functions therefore work on a void*.
+ *
+ *
+ * Since there is no user for PEBS, yet, only LBR (or branch
+ * trace store, BTS) is supported.
+ *
+ *
+ * Copyright (C) 2007 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ */
+
+#include <asm/ds.h>
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+
+/*
+ * Debug Store (DS) save area configuration (see Intel64 and IA32
+ * Architectures Software Developer's Manual, section 18.5)
+ *
+ * The DS configuration consists of the following fields; different
+ * architetures vary in the size of those fields.
+ * - double-word aligned base linear address of the BTS buffer
+ * - write pointer into the BTS buffer
+ * - end linear address of the BTS buffer (one byte beyond the end of
+ *   the buffer)
+ * - interrupt pointer into BTS buffer
+ *   (interrupt occurs when write pointer passes interrupt pointer)
+ * - double-word aligned base linear address of the PEBS buffer
+ * - write pointer into the PEBS buffer
+ * - end linear address of the PEBS buffer (one byte beyond the end of
+ *   the buffer)
+ * - interrupt pointer into PEBS buffer
+ *   (interrupt occurs when write pointer passes interrupt pointer)
+ * - value to which counter is reset following counter overflow
+ *
+ * On later architectures, the last branch recording hardware uses
+ * 64bit pointers even in 32bit mode.
+ *
+ *
+ * Branch Trace Store (BTS) records store information about control
+ * flow changes. They at least provide the following information:
+ * - source linear address
+ * - destination linear address
+ *
+ * Netburst supported a predicated bit that had been dropped in later
+ * architectures. We do not suppor it.
+ *
+ *
+ * In order to abstract from the actual DS and BTS layout, we describe
+ * the access to the relevant fields.
+ * Thanks to Andi Kleen for proposing this design.
+ *
+ * The implementation, however, is not as general as it might seem. In
+ * order to stay somewhat simple and efficient, we assume an
+ * underlying unsigned type (mostly a pointer type) and we expect the
+ * field to be at least as big as that type.
+ */
+
+/*
+ * A special from_ip address to indicate that the BTS record is an
+ * info record that needs to be interpreted or skipped.
+ */
+#define BTS_ESCAPE_ADDRESS (-1)
+
+/*
+ * A field access descriptor
+ */
+struct access_desc {
+	unsigned char offset;
+	unsigned char size;
+};
+
+/*
+ * The configuration for a particular DS/BTS hardware implementation.
+ */
+struct ds_configuration {
+	/* the DS configuration */
+	unsigned char  sizeof_ds;
+	struct access_desc bts_buffer_base;
+	struct access_desc bts_index;
+	struct access_desc bts_absolute_maximum;
+	struct access_desc bts_interrupt_threshold;
+	/* the BTS configuration */
+	unsigned char  sizeof_bts;
+	struct access_desc from_ip;
+	struct access_desc to_ip;
+	/* BTS variants used to store additional information like
+	   timestamps */
+	struct access_desc info_type;
+	struct access_desc info_data;
+	unsigned long debugctl_mask;
+};
+
+/*
+ * The global configuration used by the below accessor functions
+ */
+static struct ds_configuration ds_cfg;
+
+/*
+ * Accessor functions for some DS and BTS fields using the above
+ * global ptrace_bts_cfg.
+ */
+static inline void *get_bts_buffer_base(char *base)
+{
+	return *(void **)(base + ds_cfg.bts_buffer_base.offset);
+}
+static inline void set_bts_buffer_base(char *base, void *value)
+{
+	(*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value;
+}
+static inline void *get_bts_index(char *base)
+{
+	return *(void **)(base + ds_cfg.bts_index.offset);
+}
+static inline void set_bts_index(char *base, void *value)
+{
+	(*(void **)(base + ds_cfg.bts_index.offset)) = value;
+}
+static inline void *get_bts_absolute_maximum(char *base)
+{
+	return *(void **)(base + ds_cfg.bts_absolute_maximum.offset);
+}
+static inline void set_bts_absolute_maximum(char *base, void *value)
+{
+	(*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+}
+static inline void *get_bts_interrupt_threshold(char *base)
+{
+	return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset);
+}
+static inline void set_bts_interrupt_threshold(char *base, void *value)
+{
+	(*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+}
+static inline long get_from_ip(char *base)
+{
+	return *(long *)(base + ds_cfg.from_ip.offset);
+}
+static inline void set_from_ip(char *base, long value)
+{
+	(*(long *)(base + ds_cfg.from_ip.offset)) = value;
+}
+static inline long get_to_ip(char *base)
+{
+	return *(long *)(base + ds_cfg.to_ip.offset);
+}
+static inline void set_to_ip(char *base, long value)
+{
+	(*(long *)(base + ds_cfg.to_ip.offset)) = value;
+}
+static inline unsigned char get_info_type(char *base)
+{
+	return *(unsigned char *)(base + ds_cfg.info_type.offset);
+}
+static inline void set_info_type(char *base, unsigned char value)
+{
+	(*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
+}
+/*
+ * The info data might overlap with the info type on some architectures.
+ * We therefore read and write the exact number of bytes.
+ */
+static inline unsigned long long get_info_data(char *base)
+{
+	unsigned long long value = 0;
+	memcpy(&value,
+	       base + ds_cfg.info_data.offset,
+	       ds_cfg.info_data.size);
+	return value;
+}
+static inline void set_info_data(char *base, unsigned long long value)
+{
+	memcpy(base + ds_cfg.info_data.offset,
+	       &value,
+	       ds_cfg.info_data.size);
+}
+
+
+int ds_allocate(void **dsp, size_t bts_size_in_records)
+{
+	size_t bts_size_in_bytes = 0;
+	void *bts = 0;
+	void *ds = 0;
+
+	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+		return -EOPNOTSUPP;
+
+	if (bts_size_in_records < 0)
+		return -EINVAL;
+
+	bts_size_in_bytes =
+		bts_size_in_records * ds_cfg.sizeof_bts;
+
+	if (bts_size_in_bytes <= 0)
+		return -EINVAL;
+
+	bts = kzalloc(bts_size_in_bytes, GFP_KERNEL);
+
+	if (!bts)
+		return -ENOMEM;
+
+	ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+
+	if (!ds) {
+		kfree(bts);
+		return -ENOMEM;
+	}
+
+	set_bts_buffer_base(ds, bts);
+	set_bts_index(ds, bts);
+	set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
+	set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+
+	*dsp = ds;
+	return 0;
+}
+
+int ds_free(void **dsp)
+{
+	if (*dsp)
+		kfree(get_bts_buffer_base(*dsp));
+	kfree(*dsp);
+	*dsp = 0;
+
+	return 0;
+}
+
+int ds_get_bts_size(void *ds)
+{
+	size_t size_in_bytes;
+
+	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+		return -EOPNOTSUPP;
+
+	size_in_bytes =
+		get_bts_absolute_maximum(ds) -
+		get_bts_buffer_base(ds);
+
+	return size_in_bytes / ds_cfg.sizeof_bts;
+}
+
+int ds_get_bts_index(void *ds)
+{
+	size_t index_offset_in_bytes;
+
+	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+		return -EOPNOTSUPP;
+
+	index_offset_in_bytes =
+		get_bts_index(ds) -
+		get_bts_buffer_base(ds);
+
+	return index_offset_in_bytes / ds_cfg.sizeof_bts;
+}
+
+int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
+{
+	void *bts;
+
+	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+		return -EOPNOTSUPP;
+
+	if (index < 0)
+		return -EINVAL;
+
+	if (index >= ds_get_bts_size(ds))
+		return -EINVAL;
+
+	bts = get_bts_buffer_base(ds);
+	bts = (char *)bts + (index * ds_cfg.sizeof_bts);
+
+	memset(out, 0, sizeof(*out));
+	if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
+		out->qualifier         = get_info_type(bts);
+		out->variant.timestamp = get_info_data(bts);
+	} else {
+		out->qualifier = BTS_BRANCH;
+		out->variant.lbr.from_ip = get_from_ip(bts);
+		out->variant.lbr.to_ip   = get_to_ip(bts);
+	}
+
+	return 0;
+}
+
+int ds_write_bts(void *ds, const struct bts_struct *in)
+{
+	void *bts;
+
+	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+		return -EOPNOTSUPP;
+
+	if (ds_get_bts_size(ds) <= 0)
+		return -ENXIO;
+
+	bts = get_bts_index(ds);
+
+	memset(bts, 0, ds_cfg.sizeof_bts);
+	switch (in->qualifier) {
+	case BTS_INVALID:
+		break;
+
+	case BTS_BRANCH:
+		set_from_ip(bts, in->variant.lbr.from_ip);
+		set_to_ip(bts, in->variant.lbr.to_ip);
+		break;
+
+	case BTS_TASK_ARRIVES:
+	case BTS_TASK_DEPARTS:
+		set_from_ip(bts, BTS_ESCAPE_ADDRESS);
+		set_info_type(bts, in->qualifier);
+		set_info_data(bts, in->variant.timestamp);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	bts = (char *)bts + ds_cfg.sizeof_bts;
+	if (bts >= get_bts_absolute_maximum(ds))
+		bts = get_bts_buffer_base(ds);
+	set_bts_index(ds, bts);
+
+	return 0;
+}
+
+unsigned long ds_debugctl_mask(void)
+{
+	return ds_cfg.debugctl_mask;
+}
+
+#ifdef __i386__
+static const struct ds_configuration ds_cfg_netburst = {
+	.sizeof_ds = 9 * 4,
+	.bts_buffer_base = { 0, 4 },
+	.bts_index = { 4, 4 },
+	.bts_absolute_maximum = { 8, 4 },
+	.bts_interrupt_threshold = { 12, 4 },
+	.sizeof_bts = 3 * 4,
+	.from_ip = { 0, 4 },
+	.to_ip = { 4, 4 },
+	.info_type = { 4, 1 },
+	.info_data = { 5, 7 },
+	.debugctl_mask = (1<<2)|(1<<3)
+};
+
+static const struct ds_configuration ds_cfg_pentium_m = {
+	.sizeof_ds = 9 * 4,
+	.bts_buffer_base = { 0, 4 },
+	.bts_index = { 4, 4 },
+	.bts_absolute_maximum = { 8, 4 },
+	.bts_interrupt_threshold = { 12, 4 },
+	.sizeof_bts = 3 * 4,
+	.from_ip = { 0, 4 },
+	.to_ip = { 4, 4 },
+	.info_type = { 4, 1 },
+	.info_data = { 5, 7 },
+	.debugctl_mask = (1<<6)|(1<<7)
+};
+#endif /* _i386_ */
+
+static const struct ds_configuration ds_cfg_core2 = {
+	.sizeof_ds = 9 * 8,
+	.bts_buffer_base = { 0, 8 },
+	.bts_index = { 8, 8 },
+	.bts_absolute_maximum = { 16, 8 },
+	.bts_interrupt_threshold = { 24, 8 },
+	.sizeof_bts = 3 * 8,
+	.from_ip = { 0, 8 },
+	.to_ip = { 8, 8 },
+	.info_type = { 8, 1 },
+	.info_data = { 9, 7 },
+	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void
+ds_configure(const struct ds_configuration *cfg)
+{
+	ds_cfg = *cfg;
+}
+
+void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
+{
+	switch (c->x86) {
+	case 0x6:
+		switch (c->x86_model) {
+#ifdef __i386__
+		case 0xD:
+		case 0xE: /* Pentium M */
+			ds_configure(&ds_cfg_pentium_m);
+			break;
+#endif /* _i386_ */
+		case 0xF: /* Core2 */
+			ds_configure(&ds_cfg_core2);
+			break;
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	case 0xF:
+		switch (c->x86_model) {
+#ifdef __i386__
+		case 0x0:
+		case 0x1:
+		case 0x2: /* Netburst */
+			ds_configure(&ds_cfg_netburst);
+			break;
+#endif /* _i386_ */
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	default:
+		/* sorry, don't know about them */
+		break;
+	}
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5350763a2d03..2b9db9371060 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -614,11 +614,21 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
+	unsigned long debugctl;
 
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	if (next->debugctlmsr != prev->debugctlmsr)
+	debugctl = prev->debugctlmsr;
+	if (next->ds_area_msr != prev->ds_area_msr) {
+		/* we clear debugctl to make sure DS
+		 * is not in use when we change it */
+		debugctl = 0;
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
+	}
+
+	if (next->debugctlmsr != debugctl)
 		wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
 
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -642,6 +652,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	}
 #endif
 
+	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+
+	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
+		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
+
+
 	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 		/*
 		 * Disable the bitmap via an invalid offset. We still cache
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 057b5442ffda..843bf0c978a4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -568,11 +568,21 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 				    struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
+	unsigned long debugctl;
 
 	prev = &prev_p->thread,
 	next = &next_p->thread;
 
-	if (next->debugctlmsr != prev->debugctlmsr)
+	debugctl = prev->debugctlmsr;
+	if (next->ds_area_msr != prev->ds_area_msr) {
+		/* we clear debugctl to make sure DS
+		 * is not in use when we change it */
+		debugctl = 0;
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+	}
+
+	if (next->debugctlmsr != debugctl)
 		wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
 
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -598,6 +608,16 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
+
+	/*
+	 * Last branch recording recofiguration of trace hardware and
+	 * disentangling of trace data per task.
+	 */
+	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+
+	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
+		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
 }
 
 /*
@@ -701,8 +721,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
 	 */
-	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
-	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
+	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
+		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3399c1be79b8..8d0dd8b5effe 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -2,6 +2,9 @@
 /*
  * Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * BTS tracing
+ *	Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
  */
 
 #include <linux/kernel.h>
@@ -26,6 +29,14 @@
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/proto.h>
+#include <asm/ds.h>
+
+
+/*
+ * The maximal size of a BTS buffer per traced task in number of BTS
+ * records.
+ */
+#define PTRACE_BTS_BUFFER_MAX 4000
 
 /*
  * does not yet catch signals sent when the child dies.
@@ -455,6 +466,165 @@ static int ptrace_set_debugreg(struct task_struct *child,
 	return 0;
 }
 
+static int ptrace_bts_max_buffer_size(void)
+{
+	return PTRACE_BTS_BUFFER_MAX;
+}
+
+static int ptrace_bts_get_buffer_size(struct task_struct *child)
+{
+	if (!child->thread.ds_area_msr)
+		return -ENXIO;
+
+	return ds_get_bts_size((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_get_index(struct task_struct *child)
+{
+	if (!child->thread.ds_area_msr)
+		return -ENXIO;
+
+	return ds_get_bts_index((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_read_record(struct task_struct *child,
+				  long index,
+				  struct bts_struct __user *out)
+{
+	struct bts_struct ret;
+	int retval;
+
+	if (!child->thread.ds_area_msr)
+		return -ENXIO;
+
+	retval = ds_read_bts((void *)child->thread.ds_area_msr,
+			     index, &ret);
+	if (retval)
+		return retval;
+
+	if (copy_to_user(out, &ret, sizeof(ret)))
+		return -EFAULT;
+
+	return sizeof(ret);
+}
+
+static int ptrace_bts_write_record(struct task_struct *child,
+				   const struct bts_struct *in)
+{
+	int retval;
+
+	if (!child->thread.ds_area_msr)
+		return -ENXIO;
+
+	retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
+	if (retval)
+		return retval;
+
+	return sizeof(*in);
+}
+
+static int ptrace_bts_config(struct task_struct *child,
+			     unsigned long options)
+{
+	unsigned long debugctl_mask = ds_debugctl_mask();
+	int retval;
+
+	retval = ptrace_bts_get_buffer_size(child);
+	if (retval < 0)
+		return retval;
+	if (retval == 0)
+		return -ENXIO;
+
+	if (options & PTRACE_BTS_O_TRACE_TASK) {
+		child->thread.debugctlmsr |= debugctl_mask;
+		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+	} else {
+		/* there is no way for us to check whether we 'own'
+		 * the respective bits in the DEBUGCTL MSR, we're
+		 * about to clear */
+		child->thread.debugctlmsr &= ~debugctl_mask;
+
+		if (!child->thread.debugctlmsr)
+			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+	}
+
+	if (options & PTRACE_BTS_O_TIMESTAMPS)
+		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+	else
+		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+
+	return 0;
+}
+
+static int ptrace_bts_status(struct task_struct *child)
+{
+	unsigned long debugctl_mask = ds_debugctl_mask();
+	int retval, status = 0;
+
+	retval = ptrace_bts_get_buffer_size(child);
+	if (retval < 0)
+		return retval;
+	if (retval == 0)
+		return -ENXIO;
+
+	if (ptrace_bts_get_buffer_size(child) <= 0)
+		return -ENXIO;
+
+	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+	    child->thread.debugctlmsr & debugctl_mask)
+		status |= PTRACE_BTS_O_TRACE_TASK;
+	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+		status |= PTRACE_BTS_O_TIMESTAMPS;
+
+	return status;
+}
+
+static int ptrace_bts_allocate_bts(struct task_struct *child,
+				   int size_in_records)
+{
+	int retval = 0;
+	void *ds;
+
+	if (size_in_records < 0)
+		return -EINVAL;
+
+	if (size_in_records > ptrace_bts_max_buffer_size())
+		return -EINVAL;
+
+	if (size_in_records == 0) {
+		ptrace_bts_config(child, /* options = */ 0);
+	} else {
+		retval = ds_allocate(&ds, size_in_records);
+		if (retval)
+			return retval;
+	}
+
+	if (child->thread.ds_area_msr)
+		ds_free((void **)&child->thread.ds_area_msr);
+
+	child->thread.ds_area_msr = (unsigned long)ds;
+	if (child->thread.ds_area_msr)
+		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+	else
+		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+	return retval;
+}
+
+void ptrace_bts_take_timestamp(struct task_struct *tsk,
+			       enum bts_qualifier qualifier)
+{
+	struct bts_struct rec = {
+		.qualifier = qualifier,
+		.variant.timestamp = sched_clock()
+	};
+
+	if (ptrace_bts_get_buffer_size(tsk) <= 0)
+		return;
+
+	ptrace_bts_write_record(tsk, &rec);
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -466,6 +636,11 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
+	ptrace_bts_config(child, /* options = */ 0);
+	if (child->thread.ds_area_msr) {
+	    ds_free((void **)&child->thread.ds_area_msr);
+	    clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+	}
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -626,6 +801,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 #endif
 
+	case PTRACE_BTS_MAX_BUFFER_SIZE:
+		ret = ptrace_bts_max_buffer_size();
+		break;
+
+	case PTRACE_BTS_ALLOCATE_BUFFER:
+		ret = ptrace_bts_allocate_bts(child, data);
+		break;
+
+	case PTRACE_BTS_GET_BUFFER_SIZE:
+		ret = ptrace_bts_get_buffer_size(child);
+		break;
+
+	case PTRACE_BTS_GET_INDEX:
+		ret = ptrace_bts_get_index(child);
+		break;
+
+	case PTRACE_BTS_READ_RECORD:
+		ret = ptrace_bts_read_record
+			(child, data,
+			 (struct bts_struct __user *) addr);
+		break;
+
+	case PTRACE_BTS_CONFIG:
+		ret = ptrace_bts_config(child, data);
+		break;
+
+	case PTRACE_BTS_STATUS:
+		ret = ptrace_bts_status(child);
+		break;
+
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;
@@ -809,6 +1014,13 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_SETOPTIONS:
 	case PTRACE_SET_THREAD_AREA:
 	case PTRACE_GET_THREAD_AREA:
+	case PTRACE_BTS_MAX_BUFFER_SIZE:
+	case PTRACE_BTS_ALLOCATE_BUFFER:
+	case PTRACE_BTS_GET_BUFFER_SIZE:
+	case PTRACE_BTS_GET_INDEX:
+	case PTRACE_BTS_READ_RECORD:
+	case PTRACE_BTS_CONFIG:
+	case PTRACE_BTS_STATUS:
 		return sys_ptrace(request, pid, addr, data);
 
 	default:
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index ce4d6b52ce36..f2b131ef844e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -60,6 +60,7 @@
 #include <asm/dmi.h>
 #include <asm/cacheflush.h>
 #include <asm/mce.h>
+#include <asm/ds.h>
 
 /*
  * Machine setup..
@@ -823,6 +824,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
+
+	if (cpu_has_bts)
+		ds_init_intel(c);
+
 	n = c->extended_cpuid_level;
 	if (n >= 0x80000008) {
 		unsigned eax = cpuid_eax(0x80000008);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f55c003f5b63..21ea22fda5fc 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -169,9 +169,14 @@ static void enable_step(struct task_struct *child, bool block)
 	 */
 	if (enable_single_step(child) && block) {
 		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-		write_debugctlmsr(child, DEBUGCTLMSR_BTF);
-	} else if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) {
-		write_debugctlmsr(child, 0);
+		write_debugctlmsr(child,
+				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
+	} else {
+	    write_debugctlmsr(child,
+			      child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+
+	    if (!child->thread.debugctlmsr)
+		    clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 	}
 }
 
@@ -190,8 +195,11 @@ void user_disable_single_step(struct task_struct *child)
 	/*
 	 * Make sure block stepping (BTF) is disabled.
 	 */
-	if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR))
-		write_debugctlmsr(child, 0);
+	write_debugctlmsr(child,
+			  child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+
+	if (!child->thread.debugctlmsr)
+		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
new file mode 100644
index 000000000000..edd8467740a6
--- /dev/null
+++ b/include/asm-x86/ds.h
@@ -0,0 +1,65 @@
+/*
+ * Debug Store (DS) support
+ *
+ * This provides a low-level interface to the hardware's Debug Store
+ * feature that is used for last branch recording (LBR) and
+ * precise-event based sampling (PEBS).
+ *
+ * Different architectures use a different DS layout/pointer size.
+ * The below functions therefore work on a void*.
+ *
+ *
+ * Since there is no user for PEBS, yet, only LBR (or branch
+ * trace store, BTS) is supported.
+ *
+ *
+ * Copyright (C) 2007 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ */
+
+#ifndef _ASM_X86_DS_H
+#define _ASM_X86_DS_H
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+struct cpuinfo_x86;
+
+
+/* a branch trace record entry
+ *
+ * In order to unify the interface between various processor versions,
+ * we use the below data structure for all processors.
+ */
+enum bts_qualifier {
+	BTS_INVALID = 0,
+	BTS_BRANCH,
+	BTS_TASK_ARRIVES,
+	BTS_TASK_DEPARTS
+};
+
+struct bts_struct {
+	enum bts_qualifier qualifier;
+	union {
+		/* BTS_BRANCH */
+		struct {
+			long from_ip;
+			long to_ip;
+		} lbr;
+		/* BTS_TASK_ARRIVES or
+		   BTS_TASK_DEPARTS */
+		unsigned long long timestamp;
+	} variant;
+};
+
+
+extern int ds_allocate(void **, size_t);
+extern int ds_free(void **);
+extern int ds_get_bts_size(void *);
+extern int ds_get_bts_index(void *);
+extern int ds_read_bts(void *, size_t, struct bts_struct *);
+extern int ds_write_bts(void *, const struct bts_struct *);
+extern unsigned long ds_debugctl_mask(void);
+extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
+
+#endif /* _ASM_X86_DS_H */
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 0d83da198127..9c0ab7f26bd9 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -360,6 +360,9 @@ struct thread_struct {
 	unsigned long	io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
+/* Debug Store - if not 0 points to a DS Save Area configuration;
+ *               goes into MSR_IA32_DS_AREA */
+	unsigned long	ds_area_msr;
 };
 
 #define INIT_THREAD  {							\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 0780f3e3fdfe..7b7f8a142e20 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -240,6 +240,9 @@ struct thread_struct {
 	unsigned io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
+/* Debug Store - if not 0 points to a DS Save Area configuration;
+ *               goes into MSR_IA32_DS_AREA */
+	unsigned long	ds_area_msr;
 /* cached TLS descriptors. */
 	u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
 } __attribute__((aligned(16)));
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index adce6b51df2e..6fadc5214e14 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -80,4 +80,56 @@
 
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
+/* Return maximal BTS buffer size in number of records,
+   if successuf; -1, otherwise.
+   EOPNOTSUPP...processor does not support bts tracing */
+#define PTRACE_BTS_MAX_BUFFER_SIZE 40
+
+/* Allocate new bts buffer (free old one, if exists) of size DATA bts records;
+   parameter ADDR is ignored.
+   Return 0, if successful; -1, otherwise.
+   EOPNOTSUPP...processor does not support bts tracing
+   EINVAL.......invalid size in records
+   ENOMEM.......out of memory */
+#define PTRACE_BTS_ALLOCATE_BUFFER 41
+
+/* Return the size of the bts buffer in number of bts records,
+   if successful; -1, otherwise.
+   EOPNOTSUPP...processor does not support bts tracing
+   ENXIO........no buffer allocated */
+#define PTRACE_BTS_GET_BUFFER_SIZE 42
+
+/* Return the index of the next bts record to be written,
+   if successful; -1, otherwise.
+   EOPNOTSUPP...processor does not support bts tracing
+   ENXIO........no buffer allocated
+   After the first warp-around, this is the start of the circular bts buffer. */
+#define PTRACE_BTS_GET_INDEX 43
+
+/* Read the DATA'th bts record into a ptrace_bts_record buffer provided in ADDR.
+   Return 0, if successful; -1, otherwise
+   EOPNOTSUPP...processor does not support bts tracing
+   ENXIO........no buffer allocated
+   EINVAL.......invalid index */
+#define PTRACE_BTS_READ_RECORD 44
+
+/* Configure last branch trace; the configuration is given as a bit-mask of
+   PTRACE_BTS_O_* options in DATA; parameter ADDR is ignored.
+   Return 0, if successful; -1, otherwise
+   EOPNOTSUPP...processor does not support bts tracing
+   ENXIO........no buffer allocated */
+#define PTRACE_BTS_CONFIG 45
+
+/* Return the configuration as bit-mask of PTRACE_BTS_O_* options
+   if successful; -1, otherwise.
+   EOPNOTSUPP...processor does not support bts tracing
+   ENXIO........no buffer allocated */
+#define PTRACE_BTS_STATUS 46
+
+/* Trace configuration options */
+/* Collect last branch trace */
+#define PTRACE_BTS_O_TRACE_TASK 0x1
+/* Take timestamps when the task arrives and departs */
+#define PTRACE_BTS_O_TIMESTAMPS 0x2
+
 #endif
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 9228870f6157..a9a1bab1451a 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -4,8 +4,19 @@
 #include <linux/compiler.h>	/* For __user */
 #include <asm/ptrace-abi.h>
 
+
 #ifndef __ASSEMBLY__
 
+#ifdef __KERNEL__
+
+#include <asm/ds.h>
+
+struct task_struct;
+extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
+
+#endif /* __KERNEL__ */
+
+
 #ifdef __i386__
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index 306fc80800e1..5bd508260ffb 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -140,6 +140,8 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_NOTSC		20	/* TSC is not accessible in userland */
 #define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		22	/* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR 	23      /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS        24      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -157,6 +159,8 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NOTSC		(1<<TIF_NOTSC)
 #define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1<<TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR	(1<<TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS	(1<<TIF_BTS_TRACE_TS)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
@@ -166,8 +170,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG | _TIF_DEBUGCTLMSR)
-#define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR)
+#define _TIF_WORK_CTXSW \
+    (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \
+     _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
+
 
 /*
  * Thread-synchronous status.
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index ee35fd12b541..c2911a99cc32 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -123,6 +123,8 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_FREEZE		23	/* is freezing for suspend */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR	25      /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS	26      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -142,6 +144,8 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1<<TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR	(1<<TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS	(1<<TIF_BTS_TRACE_TS)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
@@ -153,7 +157,10 @@ static inline struct thread_info *stack_thread_info(void)
 	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR)
+#define _TIF_WORK_CTXSW \
+    (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 
 #define PREEMPT_ACTIVE     0x10000000
 

From cef189508b2f043fdc2e354645803b6ce004aac3 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 264/890] x86: put get_kernel_rpl in a common location

This macro is useful for both i386 and x86_64, so put it in a common
location, where both arches can grab it.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/segment.h    | 9 +++++++++
 include/asm-x86/segment_32.h | 3 ---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 605068280e28..b3a7a3e9fb19 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,5 +1,14 @@
+#ifndef _ASM_X86_SEGMENT_H_
+#define _ASM_X86_SEGMENT_H_
+
 #ifdef CONFIG_X86_32
 # include "segment_32.h"
 #else
 # include "segment_64.h"
 #endif
+
+#ifndef CONFIG_PARAVIRT
+#define get_kernel_rpl()  0
+#endif
+
+#endif
diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h
index 597a47c2515f..ff186e34798a 100644
--- a/include/asm-x86/segment_32.h
+++ b/include/asm-x86/segment_32.h
@@ -129,9 +129,6 @@
 #define SEGMENT_LDT		0x4
 #define SEGMENT_GDT		0x0
 
-#ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl()  0
-#endif
 /*
  * Matching rules for certain types of segments.
  */

From 15ee09b40831ae88eb8a804a647b6935eb8b7114 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 265/890] x86: unify common parts of segment.h

Although segment handling in i386 and x86_64 are very different,
there's a common part. Put them in segment.h instead of arch specific
headers

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/segment.h    | 16 ++++++++++++++++
 include/asm-x86/segment_32.h | 22 ----------------------
 include/asm-x86/segment_64.h | 17 -----------------
 3 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index b3a7a3e9fb19..3c9b306bc4bf 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -11,4 +11,20 @@
 #define get_kernel_rpl()  0
 #endif
 
+/* User mode is privilege level 3 */
+#define USER_RPL		0x3
+/* LDT segment has TI set, GDT has it cleared */
+#define SEGMENT_LDT		0x4
+#define SEGMENT_GDT		0x0
+
+/* Bottom two bits of selector give the ring privilege level */
+#define SEGMENT_RPL_MASK	0x3
+/* Bit 2 is table indicator (LDT/GDT) */
+#define SEGMENT_TI_MASK		0x4
+
+#define IDT_ENTRIES 256
+#define GDT_SIZE (GDT_ENTRIES * 8)
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
 #endif
diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h
index ff186e34798a..3e133ad4252d 100644
--- a/include/asm-x86/segment_32.h
+++ b/include/asm-x86/segment_32.h
@@ -45,12 +45,9 @@
  *  30 - unused
  *  31 - TSS for double fault handler
  */
-#define GDT_ENTRY_TLS_ENTRIES	3
 #define GDT_ENTRY_TLS_MIN	6
 #define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-
 #define GDT_ENTRY_DEFAULT_USER_CS	14
 #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
 
@@ -87,7 +84,6 @@
  * The GDT has 32 entries
  */
 #define GDT_ENTRIES 32
-#define GDT_SIZE (GDT_ENTRIES * 8)
 
 /* Simple and small GDT entries for booting only */
 
@@ -111,24 +107,6 @@
 #define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8)	/* transfer data segment */
 #define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8)	/* another data segment */
 
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- */
-#define IDT_ENTRIES 256
-
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK	0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK		0x4
-
-/* User mode is privilege level 3 */
-#define USER_RPL		0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT		0x4
-#define SEGMENT_GDT		0x0
-
 /*
  * Matching rules for certain types of segments.
  */
diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h
index dce742101348..5cc7deb8b034 100644
--- a/include/asm-x86/segment_64.h
+++ b/include/asm-x86/segment_64.h
@@ -33,8 +33,6 @@
 #define GDT_ENTRY_TLS_MIN 12
 #define GDT_ENTRY_TLS_MAX 14
 
-#define GDT_ENTRY_TLS_ENTRIES 3
-
 #define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
 #define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
 
@@ -45,20 +43,5 @@
 #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
-#define IDT_ENTRIES 256
 #define GDT_ENTRIES 16
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
-
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK	0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK		0x4
-
-/* User mode is privilege level 3 */
-#define USER_RPL		0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT		0x4
-#define SEGMENT_GDT		0x0
-
 #endif

From b23be399da88a5008b1db4db06f03146b25cdc52 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 266/890] x86: remove arch specific segment headers

This file puts the remainder of the arch specificic segment
headers in segment.h.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/segment.h    | 177 ++++++++++++++++++++++++++++++++++-
 include/asm-x86/segment_32.h | 122 ------------------------
 include/asm-x86/segment_64.h |  47 ----------
 3 files changed, 175 insertions(+), 171 deletions(-)
 delete mode 100644 include/asm-x86/segment_64.h

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 3c9b306bc4bf..c7509df79141 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -2,9 +2,182 @@
 #define _ASM_X86_SEGMENT_H_
 
 #ifdef CONFIG_X86_32
-# include "segment_32.h"
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *   1 - reserved
+ *   2 - reserved
+ *   3 - reserved
+ *
+ *   4 - unused			<==== new cacheline
+ *   5 - unused
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   6 - TLS segment #1			[ glibc's TLS segment ]
+ *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
+ *   8 - TLS segment #3
+ *   9 - reserved
+ *  10 - reserved
+ *  11 - reserved
+ *
+ *  ------- start of kernel segments:
+ *
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - default user CS
+ *  15 - default user DS
+ *  16 - TSS
+ *  17 - LDT
+ *  18 - PNPBIOS support (16->32 gate)
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - PNPBIOS support
+ *  22 - PNPBIOS support
+ *  23 - APM BIOS support
+ *  24 - APM BIOS support
+ *  25 - APM BIOS support
+ *
+ *  26 - ESPFIX small SS
+ *  27 - per-cpu			[ offset to per-cpu data area ]
+ *  28 - unused
+ *  29 - unused
+ *  30 - unused
+ *  31 - TSS for double fault handler
+ */
+#define GDT_ENTRY_TLS_MIN	6
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	14
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	15
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE	12
+
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 5)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
+
+#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
+#define GDT_ENTRY_PERCPU			(GDT_ENTRY_KERNEL_BASE + 15)
+#ifdef CONFIG_SMP
+#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
 #else
-# include "segment_64.h"
+#define __KERNEL_PERCPU 0
+#endif
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS	31
+
+/*
+ * The GDT has 32 entries
+ */
+#define GDT_ENTRIES 32
+
+/* Simple and small GDT entries for booting only */
+
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+
+#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+
+/* The PnP BIOS entries in the GDT */
+#define GDT_ENTRY_PNPBIOS_CS32		(GDT_ENTRY_PNPBIOS_BASE + 0)
+#define GDT_ENTRY_PNPBIOS_CS16		(GDT_ENTRY_PNPBIOS_BASE + 1)
+#define GDT_ENTRY_PNPBIOS_DS		(GDT_ENTRY_PNPBIOS_BASE + 2)
+#define GDT_ENTRY_PNPBIOS_TS1		(GDT_ENTRY_PNPBIOS_BASE + 3)
+#define GDT_ENTRY_PNPBIOS_TS2		(GDT_ENTRY_PNPBIOS_BASE + 4)
+
+/* The PnP BIOS selectors */
+#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)	/* segment for calling fn */
+#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)	/* code segment for BIOS */
+#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)	/* data segment for BIOS */
+#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8)	/* transfer data segment */
+#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8)	/* another data segment */
+
+/* Bottom two bits of selector give the ring privilege level */
+#define SEGMENT_RPL_MASK	0x3
+/* Bit 2 is table indicator (LDT/GDT) */
+#define SEGMENT_TI_MASK		0x4
+
+/* User mode is privilege level 3 */
+#define USER_RPL		0x3
+/* LDT segment has TI set, GDT has it cleared */
+#define SEGMENT_LDT		0x4
+#define SEGMENT_GDT		0x0
+
+/*
+ * Matching rules for certain types of segments.
+ */
+
+/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
+#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8)
+
+/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
+#define SEGMENT_IS_FLAT_CODE(x)  (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
+
+/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
+#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+
+
+#else
+#include <asm/cache.h>
+
+/* Simple and small GDT entries for booting only */
+
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+
+#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+
+#define __KERNEL_CS	0x10
+#define __KERNEL_DS	0x18
+
+#define __KERNEL32_CS   0x08
+
+/*
+ * we cannot use the same code segment descriptor for user and kernel
+ * -- not even in the long flat mode, because of different DPL /kkeil
+ * The segment offset needs to contain a RPL. Grr. -AK
+ * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ */
+
+#define __USER32_CS   0x23   /* 4*8+3 */
+#define __USER_DS     0x2b   /* 5*8+3 */
+#define __USER_CS     0x33   /* 6*8+3 */
+#define __USER32_DS	__USER_DS
+
+#define GDT_ENTRY_TSS 8	/* needs two entries */
+#define GDT_ENTRY_LDT 10 /* needs two entries */
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
+
+#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
+#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
+
+/* TLS indexes for 64bit - hardcoded in arch_prctl */
+#define FS_TLS 0
+#define GS_TLS 1
+
+#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
+
+#define GDT_ENTRIES 16
+
 #endif
 
 #ifndef CONFIG_PARAVIRT
diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h
index 3e133ad4252d..8b137891791f 100644
--- a/include/asm-x86/segment_32.h
+++ b/include/asm-x86/segment_32.h
@@ -1,123 +1 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
 
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - reserved
- *   2 - reserved
- *   3 - reserved
- *
- *   4 - unused			<==== new cacheline
- *   5 - unused
- *
- *  ------- start of TLS (Thread-Local Storage) segments:
- *
- *   6 - TLS segment #1			[ glibc's TLS segment ]
- *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
- *   8 - TLS segment #3
- *   9 - reserved
- *  10 - reserved
- *  11 - reserved
- *
- *  ------- start of kernel segments:
- *
- *  12 - kernel code segment		<==== new cacheline
- *  13 - kernel data segment
- *  14 - default user CS
- *  15 - default user DS
- *  16 - TSS
- *  17 - LDT
- *  18 - PNPBIOS support (16->32 gate)
- *  19 - PNPBIOS support
- *  20 - PNPBIOS support
- *  21 - PNPBIOS support
- *  22 - PNPBIOS support
- *  23 - APM BIOS support
- *  24 - APM BIOS support
- *  25 - APM BIOS support 
- *
- *  26 - ESPFIX small SS
- *  27 - per-cpu			[ offset to per-cpu data area ]
- *  28 - unused
- *  29 - unused
- *  30 - unused
- *  31 - TSS for double fault handler
- */
-#define GDT_ENTRY_TLS_MIN	6
-#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-
-#define GDT_ENTRY_DEFAULT_USER_CS	14
-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
-
-#define GDT_ENTRY_DEFAULT_USER_DS	15
-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
-
-#define GDT_ENTRY_KERNEL_BASE	12
-
-#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
-
-#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
-
-#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 4)
-#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 5)
-
-#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
-#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
-
-#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
-#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
-
-#define GDT_ENTRY_PERCPU			(GDT_ENTRY_KERNEL_BASE + 15)
-#ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
-#else
-#define __KERNEL_PERCPU 0
-#endif
-
-#define GDT_ENTRY_DOUBLEFAULT_TSS	31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
-
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS		2
-#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
-
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32		(GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16		(GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS		(GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1		(GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2		(GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)	/* segment for calling fn */
-#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)	/* code segment for BIOS */
-#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)	/* data segment for BIOS */
-#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8)	/* transfer data segment */
-#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8)	/* another data segment */
-
-/*
- * Matching rules for certain types of segments.
- */
-
-/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
-#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8)
-
-/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
-#define SEGMENT_IS_FLAT_CODE(x)  (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
-
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
-#endif
diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h
deleted file mode 100644
index 5cc7deb8b034..000000000000
--- a/include/asm-x86/segment_64.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-#include <asm/cache.h>
-
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS		2
-#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
-
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
-
-#define __KERNEL32_CS   0x08
-
-/* 
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil 
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 
- */
-
-#define __USER32_CS   0x23   /* 4*8+3 */ 
-#define __USER_DS     0x2b   /* 5*8+3 */ 
-#define __USER_CS     0x33   /* 6*8+3 */ 
-#define __USER32_DS	__USER_DS 
-
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
-
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0	
-#define GS_TLS 1	
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
-#endif

From 6b4b05bd790389962e6fcfc862562e7fa239c9d2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 267/890] x64/page.h: convert some macros to inlines

Convert clear_page/copy_page macros to inline functions for type-checking.
Andrew wants to extirpate these ugly macros. (Ingo too. Thomas as well.
Please send us more "kill ugly macros" patches! :-)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page_32.h | 39 +++++++++++++++++++++++++++++++++------
 include/asm-x86/page_64.h | 19 +++++++++++++++----
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 80ecc66b6d86..c620c934e557 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -12,12 +12,21 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+#include <linux/string.h>
+
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
 
-#define clear_page(page)	mmx_clear_page((void *)(page))
-#define copy_page(to,from)	mmx_copy_page(to,from)
+static inline void clear_page(void *page)
+{
+	mmx_clear_page(page);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	mmx_copy_page(to, from);
+}
 
 #else
 
@@ -26,13 +35,31 @@
  *	Maybe the K6-III ?
  */
  
-#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
-#define copy_page(to,from)	memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+static inline void clear_page(void *page)
+{
+	memset(page, 0, PAGE_SIZE);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
 
 #endif
 
-#define clear_user_page(page, vaddr, pg)	clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+struct page;
+
+static void inline clear_user_page(void *page, unsigned long vaddr,
+				struct page *pg)
+{
+	clear_page(page);
+}
+
+static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
+				struct page *topage)
+{
+	copy_page(to, from);
+}
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index d400167c5509..068b5e4cea4c 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -43,11 +43,22 @@
 extern unsigned long end_pfn;
 extern unsigned long end_pfn_map;
 
-void clear_page(void *);
-void copy_page(void *, void *);
+void clear_page(void *page);
+void copy_page(void *to, void *from);
 
-#define clear_user_page(page, vaddr, pg)	clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+struct page;
+
+static void inline clear_user_page(void *page, unsigned long vaddr,
+				struct page *pg)
+{
+	clear_page(page);
+}
+
+static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
+				struct page *topage)
+{
+	copy_page(to, from);
+}
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)

From 5fd1fe9c582e00ca0a98f852cd693dc3caf607a0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 268/890] x86: clean up drivers/char/rtc.c

tons of style cleanup in drivers/char/rtc.c - no code changed:

   text    data     bss     dec     hex filename
   6400     384      32    6816    1aa0 rtc.o.before
   6400     384      32    6816    1aa0 rtc.o.after

since we seem to have a number of open breakages in this code we might
as well start with making the code more readable and maintainable.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/char/rtc.c | 234 ++++++++++++++++++++++++---------------------
 1 file changed, 127 insertions(+), 107 deletions(-)

diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 0c66b802736a..3ac7952fe086 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -1,5 +1,5 @@
 /*
- *	Real Time Clock interface for Linux	
+ *	Real Time Clock interface for Linux
  *
  *	Copyright (C) 1996 Paul Gortmaker
  *
@@ -17,7 +17,7 @@
  *	has been received. If a RTC interrupt has already happened,
  *	it will output an unsigned long and then block. The output value
  *	contains the interrupt status in the low byte and the number of
- *	interrupts since the last read in the remaining high bytes. The 
+ *	interrupts since the last read in the remaining high bytes. The
  *	/dev/rtc interface can also be used with the select(2) call.
  *
  *	This program is free software; you can redistribute it and/or
@@ -104,12 +104,12 @@ static int rtc_has_irq = 1;
 
 #ifndef CONFIG_HPET_EMULATE_RTC
 #define is_hpet_enabled()			0
-#define hpet_set_alarm_time(hrs, min, sec) 	0
-#define hpet_set_periodic_freq(arg) 		0
-#define hpet_mask_rtc_irq_bit(arg) 		0
-#define hpet_set_rtc_irq_bit(arg) 		0
-#define hpet_rtc_timer_init() 			do { } while (0)
-#define hpet_rtc_dropped_irq() 			0
+#define hpet_set_alarm_time(hrs, min, sec)	0
+#define hpet_set_periodic_freq(arg)		0
+#define hpet_mask_rtc_irq_bit(arg)		0
+#define hpet_set_rtc_irq_bit(arg)		0
+#define hpet_rtc_timer_init()			do { } while (0)
+#define hpet_rtc_dropped_irq()			0
 #ifdef RTC_IRQ
 static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 {
@@ -147,7 +147,7 @@ static int rtc_ioctl(struct inode *inode, struct file *file,
 static unsigned int rtc_poll(struct file *file, poll_table *wait);
 #endif
 
-static void get_rtc_alm_time (struct rtc_time *alm_tm);
+static void get_rtc_alm_time(struct rtc_time *alm_tm);
 #ifdef RTC_IRQ
 static void set_rtc_irq_bit_locked(unsigned char bit);
 static void mask_rtc_irq_bit_locked(unsigned char bit);
@@ -185,9 +185,9 @@ static int rtc_proc_open(struct inode *inode, struct file *file);
  * rtc_status but before mod_timer is called, which would then reenable the
  * timer (but you would need to have an awful timing before you'd trip on it)
  */
-static unsigned long rtc_status = 0;	/* bitmapped status byte.	*/
-static unsigned long rtc_freq = 0;	/* Current periodic IRQ rate	*/
-static unsigned long rtc_irq_data = 0;	/* our output to the world	*/
+static unsigned long rtc_status;	/* bitmapped status byte.	*/
+static unsigned long rtc_freq;		/* Current periodic IRQ rate	*/
+static unsigned long rtc_irq_data;	/* our output to the world	*/
 static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */
 
 #ifdef RTC_IRQ
@@ -195,7 +195,7 @@ static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */
  * rtc_task_lock nests inside rtc_lock.
  */
 static DEFINE_SPINLOCK(rtc_task_lock);
-static rtc_task_t *rtc_callback = NULL;
+static rtc_task_t *rtc_callback;
 #endif
 
 /*
@@ -205,7 +205,7 @@ static rtc_task_t *rtc_callback = NULL;
 
 static unsigned long epoch = 1900;	/* year corresponding to 0x00	*/
 
-static const unsigned char days_in_mo[] = 
+static const unsigned char days_in_mo[] =
 {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 
 /*
@@ -242,7 +242,7 @@ irqreturn_t rtc_interrupt(int irq, void *dev_id)
 	 *	the last read in the remainder of rtc_irq_data.
 	 */
 
-	spin_lock (&rtc_lock);
+	spin_lock(&rtc_lock);
 	rtc_irq_data += 0x100;
 	rtc_irq_data &= ~0xff;
 	if (is_hpet_enabled()) {
@@ -259,16 +259,16 @@ irqreturn_t rtc_interrupt(int irq, void *dev_id)
 	if (rtc_status & RTC_TIMER_ON)
 		mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
 
-	spin_unlock (&rtc_lock);
+	spin_unlock(&rtc_lock);
 
 	/* Now do the rest of the actions */
 	spin_lock(&rtc_task_lock);
 	if (rtc_callback)
 		rtc_callback->func(rtc_callback->private_data);
 	spin_unlock(&rtc_task_lock);
-	wake_up_interruptible(&rtc_wait);	
+	wake_up_interruptible(&rtc_wait);
 
-	kill_fasync (&rtc_async_queue, SIGIO, POLL_IN);
+	kill_fasync(&rtc_async_queue, SIGIO, POLL_IN);
 
 	return IRQ_HANDLED;
 }
@@ -335,7 +335,7 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
 	DECLARE_WAITQUEUE(wait, current);
 	unsigned long data;
 	ssize_t retval;
-	
+
 	if (rtc_has_irq == 0)
 		return -EIO;
 
@@ -358,11 +358,11 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
 		 * confusing. And no, xchg() is not the answer. */
 
 		__set_current_state(TASK_INTERRUPTIBLE);
-		
-		spin_lock_irq (&rtc_lock);
+
+		spin_lock_irq(&rtc_lock);
 		data = rtc_irq_data;
 		rtc_irq_data = 0;
-		spin_unlock_irq (&rtc_lock);
+		spin_unlock_irq(&rtc_lock);
 
 		if (data != 0)
 			break;
@@ -378,10 +378,13 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
 		schedule();
 	} while (1);
 
-	if (count == sizeof(unsigned int))
-		retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int);
-	else
-		retval = put_user(data, (unsigned long __user *)buf) ?: sizeof(long);
+	if (count == sizeof(unsigned int)) {
+		retval = put_user(data,
+				  (unsigned int __user *)buf) ?: sizeof(int);
+	} else {
+		retval = put_user(data,
+				  (unsigned long __user *)buf) ?: sizeof(long);
+	}
 	if (!retval)
 		retval = count;
  out:
@@ -394,7 +397,7 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
 
 static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 {
-	struct rtc_time wtime; 
+	struct rtc_time wtime;
 
 #ifdef RTC_IRQ
 	if (rtc_has_irq == 0) {
@@ -426,35 +429,41 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 	}
 	case RTC_PIE_OFF:	/* Mask periodic int. enab. bit	*/
 	{
-		unsigned long flags; /* can be called from isr via rtc_control() */
-		spin_lock_irqsave (&rtc_lock, flags);
+		/* can be called from isr via rtc_control() */
+		unsigned long flags;
+
+		spin_lock_irqsave(&rtc_lock, flags);
 		mask_rtc_irq_bit_locked(RTC_PIE);
 		if (rtc_status & RTC_TIMER_ON) {
 			rtc_status &= ~RTC_TIMER_ON;
 			del_timer(&rtc_irq_timer);
 		}
-		spin_unlock_irqrestore (&rtc_lock, flags);
+		spin_unlock_irqrestore(&rtc_lock, flags);
+
 		return 0;
 	}
 	case RTC_PIE_ON:	/* Allow periodic ints		*/
 	{
-		unsigned long flags; /* can be called from isr via rtc_control() */
+		/* can be called from isr via rtc_control() */
+		unsigned long flags;
+
 		/*
 		 * We don't really want Joe User enabling more
 		 * than 64Hz of interrupts on a multi-user machine.
 		 */
 		if (!kernel && (rtc_freq > rtc_max_user_freq) &&
-			(!capable(CAP_SYS_RESOURCE)))
+						(!capable(CAP_SYS_RESOURCE)))
 			return -EACCES;
 
-		spin_lock_irqsave (&rtc_lock, flags);
+		spin_lock_irqsave(&rtc_lock, flags);
 		if (!(rtc_status & RTC_TIMER_ON)) {
 			mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq +
 					2*HZ/100);
 			rtc_status |= RTC_TIMER_ON;
 		}
 		set_rtc_irq_bit_locked(RTC_PIE);
-		spin_unlock_irqrestore (&rtc_lock, flags);
+		spin_unlock_irqrestore(&rtc_lock, flags);
+
 		return 0;
 	}
 	case RTC_UIE_OFF:	/* Mask ints from RTC updates.	*/
@@ -477,7 +486,7 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 		 */
 		memset(&wtime, 0, sizeof(struct rtc_time));
 		get_rtc_alm_time(&wtime);
-		break; 
+		break;
 	}
 	case RTC_ALM_SET:	/* Store a time into the alarm */
 	{
@@ -505,16 +514,21 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 			 */
 		}
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
-		    RTC_ALWAYS_BCD)
-		{
-			if (sec < 60) BIN_TO_BCD(sec);
-			else sec = 0xff;
+							RTC_ALWAYS_BCD) {
+			if (sec < 60)
+				BIN_TO_BCD(sec);
+			else
+				sec = 0xff;
 
-			if (min < 60) BIN_TO_BCD(min);
-			else min = 0xff;
+			if (min < 60)
+				BIN_TO_BCD(min);
+			else
+				min = 0xff;
 
-			if (hrs < 24) BIN_TO_BCD(hrs);
-			else hrs = 0xff;
+			if (hrs < 24)
+				BIN_TO_BCD(hrs);
+			else
+				hrs = 0xff;
 		}
 		CMOS_WRITE(hrs, RTC_HOURS_ALARM);
 		CMOS_WRITE(min, RTC_MINUTES_ALARM);
@@ -563,11 +577,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 
 		if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr)))
 			return -EINVAL;
-			
+
 		if ((hrs >= 24) || (min >= 60) || (sec >= 60))
 			return -EINVAL;
 
-		if ((yrs -= epoch) > 255)    /* They are unsigned */
+		yrs -= epoch;
+		if (yrs > 255)		/* They are unsigned */
 			return -EINVAL;
 
 		spin_lock_irq(&rtc_lock);
@@ -635,9 +650,10 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 	{
 		int tmp = 0;
 		unsigned char val;
-		unsigned long flags; /* can be called from isr via rtc_control() */
+		/* can be called from isr via rtc_control() */
+		unsigned long flags;
 
-		/* 
+		/*
 		 * The max we can do is 8192Hz.
 		 */
 		if ((arg < 2) || (arg > 8192))
@@ -646,7 +662,8 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 		 * We don't really want Joe User generating more
 		 * than 64Hz of interrupts on a multi-user machine.
 		 */
-		if (!kernel && (arg > rtc_max_user_freq) && (!capable(CAP_SYS_RESOURCE)))
+		if (!kernel && (arg > rtc_max_user_freq) &&
+					!capable(CAP_SYS_RESOURCE))
 			return -EACCES;
 
 		while (arg > (1<<tmp))
@@ -674,11 +691,11 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 #endif
 	case RTC_EPOCH_READ:	/* Read the epoch.	*/
 	{
-		return put_user (epoch, (unsigned long __user *)arg);
+		return put_user(epoch, (unsigned long __user *)arg);
 	}
 	case RTC_EPOCH_SET:	/* Set the epoch.	*/
 	{
-		/* 
+		/*
 		 * There were no RTC clocks before 1900.
 		 */
 		if (arg < 1900)
@@ -693,7 +710,8 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 	default:
 		return -ENOTTY;
 	}
-	return copy_to_user((void __user *)arg, &wtime, sizeof wtime) ? -EFAULT : 0;
+	return copy_to_user((void __user *)arg,
+			    &wtime, sizeof wtime) ? -EFAULT : 0;
 }
 
 static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
@@ -712,26 +730,25 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  * needed here. Or anywhere else in this driver. */
 static int rtc_open(struct inode *inode, struct file *file)
 {
-	spin_lock_irq (&rtc_lock);
+	spin_lock_irq(&rtc_lock);
 
-	if(rtc_status & RTC_IS_OPEN)
+	if (rtc_status & RTC_IS_OPEN)
 		goto out_busy;
 
 	rtc_status |= RTC_IS_OPEN;
 
 	rtc_irq_data = 0;
-	spin_unlock_irq (&rtc_lock);
+	spin_unlock_irq(&rtc_lock);
 	return 0;
 
 out_busy:
-	spin_unlock_irq (&rtc_lock);
+	spin_unlock_irq(&rtc_lock);
 	return -EBUSY;
 }
 
-static int rtc_fasync (int fd, struct file *filp, int on)
-
+static int rtc_fasync(int fd, struct file *filp, int on)
 {
-	return fasync_helper (fd, filp, on, &rtc_async_queue);
+	return fasync_helper(fd, filp, on, &rtc_async_queue);
 }
 
 static int rtc_release(struct inode *inode, struct file *file)
@@ -762,16 +779,16 @@ static int rtc_release(struct inode *inode, struct file *file)
 	}
 	spin_unlock_irq(&rtc_lock);
 
-	if (file->f_flags & FASYNC) {
-		rtc_fasync (-1, file, 0);
-	}
+	if (file->f_flags & FASYNC)
+		rtc_fasync(-1, file, 0);
 no_irq:
 #endif
 
-	spin_lock_irq (&rtc_lock);
+	spin_lock_irq(&rtc_lock);
 	rtc_irq_data = 0;
 	rtc_status &= ~RTC_IS_OPEN;
-	spin_unlock_irq (&rtc_lock);
+	spin_unlock_irq(&rtc_lock);
+
 	return 0;
 }
 
@@ -786,9 +803,9 @@ static unsigned int rtc_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &rtc_wait, wait);
 
-	spin_lock_irq (&rtc_lock);
+	spin_lock_irq(&rtc_lock);
 	l = rtc_irq_data;
-	spin_unlock_irq (&rtc_lock);
+	spin_unlock_irq(&rtc_lock);
 
 	if (l != 0)
 		return POLLIN | POLLRDNORM;
@@ -796,14 +813,6 @@ static unsigned int rtc_poll(struct file *file, poll_table *wait)
 }
 #endif
 
-/*
- * exported stuffs
- */
-
-EXPORT_SYMBOL(rtc_register);
-EXPORT_SYMBOL(rtc_unregister);
-EXPORT_SYMBOL(rtc_control);
-
 int rtc_register(rtc_task_t *task)
 {
 #ifndef RTC_IRQ
@@ -829,6 +838,7 @@ int rtc_register(rtc_task_t *task)
 	return 0;
 #endif
 }
+EXPORT_SYMBOL(rtc_register);
 
 int rtc_unregister(rtc_task_t *task)
 {
@@ -845,7 +855,7 @@ int rtc_unregister(rtc_task_t *task)
 		return -ENXIO;
 	}
 	rtc_callback = NULL;
-	
+
 	/* disable controls */
 	if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) {
 		tmp = CMOS_READ(RTC_CONTROL);
@@ -865,6 +875,7 @@ int rtc_unregister(rtc_task_t *task)
 	return 0;
 #endif
 }
+EXPORT_SYMBOL(rtc_unregister);
 
 int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
 {
@@ -883,7 +894,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
 	return rtc_do_ioctl(cmd, arg, 1);
 #endif
 }
-
+EXPORT_SYMBOL(rtc_control);
 
 /*
  *	The various file operations we support.
@@ -910,11 +921,11 @@ static struct miscdevice rtc_dev = {
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations rtc_proc_fops = {
-	.owner = THIS_MODULE,
-	.open = rtc_proc_open,
-	.read  = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
+	.owner		= THIS_MODULE,
+	.open		= rtc_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 #endif
 
@@ -965,7 +976,7 @@ static int __init rtc_init(void)
 #ifdef CONFIG_SPARC32
 	for_each_ebus(ebus) {
 		for_each_ebusdev(edev, ebus) {
-			if(strcmp(edev->prom_node->name, "rtc") == 0) {
+			if (strcmp(edev->prom_node->name, "rtc") == 0) {
 				rtc_port = edev->resource[0].start;
 				rtc_irq = edev->irqs[0];
 				goto found;
@@ -986,7 +997,8 @@ found:
 	 * XXX Interrupt pin #7 in Espresso is shared between RTC and
 	 * PCI Slot 2 INTA# (and some INTx# in Slot 1).
 	 */
-	if (request_irq(rtc_irq, rtc_interrupt, IRQF_SHARED, "rtc", (void *)&rtc_port)) {
+	if (request_irq(rtc_irq, rtc_interrupt, IRQF_SHARED, "rtc",
+			(void *)&rtc_port)) {
 		rtc_has_irq = 0;
 		printk(KERN_ERR "rtc: cannot register IRQ %d\n", rtc_irq);
 		return -EIO;
@@ -1020,11 +1032,13 @@ no_irq:
 		rtc_int_handler_ptr = rtc_interrupt;
 	}
 
-	if(request_irq(RTC_IRQ, rtc_int_handler_ptr, IRQF_DISABLED, "rtc", NULL)) {
+	if (request_irq(RTC_IRQ, rtc_int_handler_ptr, IRQF_DISABLED,
+			"rtc", NULL)) {
 		/* Yeah right, seeing as irq 8 doesn't even hit the bus. */
 		rtc_has_irq = 0;
 		printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ);
 		rtc_release_region();
+
 		return -EIO;
 	}
 	hpet_rtc_timer_init();
@@ -1052,21 +1066,21 @@ no_irq:
 
 #if defined(__alpha__) || defined(__mips__)
 	rtc_freq = HZ;
-	
+
 	/* Each operating system on an Alpha uses its own epoch.
 	   Let's try to guess which one we are using now. */
-	
+
 	if (rtc_is_updating() != 0)
 		msleep(20);
-	
+
 	spin_lock_irq(&rtc_lock);
 	year = CMOS_READ(RTC_YEAR);
 	ctrl = CMOS_READ(RTC_CONTROL);
 	spin_unlock_irq(&rtc_lock);
-	
+
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 		BCD_TO_BIN(year);       /* This should never happen... */
-	
+
 	if (year < 20) {
 		epoch = 2000;
 		guess = "SRM (post-2000)";
@@ -1087,7 +1101,8 @@ no_irq:
 #endif
 	}
 	if (guess)
-		printk(KERN_INFO "rtc: %s epoch (%lu) detected\n", guess, epoch);
+		printk(KERN_INFO "rtc: %s epoch (%lu) detected\n",
+			guess, epoch);
 #endif
 #ifdef RTC_IRQ
 	if (rtc_has_irq == 0)
@@ -1096,8 +1111,12 @@ no_irq:
 	spin_lock_irq(&rtc_lock);
 	rtc_freq = 1024;
 	if (!hpet_set_periodic_freq(rtc_freq)) {
-		/* Initialize periodic freq. to CMOS reset default, which is 1024Hz */
-		CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT);
+		/*
+		 * Initialize periodic frequency to CMOS reset default,
+		 * which is 1024Hz
+		 */
+		CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06),
+			   RTC_FREQ_SELECT);
 	}
 	spin_unlock_irq(&rtc_lock);
 no_irq2:
@@ -1110,20 +1129,20 @@ no_irq2:
 	return 0;
 }
 
-static void __exit rtc_exit (void)
+static void __exit rtc_exit(void)
 {
 	cleanup_sysctl();
-	remove_proc_entry ("driver/rtc", NULL);
+	remove_proc_entry("driver/rtc", NULL);
 	misc_deregister(&rtc_dev);
 
 #ifdef CONFIG_SPARC32
 	if (rtc_has_irq)
-		free_irq (rtc_irq, &rtc_port);
+		free_irq(rtc_irq, &rtc_port);
 #else
 	rtc_release_region();
 #ifdef RTC_IRQ
 	if (rtc_has_irq)
-		free_irq (RTC_IRQ, NULL);
+		free_irq(RTC_IRQ, NULL);
 #endif
 #endif /* CONFIG_SPARC32 */
 }
@@ -1133,14 +1152,14 @@ module_exit(rtc_exit);
 
 #ifdef RTC_IRQ
 /*
- * 	At IRQ rates >= 4096Hz, an interrupt may get lost altogether.
+ *	At IRQ rates >= 4096Hz, an interrupt may get lost altogether.
  *	(usually during an IDE disk interrupt, with IRQ unmasking off)
  *	Since the interrupt handler doesn't get called, the IRQ status
  *	byte doesn't get read, and the RTC stops generating interrupts.
  *	A timer is set, and will call this function if/when that happens.
  *	To get it out of this stalled state, we just read the status.
  *	At least a jiffy of interrupts (rtc_freq/HZ) will have been lost.
- *	(You *really* shouldn't be trying to use a non-realtime system 
+ *	(You *really* shouldn't be trying to use a non-realtime system
  *	for something that requires a steady > 1KHz signal anyways.)
  */
 
@@ -1148,7 +1167,7 @@ static void rtc_dropped_irq(unsigned long data)
 {
 	unsigned long freq;
 
-	spin_lock_irq (&rtc_lock);
+	spin_lock_irq(&rtc_lock);
 
 	if (hpet_rtc_dropped_irq()) {
 		spin_unlock_irq(&rtc_lock);
@@ -1167,13 +1186,15 @@ static void rtc_dropped_irq(unsigned long data)
 
 	spin_unlock_irq(&rtc_lock);
 
-	if (printk_ratelimit())
-		printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq);
+	if (printk_ratelimit()) {
+		printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+			freq);
+	}
 
 	/* Now we have new data */
 	wake_up_interruptible(&rtc_wait);
 
-	kill_fasync (&rtc_async_queue, SIGIO, POLL_IN);
+	kill_fasync(&rtc_async_queue, SIGIO, POLL_IN);
 }
 #endif
 
@@ -1277,7 +1298,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
 	 * can take just over 2ms. We wait 20ms. There is no need to
 	 * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
 	 * If you need to know *exactly* when a second has started, enable
-	 * periodic update complete interrupts, (via ioctl) and then 
+	 * periodic update complete interrupts, (via ioctl) and then
 	 * immediately read /dev/rtc which will block until you get the IRQ.
 	 * Once the read clears, read the RTC time (again via ioctl). Easy.
 	 */
@@ -1307,8 +1328,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
 	ctrl = CMOS_READ(RTC_CONTROL);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-	{
+	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 		BCD_TO_BIN(rtc_tm->tm_sec);
 		BCD_TO_BIN(rtc_tm->tm_min);
 		BCD_TO_BIN(rtc_tm->tm_hour);
@@ -1326,7 +1346,8 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
 	 * Account for differences between how the RTC uses the values
 	 * and how they are defined in a struct rtc_time;
 	 */
-	if ((rtc_tm->tm_year += (epoch - 1900)) <= 69)
+	rtc_tm->tm_year += epoch - 1900;
+	if (rtc_tm->tm_year <= 69)
 		rtc_tm->tm_year += 100;
 
 	rtc_tm->tm_mon--;
@@ -1347,8 +1368,7 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
 	ctrl = CMOS_READ(RTC_CONTROL);
 	spin_unlock_irq(&rtc_lock);
 
-	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-	{
+	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 		BCD_TO_BIN(alm_tm->tm_sec);
 		BCD_TO_BIN(alm_tm->tm_min);
 		BCD_TO_BIN(alm_tm->tm_hour);

From 27ec161f93b8f018e0270a5383ea1d1c36f00c8a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 269/890] x86: kill mk_pte_huge

It only has a single use, which can be trivially replaced.

Signed-off-by: Jeremy Fitzhardinge <Jeremy.Fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c        | 3 +--
 include/asm-x86/pgtable_64.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5cadbb40da3e..15e05a004fcf 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -742,8 +742,7 @@ int __meminit vmemmap_populate(struct page *start_page,
 			if (!p)
 				return -ENOMEM;
 
-			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
-			mk_pte_huge(entry);
+			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
 			set_pmd(pmd, __pmd(pte_val(entry)));
 
 			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 3072619365a8..5c252d9fee1b 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -355,7 +355,6 @@ static inline int pmd_large(pmd_t pte) {
 
 /* page, protection -> pte */
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
-#define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
  
 /* Change flags of a PTE */
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)

From f3f20de87c03a90add5e34806c28a03a0d60bde0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:31:09 +0100
Subject: [PATCH 270/890] x86: clean up mm/init_32.c

Some code reformatting in init_32.c.  No functional change.

Signed-off-by: Jeremy Fitzhardinge <Jeremy.Fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3c76d194fd2c..e6e34c7dcabf 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -165,16 +165,25 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		pmd = one_md_table_init(pgd);
 		if (pfn >= max_low_pfn)
 			continue;
-		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+		for (pmd_idx = 0;
+		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+		     pmd++, pmd_idx++) {
 			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
 
-			/* Map with big pages if possible, otherwise create normal page tables. */
+			/* Map with big pages if possible, otherwise
+			   create normal page tables. */
 			if (cpu_has_pse) {
-				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-				if (is_kernel_text(address) || is_kernel_text(address2))
-					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
-				else
-					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+				unsigned int address2;
+				pgprot_t prot = PAGE_KERNEL_LARGE;
+
+				address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE +
+					PAGE_OFFSET + PAGE_SIZE-1;
+
+				if (is_kernel_text(address) ||
+				    is_kernel_text(address2))
+					prot = PAGE_KERNEL_LARGE_EXEC;
+
+				set_pmd(pmd, pfn_pmd(pfn, prot));
 
 				pfn += PTRS_PER_PTE;
 			} else {
@@ -183,10 +192,12 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 				for (pte_ofs = 0;
 				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
 				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+					pgprot_t prot = PAGE_KERNEL;
+
 					if (is_kernel_text(address))
-						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-					else
-						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+						prot = PAGE_KERNEL_EXEC;
+
+					set_pte(pte, pfn_pte(pfn, prot));
 				}
 			}
 		}

From 675a0813609f9097e323261b1cc4b9dc3f50d40b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 271/890] x86: unify mmap_{32|64}.c

mmap_is_ia32 always true for X86_32, or while emulating IA32 on X86_64

Randomization not supported on X86_32 in legacy layout.  Both layouts allow
randomization on X86_64.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile_32           |  2 +-
 arch/x86/mm/Makefile_64           |  3 +-
 arch/x86/mm/{mmap_64.c => mmap.c} | 95 +++++++++++++++++--------------
 arch/x86/mm/mmap_32.c             | 81 --------------------------
 4 files changed, 53 insertions(+), 128 deletions(-)
 rename arch/x86/mm/{mmap_64.c => mmap.c} (71%)
 delete mode 100644 arch/x86/mm/mmap_32.c

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 362b4ad082de..a01aca79dc69 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap_32.o
+obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap.o
 
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index d18baed54fc6..b5a74486c3aa 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,9 +2,8 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y	 := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap_64.o
+obj-y	 := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA) += srat_64.o
-obj-$(CONFIG_IA32_EMULATION) += mmap_32.o
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap.c
similarity index 71%
rename from arch/x86/mm/mmap_64.c
rename to arch/x86/mm/mmap.c
index 119bf34ec529..56fe7124fbec 100644
--- a/arch/x86/mm/mmap_64.c
+++ b/arch/x86/mm/mmap.c
@@ -1,7 +1,5 @@
 /*
- *  linux/arch/x86-64/mm/mmap.c
- *
- *  flexible mmap layout support
+ * Flexible mmap layout support
  *
  * Based on code by Ingo Molnar and Andi Kleen, copyrighted
  * as follows:
@@ -24,7 +22,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
  */
 
 #include <linux/personality.h>
@@ -32,7 +29,6 @@
 #include <linux/random.h>
 #include <linux/limits.h>
 #include <linux/sched.h>
-#include <asm/ia32.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -42,20 +38,14 @@
 #define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static unsigned long mmap_base(void)
-{
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-
-	if (gap < MIN_GAP)
-		gap = MIN_GAP;
-	else if (gap > MAX_GAP)
-		gap = MAX_GAP;
-
-	return TASK_SIZE - (gap & PAGE_MASK);
-}
-
-static int mmap_is_32(void)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static int mmap_is_ia32(void)
 {
+#ifdef CONFIG_X86_32
+	return 1;
+#endif
 #ifdef CONFIG_IA32_EMULATION
 	if (test_thread_flag(TIF_IA32))
 		return 1;
@@ -74,43 +64,60 @@ static int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
+static unsigned long mmap_rnd(void)
+{
+	unsigned long rnd = 0;
+
+	/*
+	*  8 bits of randomness in 32bit mmaps, 20 address space bits
+	* 28 bits of randomness in 64bit mmaps, 40 address space bits
+	*/
+	if (current->flags & PF_RANDOMIZE) {
+		if (mmap_is_ia32())
+			rnd = (long)get_random_int() % (1<<8);
+		else
+			rnd = (long)(get_random_int() % (1<<28));
+	}
+	return rnd << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base(void)
+{
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+}
+
+/*
+ * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
+ * does, but not when emulating X86_32
+ */
+static unsigned long mmap_legacy_base(void)
+{
+	if (mmap_is_ia32())
+		return TASK_UNMAPPED_BASE;
+	else
+		return TASK_UNMAPPED_BASE + mmap_rnd();
+}
+
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-	int rnd = 0;
-	if (current->flags & PF_RANDOMIZE) {
-		/*
-		 * Add 28bit randomness which is about 40bits of address space
-		 * because mmap base has to be page aligned.
-		 * or ~1/128 of the total user VM
-		 * (total user address space is 47bits)
-		 */
-		rnd = get_random_int() & 0xfffffff;
-	}
-
-	/*
-	 * Fall back to the standard layout if the personality
-	 * bit is set, or if the expected stack growth is unlimited:
-	 */
-	if (mmap_is_32()) {
-#ifdef CONFIG_IA32_EMULATION
-		/* ia32_pick_mmap_layout has its own. */
-		return ia32_pick_mmap_layout(mm);
-#endif
-	} else if (mmap_is_legacy()) {
-		mm->mmap_base = TASK_UNMAPPED_BASE;
+	if (mmap_is_legacy()) {
+		mm->mmap_base = mmap_legacy_base();
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 		mm->unmap_area = arch_unmap_area_topdown;
-		if (current->flags & PF_RANDOMIZE)
-			rnd = -rnd;
 	}
-	if (current->flags & PF_RANDOMIZE)
-		mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
 }
diff --git a/arch/x86/mm/mmap_32.c b/arch/x86/mm/mmap_32.c
deleted file mode 100644
index d490662e4af4..000000000000
--- a/arch/x86/mm/mmap_32.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  linux/arch/i386/mm/mmap.c
- *
- *  flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline unsigned long mmap_base(struct mm_struct *mm)
-{
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-	unsigned long random_factor = 0;
-
-	if (current->flags & PF_RANDOMIZE)
-		random_factor = get_random_int() % (1024*1024);
-
-	if (gap < MIN_GAP)
-		gap = MIN_GAP;
-	else if (gap > MAX_GAP)
-		gap = MAX_GAP;
-
-	return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-#ifdef CONFIG_X86_32
-void arch_pick_mmap_layout(struct mm_struct *mm)
-#else
-void ia32_pick_mmap_layout(struct mm_struct *mm)
-#endif
-{
-	/*
-	 * Fall back to the standard layout if the personality
-	 * bit is set, or if the expected stack growth is unlimited:
-	 */
-	if (sysctl_legacy_va_layout ||
-	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
-		mm->mmap_base = TASK_UNMAPPED_BASE;
-		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
-	} else {
-		mm->mmap_base = mmap_base(mm);
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
-	}
-}

From 2355188570790930718fb72444cddc2959039d9d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 272/890] x86: avoid build warning

fix this build warning:

 include/asm/topology_32.h: In function 'node_to_first_cpu':
 include/asm/topology_32.h:66: warning: unused variable 'mask'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpumask.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 85bd790c201e..7047f58306a7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -218,8 +218,8 @@ int __first_cpu(const cpumask_t *srcp);
 int __next_cpu(int n, const cpumask_t *srcp);
 #define next_cpu(n, src) __next_cpu((n), &(src))
 #else
-#define first_cpu(src)		0
-#define next_cpu(n, src)	1
+#define first_cpu(src)		({ (void)(src); 0; })
+#define next_cpu(n, src)	({ (void)(src); 1; })
 #endif
 
 #define cpumask_of_cpu(cpu)						\

From 21db5584f955652a6e34afa7d3dbd72dd6c82a89 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 273/890] x86: export math_state_restore

Export math_state_restore symbol, so it can be used for hypervisors.
They are commonly loaded as modules (lguest being an example).

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 965f2cc3a013..1a12a81fdb1b 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1074,6 +1074,7 @@ asmlinkage void math_state_restore(void)
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
 }
+EXPORT_SYMBOL_GPL(math_state_restore);
 
 void __init trap_init(void)
 {

From 70fd93c9d9e07dd2ea4465df76a0ddd414fd21ac Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 274/890] x86: export cpu_gdt_descr

With paravirualization, hypervisors needs to handle the gdt,
that was right to this point only used at very early
inialization code. Hypervisors (lguest being the current case)
are commonly modules, so make it an export

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/x8664_ksyms_64.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index e28b533674dd..a66e9c1a0537 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -8,6 +8,7 @@
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/desc.h>
 
 EXPORT_SYMBOL(kernel_thread);
 
@@ -53,3 +54,8 @@ EXPORT_SYMBOL(init_level4_pgt);
 EXPORT_SYMBOL(load_gs_index);
 
 EXPORT_SYMBOL(_proxy_pda);
+
+#ifdef CONFIG_PARAVIRT
+/* Virtualized guests may want to use it */
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
+#endif

From 49a697871e2edcbc9cc682466bc4f2316b854d23 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 275/890] x86: turn priviled operation into a macro in
 head_64.S

under paravirt, read cr2 cannot be issued directly anymore.
So wrap it in a macro, defined to the operation itself in case
paravirt is off, but to something else if we have paravirt
in the game

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_64.S | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b6167fe3330e..c31b1c96a9d3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,13 @@
 #include <asm/msr.h>
 #include <asm/cache.h>
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/asm-offsets.h>
+#include <asm/paravirt.h>
+#else
+#define GET_CR2_INTO_RCX movq %cr2, %rcx
+#endif
+
 /* we are not able to switch in one step to the final KERNEL ADRESS SPACE
  * because we need identity-mapped pages.
  *
@@ -267,7 +274,7 @@ ENTRY(early_idt_handler)
 	xorl %eax,%eax
 	movq 8(%rsp),%rsi	# get rip
 	movq (%rsp),%rdx
-	movq %cr2,%rcx
+	GET_CR2_INTO_RCX
 	leaq early_idt_msg(%rip),%rdi
 	call early_printk
 	cmpl $2,early_recursion_flag(%rip)

From ee238e5ca66858f80170f87724f84d67183b069a Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 276/890] x86: prepare time related functions for paravirt

This patch add provisions for time related functions so they
can be later replaced by paravirt versions.

it basically encloses {g,s}et_wallclock inside the
already existent functions update_persistent_clock and
read_persistent_clock, and defines {s,g}et_wallclock
to the core of such functions.

it also allow for a later-on-game time initialization, as done
by i386. Paravirt guests can set a function to do their own
initialization this way.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/time_64.c | 12 +++++++++---
 include/asm-x86/time.h    | 26 +++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index bf0bcc9bb001..91d4d495904e 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -21,6 +21,8 @@
 #include <asm/hpet.h>
 #include <asm/nmi.h>
 #include <asm/vgtod.h>
+#include <asm/time.h>
+#include <asm/timer.h>
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 
@@ -54,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
-static unsigned int __init tsc_calibrate_cpu_khz(void)
+unsigned long __init native_calculate_cpu_khz(void)
 {
 	int tsc_start, tsc_now;
 	int i, no_ctr_free;
@@ -104,20 +106,23 @@ static struct irqaction irq0 = {
 	.name		= "timer"
 };
 
-void __init time_init(void)
+void __init hpet_time_init(void)
 {
 	if (!hpet_enable())
 		setup_pit_timer();
 
 	setup_irq(0, &irq0);
+}
 
+void __init time_init(void)
+{
 	tsc_calibrate();
 
 	cpu_khz = tsc_khz;
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
 		boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 		boot_cpu_data.x86 == 16)
-		cpu_khz = tsc_calibrate_cpu_khz();
+		cpu_khz = calculate_cpu_khz();
 
 	if (unsynchronized_tsc())
 		mark_tsc_unstable("TSCs unsynchronized");
@@ -130,4 +135,5 @@ void __init time_init(void)
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 		cpu_khz / 1000, cpu_khz % 1000);
 	init_tsc_clocksource();
+	late_time_init = choose_time_init();
 }
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index b3f94cd81ac6..68779b048a3e 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -1,8 +1,12 @@
-#ifndef _ASMi386_TIME_H
-#define _ASMi386_TIME_H
+#ifndef _ASMX86_TIME_H
+#define _ASMX86_TIME_H
+
+extern void (*late_time_init)(void);
+extern void hpet_time_init(void);
 
-#include <linux/efi.h>
 #include <asm/mc146818rtc.h>
+#ifdef CONFIG_X86_32
+#include <linux/efi.h>
 
 static inline unsigned long native_get_wallclock(void)
 {
@@ -28,8 +32,20 @@ static inline int native_set_wallclock(unsigned long nowtime)
 	return retval;
 }
 
-extern void (*late_time_init)(void);
-extern void hpet_time_init(void);
+#else
+extern void native_time_init_hook(void);
+
+static inline unsigned long native_get_wallclock(void)
+{
+	return mach_get_cmos_time();
+}
+
+static inline int native_set_wallclock(unsigned long nowtime)
+{
+	return mach_set_rtc_mmss(nowtime);
+}
+
+#endif
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>

From ba082427ae6ffbf8e48a26ae4f72f4501a6b80c1 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:10 +0100
Subject: [PATCH 277/890] x86: tweak io_64.h for paravirt.

We need something here because we can't call in and out instructions
directly. However, we have to be careful, because no indirections are
allowed in misc_64.c , and paravirt_ops is a kind of one. So just
call it directly there

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_64.c |  6 ++++++
 include/asm-x86/io_64.h            | 30 +++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 43e5fcc37be9..7d8338e21b00 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -9,6 +9,12 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+/*
+ * we have to be careful, because no indirections are allowed here, and
+ * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening
+ */
+#undef CONFIG_PARAVIRT
 #define _LINUX_STRING_H_ 1
 #define __LINUX_BITMAP_H 1
 
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index dbcc03aa1c6a..fef0ce2ced81 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -40,6 +40,10 @@ extern void native_io_delay(void);
 extern int io_delay_type;
 extern void io_delay_init(void);
 
+#if defined(CONFIG_PARAVIRT)
+#include <asm/paravirt.h>
+#else
+
 static inline void slow_down_io(void)
 {
 	native_io_delay();
@@ -49,6 +53,7 @@ static inline void slow_down_io(void)
 	native_io_delay();
 #endif
 }
+#endif
 
 /*
  * Talk about misusing macros..
@@ -57,21 +62,32 @@ static inline void slow_down_io(void)
 static inline void out##s(unsigned x value, unsigned short port) {
 
 #define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#ifndef REALLY_SLOW_IO
+#define REALLY_SLOW_IO
+#define UNSET_REALLY_SLOW_IO
+#endif
 
 #define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w"); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
+		slow_down_io(); }
 
 #define __IN1(s) \
 static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
 
 #define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
 
-#define __IN(s,s1) \
-__IN1(s) __IN2(s,s1,"w"); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); return _v; } \
+__IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i);	  \
+				slow_down_io(); return _v; }
+
+#ifdef UNSET_REALLY_SLOW_IO
+#undef REALLY_SLOW_IO
+#endif
 
 #define __INS(s) \
 static inline void ins##s(unsigned short port, void * addr, unsigned long count) \

From 746ef0cd0c7190d570c65b8e39a4ac67550ae43a Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:11 +0100
Subject: [PATCH 278/890] x86: prepare 64-bit architecture initialization for
 paravirt

This patch prepares the x86_64 architecture initialization for
paravirt. It requires a memory initialization step, which is done
by implementing 64-bit version for machine_specific_memory_setup,
and putting an ARCH_SETUP hook, for guest-dependent initialization.
This last step is done akin to i386

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_64.c    |  9 +++++++--
 arch/x86/kernel/setup_64.c   | 28 +++++++++++++++++++++++++++-
 arch/x86/kernel/smpboot_64.c |  4 ++--
 include/asm-x86/setup.h      | 11 ++++++++---
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 8e7321101a0a..abc473bcabe8 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -638,8 +638,10 @@ static void early_panic(char *msg)
 	panic(msg);
 }
 
-void __init setup_memory_region(void)
+/* We're not void only for x86 32-bit compat */
+char * __init machine_specific_memory_setup(void)
 {
+	char *who = "BIOS-e820";
 	/*
 	 * Try to copy the BIOS-supplied E820-map.
 	 *
@@ -650,7 +652,10 @@ void __init setup_memory_region(void)
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
 		early_panic("Cannot find a valid memory map");
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	e820_print_map("BIOS-e820");
+	e820_print_map(who);
+
+	/* In case someone cares... */
+	return who;
 }
 
 static int __init parse_memopt(char *p)
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f2b131ef844e..8dd110d93e73 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -39,6 +39,7 @@
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/uaccess.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -62,6 +63,12 @@
 #include <asm/mce.h>
 #include <asm/ds.h>
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define ARCH_SETUP
+#endif
+
 /*
  * Machine setup..
  */
@@ -246,6 +253,16 @@ static void discover_ebda(void)
 	 * 4K EBDA area at 0x40E
 	 */
 	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+	/*
+	 * There can be some situations, like paravirtualized guests,
+	 * in which there is no available ebda information. In such
+	 * case, just skip it
+	 */
+	if (!ebda_addr) {
+		ebda_size = 0;
+		return;
+	}
+
 	ebda_addr <<= 4;
 
 	ebda_size = *(unsigned short *)__va(ebda_addr);
@@ -259,6 +276,12 @@ static void discover_ebda(void)
 		ebda_size = 64*1024;
 }
 
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) memory_setup(void)
+{
+       machine_specific_memory_setup();
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	unsigned i;
@@ -276,7 +299,10 @@ void __init setup_arch(char **cmdline_p)
 	rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
 	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
 #endif
-	setup_memory_region();
+
+	ARCH_SETUP
+
+	memory_setup();
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index c3f2736ba530..cb73c4da87fc 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -369,7 +369,7 @@ void __cpuinit start_secondary(void)
 
 	unlock_ipi_call_lock();
 
-	setup_secondary_APIC_clock();
+	setup_secondary_clock();
 
 	cpu_idle();
 }
@@ -923,7 +923,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 * Set up local APIC timer on boot CPU.
 	 */
 
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 }
 
 /*
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 24d786e07b49..071e054abd82 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -3,6 +3,13 @@
 
 #define COMMAND_LINE_SIZE 2048
 
+#ifndef __ASSEMBLY__
+char *machine_specific_memory_setup(void);
+#ifndef CONFIG_PARAVIRT
+#define paravirt_post_allocator_init()	do {} while (0)
+#endif
+#endif /* __ASSEMBLY__ */
+
 #ifdef __KERNEL__
 
 #ifdef __i386__
@@ -51,9 +58,7 @@ void __init add_memory_region(unsigned long long start,
 
 extern unsigned long init_pg_tables_end;
 
-#ifndef CONFIG_PARAVIRT
-#define paravirt_post_allocator_init()	do {} while (0)
-#endif
+
 
 #endif /* __i386__ */
 #endif /* _SETUP */

From 6842ef0e85a9cc1295f3ef933a230f863b01eb0f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:11 +0100
Subject: [PATCH 279/890] x86: unify desc_struct

This patch aims to make the access of struct desc_struct variables
equal across architectures. In this patch, I unify the i386 and x86_64
versions under an anonymous union, keeping the way they are accessed
untouched (a and b for 32-bit code, individual bit-fields for 64-bit).

This solution is not beautiful, but will allow us to integrate common
code that differed by the way descriptors were used. This is to be viewed
incrementally. There's simply too much code to be fixed at once.

In the future, goal is to set up in a single way of acessing
the desc_struct fields.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apm_32.c       |  2 +-
 arch/x86/kernel/cpu/common.c   | 35 ++++++++++++++++++++--------------
 arch/x86/kernel/process_64.c   |  2 +-
 arch/x86/kernel/traps_32.c     |  3 ++-
 include/asm-x86/desc_defs.h    | 25 ++++++++++++++++--------
 include/asm-x86/lguest.h       |  4 ++--
 include/asm-x86/processor_32.h |  5 +----
 7 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 7496c2e4b6ae..e32f6c37db9b 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -405,7 +405,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
 static struct apm_user *	user_list;
 static DEFINE_SPINLOCK(user_list_lock);
-static const struct desc_struct	bad_bios_desc = { 0, 0x00409200 };
+static const struct desc_struct	bad_bios_desc = { { { 0, 0x00409200 } } };
 
 static const char		driver_version[] = "1.16ac";	/* no spaces */
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 235cd615b89d..4f9e31912a25 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,31 +22,38 @@
 #include "cpu.h"
 
 DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
-	[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
-	[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
-	[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
-	[GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
+	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
+	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
 	/*
 	 * Segments used for calling PnP BIOS have byte granularity.
 	 * They code segments and data segments have fixed 64k limits,
 	 * the transfer segment sizes are set at run time.
 	 */
-	[GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
-	[GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
-	[GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	/* 32-bit code */
+	[GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+	/* 16-bit code */
+	[GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+	/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+	/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+	/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * are set at run time.  All have 64k limits.
 	 */
-	[GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	/* 32-bit code */
+	[GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
 	/* 16-bit code */
-	[GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
-	[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
+	[GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+	/* data */
+	[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
 
-	[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
-	[GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+	[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
+	[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 843bf0c978a4..86c310acc989 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -455,7 +455,7 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 		.limit_in_pages = 1,
 		.useable = 1,
 	};
-	struct n_desc_struct *desc = (void *)t->thread.tls_array;
+	struct desc_struct *desc = (void *)t->thread.tls_array;
 	desc += tls;
 	desc->a = LDT_entry_a(&ud);
 	desc->b = LDT_entry_b(&ud);
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 57491942cc4e..0d45017ed824 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -76,7 +76,8 @@ char ignore_fpu_irq = 0;
  * F0 0F bug workaround.. We have a special link segment
  * for this.
  */
-struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+struct desc_struct idt_table[256]
+	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 
 asmlinkage void divide_error(void);
 asmlinkage void debug(void);
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index 089004070099..de47eb0a23aa 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -11,17 +11,26 @@
 
 #include <linux/types.h>
 
+/*
+ * FIXME: Acessing the desc_struct through its fields is more elegant,
+ * and should be the one valid thing to do. However, a lot of open code
+ * still touches the a and b acessors, and doing this allow us to do it
+ * incrementally. We keep the signature as a struct, rather than an union,
+ * so we can get rid of it transparently in the future -- glommer
+ */
 // 8 byte segment descriptor
 struct desc_struct {
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
-	unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed));
+	union {
+		struct { unsigned int a, b; };
+		struct {
+			u16 limit0;
+			u16 base0;
+			unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+			unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
+		};
 
-struct n_desc_struct {
-	unsigned int a,b;
-};
+	};
+} __attribute__((packed));
 
 enum {
 	GATE_INTERRUPT = 0xE,
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index ccd338460811..17c908c0ef1c 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -78,8 +78,8 @@ static inline void lguest_set_ts(void)
 }
 
 /* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00})
-#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300})
+#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } })
+#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } })
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 9c0ab7f26bd9..bc48ad64de47 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -20,14 +20,11 @@
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <asm/processor-flags.h>
+#include <asm/desc_defs.h>
 
 /* flag for disabling the tsc */
 extern int tsc_disable;
 
-struct desc_struct {
-	unsigned long a,b;
-};
-
 static inline int desc_empty(const void *ptr)
 {
 	const u32 *desc = ptr;

From 6b68f01baa810e9f63fbf39e9d5c3ef1d94a966f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:12 +0100
Subject: [PATCH 280/890] x86: unify struct desc_ptr

This patch unifies struct desc_ptr between i386 and x86_64.
They can be expressed in the exact same way in C code, only
having to change the name of one of them. As Xgt_desc_struct
is ugly and big, this is the one that goes away.

There's also a padding field in i386, but it is not really
needed in the C structure definition.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_32.c   |  5 ++---
 arch/x86/kernel/cpu/common.c       |  2 +-
 arch/x86/kernel/doublefault_32.c   |  2 +-
 arch/x86/kernel/efi_32.c           |  4 ++--
 arch/x86/kernel/machine_kexec_32.c |  4 ++--
 arch/x86/kernel/reboot_32.c        |  2 +-
 arch/x86/lguest/boot.c             |  4 ++--
 arch/x86/xen/enlighten.c           | 10 +++++-----
 drivers/kvm/svm.c                  |  2 +-
 include/asm-x86/desc_32.h          | 16 +++++-----------
 include/asm-x86/lguest.h           |  8 ++++----
 include/asm-x86/paravirt.h         | 18 +++++++++---------
 include/asm-x86/processor_32.h     |  2 +-
 include/asm-x86/suspend_32.h       |  4 ++--
 14 files changed, 38 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 415313556708..afd84463b712 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -70,9 +70,8 @@ void foo(void)
 	OFFSET(TI_cpu, thread_info, cpu);
 	BLANK();
 
-	OFFSET(GDS_size, Xgt_desc_struct, size);
-	OFFSET(GDS_address, Xgt_desc_struct, address);
-	OFFSET(GDS_pad, Xgt_desc_struct, pad);
+	OFFSET(GDS_size, desc_ptr, size);
+	OFFSET(GDS_address, desc_ptr, address);
 	BLANK();
 
 	OFFSET(PT_EBX, pt_regs, bx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4f9e31912a25..69507ae8a65b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -649,7 +649,7 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
  * it's on the real one. */
 void switch_to_new_gdt(void)
 {
-	struct Xgt_desc_struct gdt_descr;
+	struct desc_ptr gdt_descr;
 
 	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
 	gdt_descr.size = GDT_SIZE - 1;
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index cc19a3ea403a..d16122a8e4eb 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -17,7 +17,7 @@ static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
 
 static void doublefault_fn(void)
 {
-	struct Xgt_desc_struct gdt_desc = {0, 0};
+	struct desc_ptr gdt_desc = {0, 0};
 	unsigned long gdt, tss;
 
 	store_gdt(&gdt_desc);
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index e2be78f49399..863e8926f2bb 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -69,7 +69,7 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
 {
 	unsigned long cr4;
 	unsigned long temp;
-	struct Xgt_desc_struct gdt_descr;
+	struct desc_ptr gdt_descr;
 
 	spin_lock(&efi_rt_lock);
 	local_irq_save(efi_rt_eflags);
@@ -111,7 +111,7 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
 static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
 {
 	unsigned long cr4;
-	struct Xgt_desc_struct gdt_descr;
+	struct desc_ptr gdt_descr;
 
 	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
 	gdt_descr.size = GDT_SIZE - 1;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 11b935f4f886..c1cfd60639d4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -32,7 +32,7 @@ static u32 kexec_pte1[1024] PAGE_ALIGNED;
 
 static void set_idt(void *newidt, __u16 limit)
 {
-	struct Xgt_desc_struct curidt;
+	struct desc_ptr curidt;
 
 	/* ia32 supports unaliged loads & stores */
 	curidt.size    = limit;
@@ -44,7 +44,7 @@ static void set_idt(void *newidt, __u16 limit)
 
 static void set_gdt(void *newgdt, __u16 limit)
 {
-	struct Xgt_desc_struct curgdt;
+	struct desc_ptr curgdt;
 
 	/* ia32 supports unaligned loads & stores */
 	curgdt.size    = limit;
diff --git a/arch/x86/kernel/reboot_32.c b/arch/x86/kernel/reboot_32.c
index bb1a0f889c5e..c3376fae639d 100644
--- a/arch/x86/kernel/reboot_32.c
+++ b/arch/x86/kernel/reboot_32.c
@@ -161,7 +161,7 @@ real_mode_gdt_entries [3] =
 	0x000092000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
 };
 
-static struct Xgt_desc_struct
+static struct desc_ptr
 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
 real_mode_idt = { 0x3ff, 0 },
 no_idt = { 0, 0 };
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index c751e3c03e85..aa0bdd5fc4bb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -229,7 +229,7 @@ static void lguest_write_idt_entry(struct desc_struct *dt,
 /* Changing to a different IDT is very rare: we keep the IDT up-to-date every
  * time it is written, so we can simply loop through all entries and tell the
  * Host about them. */
-static void lguest_load_idt(const struct Xgt_desc_struct *desc)
+static void lguest_load_idt(const struct desc_ptr *desc)
 {
 	unsigned int i;
 	struct desc_struct *idt = (void *)desc->address;
@@ -252,7 +252,7 @@ static void lguest_load_idt(const struct Xgt_desc_struct *desc)
  * hypercall and use that repeatedly to load a new IDT.  I don't think it
  * really matters, but wouldn't it be nice if they were the same?
  */
-static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
+static void lguest_load_gdt(const struct desc_ptr *desc)
 {
 	BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
 	hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d81e8d709102..c32e0fd0f838 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -295,7 +295,7 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
-static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
+static void xen_load_gdt(const struct desc_ptr *dtr)
 {
 	unsigned long *frames;
 	unsigned long va = dtr->address;
@@ -395,7 +395,7 @@ static int cvt_gate_to_trap(int vector, u32 low, u32 high,
 }
 
 /* Locations of each CPU's IDT */
-static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
+static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
 
 /* Set an IDT entry.  If the entry is part of the current IDT, then
    also update Xen. */
@@ -427,7 +427,7 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
+static void xen_convert_trap_info(const struct desc_ptr *desc,
 				  struct trap_info *traps)
 {
 	unsigned in, out, count;
@@ -446,7 +446,7 @@ static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
 
 void xen_copy_trap_info(struct trap_info *traps)
 {
-	const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
+	const struct desc_ptr *desc = &__get_cpu_var(idt_desc);
 
 	xen_convert_trap_info(desc, traps);
 }
@@ -454,7 +454,7 @@ void xen_copy_trap_info(struct trap_info *traps)
 /* Load a new IDT into Xen.  In principle this can be per-CPU, so we
    hold a spinlock to protect the static traps[] array (static because
    it avoids allocation, and saves stack space). */
-static void xen_load_idt(const struct Xgt_desc_struct *desc)
+static void xen_load_idt(const struct desc_ptr *desc)
 {
 	static DEFINE_SPINLOCK(lock);
 	static struct trap_info traps[257];
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 4e04e49a2f1c..ced4ac1955db 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -290,7 +290,7 @@ static void svm_hardware_enable(void *garbage)
 #ifdef CONFIG_X86_64
 	struct desc_ptr gdt_descr;
 #else
-	struct Xgt_desc_struct gdt_descr;
+	struct desc_ptr gdt_descr;
 #endif
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index c547403f341d..bc5ca3453466 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -12,12 +12,6 @@
 
 #include <asm/mmu.h>
 
-struct Xgt_desc_struct {
-	unsigned short size;
-	unsigned long address __attribute__((packed));
-	unsigned short pad;
-} __attribute__ ((packed));
-
 struct gdt_page
 {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -29,7 +23,7 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
-extern struct Xgt_desc_struct idt_descr;
+extern struct desc_ptr idt_descr;
 extern struct desc_struct idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
@@ -107,22 +101,22 @@ static inline void native_load_tr_desc(void)
 	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
 }
 
-static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
+static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
 }
 
-static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
+static inline void native_load_idt(const struct desc_ptr *dtr)
 {
 	asm volatile("lidt %0"::"m" (*dtr));
 }
 
-static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
+static inline void native_store_gdt(struct desc_ptr *dtr)
 {
 	asm ("sgdt %0":"=m" (*dtr));
 }
 
-static inline void native_store_idt(struct Xgt_desc_struct *dtr)
+static inline void native_store_idt(struct desc_ptr *dtr)
 {
 	asm ("sidt %0":"=m" (*dtr));
 }
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index 17c908c0ef1c..3585a1628b59 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -44,13 +44,13 @@ struct lguest_ro_state
 {
 	/* Host information we need to restore when we switch back. */
 	u32 host_cr3;
-	struct Xgt_desc_struct host_idt_desc;
-	struct Xgt_desc_struct host_gdt_desc;
+	struct desc_ptr host_idt_desc;
+	struct desc_ptr host_gdt_desc;
 	u32 host_sp;
 
 	/* Fields which are used when guest is running. */
-	struct Xgt_desc_struct guest_idt_desc;
-	struct Xgt_desc_struct guest_gdt_desc;
+	struct desc_ptr guest_idt_desc;
+	struct desc_ptr guest_gdt_desc;
 	struct i386_hw_tss guest_tss;
 	struct desc_struct guest_idt[IDT_ENTRIES];
 	struct desc_struct guest_gdt[GDT_ENTRIES];
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index e95c2a655165..0333fb6988b5 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -20,7 +20,7 @@
 
 struct page;
 struct thread_struct;
-struct Xgt_desc_struct;
+struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
@@ -88,10 +88,10 @@ struct pv_cpu_ops {
 
 	/* Segment descriptor handling */
 	void (*load_tr_desc)(void);
-	void (*load_gdt)(const struct Xgt_desc_struct *);
-	void (*load_idt)(const struct Xgt_desc_struct *);
-	void (*store_gdt)(struct Xgt_desc_struct *);
-	void (*store_idt)(struct Xgt_desc_struct *);
+	void (*load_gdt)(const struct desc_ptr *);
+	void (*load_idt)(const struct desc_ptr *);
+	void (*store_gdt)(struct desc_ptr *);
+	void (*store_idt)(struct desc_ptr *);
 	void (*set_ldt)(const void *desc, unsigned entries);
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -630,11 +630,11 @@ static inline void load_TR_desc(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
 }
-static inline void load_gdt(const struct Xgt_desc_struct *dtr)
+static inline void load_gdt(const struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
 }
-static inline void load_idt(const struct Xgt_desc_struct *dtr)
+static inline void load_idt(const struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
 }
@@ -642,11 +642,11 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
-static inline void store_gdt(struct Xgt_desc_struct *dtr)
+static inline void store_gdt(struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
 }
-static inline void store_idt(struct Xgt_desc_struct *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
 }
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index bc48ad64de47..e5056ab9dd9f 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -707,7 +707,7 @@ extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
 /* Defined in head.S */
-extern struct Xgt_desc_struct early_gdt_descr;
+extern struct desc_ptr early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(void);
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index a2520732ffd6..1bbda3ad7796 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -12,8 +12,8 @@ static inline int arch_prepare_suspend(void) { return 0; }
 struct saved_context {
   	u16 es, fs, gs, ss;
 	unsigned long cr0, cr2, cr3, cr4;
-	struct Xgt_desc_struct gdt;
-	struct Xgt_desc_struct idt;
+	struct desc_ptr gdt;
+	struct desc_ptr idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;

From f6dc247cac1a8ee64e59019c3b948d23f3b1a36f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:12 +0100
Subject: [PATCH 281/890] x86: change gdt acessor macro name

This patch changes the name of x86_64 macro used to access the per-cpu
gdt. It is now equal to the i386 version, which will allow code to be shared.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup64.c     |  2 +-
 arch/x86/kernel/suspend_64.c  |  2 +-
 arch/x86/kernel/vsyscall_64.c |  2 +-
 include/asm-x86/desc_64.h     | 10 +++++-----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 51297cca6b42..05cafcb94109 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -228,7 +228,7 @@ void __cpuinit cpu_init (void)
 	 * and set up the GDT descriptor:
 	 */
 	if (cpu)
- 		memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
+		memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
 
 	cpu_gdt_descr[cpu].size = GDT_SIZE;
 	load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 569f1b540e36..279c25775d19 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -140,7 +140,7 @@ void fix_processor_context(void)
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
 
-	cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
 
 	syscall_init();                         /* This sets MSR_*STAR and related */
 	load_TR_desc();				/* This does ltr */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c4c5e765cd2c..e5c1118a8098 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -297,7 +297,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
 	/* Store cpu number in limit so that it can be loaded quickly
 	   in user space in vgetcpu.
 	   12 bits for the CPU and 8 bits for the node. */
-	d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+	d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU);
 	*d = 0x0f40000000000ULL;
 	*d |= cpu;
 	*d |= (node & 0xf) << 12;
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 230ac6e50a0f..4d67f61b4c23 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -48,7 +48,7 @@ static inline void write_ldt_entry(struct desc_struct *ldt,
 }
 
 /* the cpu gdt accessor */
-#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
+#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
 
 static inline void load_gdt(const struct desc_ptr *ptr)
 {
@@ -141,15 +141,15 @@ static inline void set_tss_desc(unsigned cpu, void *addr)
 	 * -1? seg base+limit should be pointing to the address of the
 	 * last valid byte
 	 */
-	set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS],
+	set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
 		(unsigned long)addr, DESC_TSS,
 		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
 }
 
 static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
 {
-	set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
-			      DESC_LDT, size * 8 - 1);
+	set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
+			     (unsigned long)addr, DESC_LDT, size * 8 - 1);
 }
 
 #define LDT_entry_a(info) \
@@ -183,7 +183,7 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
 	unsigned int i;
-	u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
+	u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
 		gdt[i] = t->tls_array[i];

From 7e6ebe1432b04c2980cd030c9f0d1ed480e1fe4d Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:12 +0100
Subject: [PATCH 282/890] x86: removed unused variable

This variable is not used anywere, and is then removed

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_64.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 4d67f61b4c23..8bd84a4ef2d5 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -30,11 +30,6 @@ static inline unsigned long __store_tr(void)
 
 #define store_tr(tr) (tr) = __store_tr()
 
-/*
- * This is the ldt that every process will get unless we need
- * something other than this.
- */
-extern struct desc_struct default_ldt[];
 extern struct gate_struct idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 

From 010d4f8221cf51a2ab8b037d0149506b397d073f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:12 +0100
Subject: [PATCH 283/890] x86: introduce gate_desc type.

To account for the differences in gate descriptor in i386 and x86_64
a gate_desc type is introduced.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c  |  2 +-
 include/asm-x86/desc_32.h   | 15 ++++++++-------
 include/asm-x86/desc_64.h   |  4 ++--
 include/asm-x86/desc_defs.h |  8 +++++++-
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 0d45017ed824..c70c41fd710b 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -76,7 +76,7 @@ char ignore_fpu_irq = 0;
  * F0 0F bug workaround.. We have a special link segment
  * for this.
  */
-struct desc_struct idt_table[256]
+gate_desc idt_table[256]
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 
 asmlinkage void divide_error(void);
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index bc5ca3453466..77f1e5a4ad7c 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -3,6 +3,7 @@
 
 #include <asm/ldt.h>
 #include <asm/segment.h>
+#include <asm/desc_defs.h>
 
 #ifndef __ASSEMBLY__
 
@@ -24,7 +25,7 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 }
 
 extern struct desc_ptr idt_descr;
-extern struct desc_struct idt_table[];
+extern gate_desc idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
 static inline void pack_descriptor(__u32 *a, __u32 *b,
@@ -35,11 +36,11 @@ static inline void pack_descriptor(__u32 *a, __u32 *b,
 		(limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
 }
 
-static inline void pack_gate(__u32 *a, __u32 *b,
+static inline void pack_gate(gate_desc *gate,
 	unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
 {
-	*a = (seg << 16) | (base & 0xffff);
-	*b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
+	gate->a = (seg << 16) | (base & 0xffff);
+	gate->b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
 }
 
 #define DESCTYPE_LDT 	0x82	/* present, system, DPL-0, LDT */
@@ -139,9 +140,9 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 
 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
 {
-	__u32 a, b;
-	pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
-	write_idt_entry(idt_table, gate, a, b);
+	gate_desc g;
+	pack_gate(&g, (unsigned long)addr, seg, type, 0);
+	write_idt_entry(idt_table, gate, g.a, g.b);
 }
 
 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 8bd84a4ef2d5..887393f044b1 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -30,7 +30,7 @@ static inline unsigned long __store_tr(void)
 
 #define store_tr(tr) (tr) = __store_tr()
 
-extern struct gate_struct idt_table[];
+extern gate_desc idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 
 static inline void write_ldt_entry(struct desc_struct *ldt,
@@ -58,7 +58,7 @@ static inline void store_gdt(struct desc_ptr *ptr)
 static inline void _set_gate(void *adr, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist)
 {
-	struct gate_struct s;
+	gate_desc s;
 
 	s.offset_low = PTR_LOW(func);
 	s.segment = __KERNEL_CS;
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index de47eb0a23aa..9732285116af 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -39,7 +39,7 @@ enum {
 };
 
 // 16byte gate
-struct gate_struct {
+struct gate_struct64 {
 	u16 offset_low;
 	u16 segment;
 	unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
@@ -67,6 +67,12 @@ struct ldttss_desc {
 	u32 zero1;
 } __attribute__((packed));
 
+#ifdef CONFIG_X86_64
+typedef struct gate_struct64 gate_desc;
+#else
+typedef struct desc_struct gate_desc;
+#endif
+
 struct desc_ptr {
 	unsigned short size;
 	unsigned long address;

From 8d947344c47a40626730bb80d136d8daac9f2060 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:12 +0100
Subject: [PATCH 284/890] x86: change write_idt_entry signature

this patch changes write_idt_entry signature. It now takes a gate_desc
instead of the a and b parameters. It will allow it to be later unified
between i386 and x86_64.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
CC: Zachary Amsden <zach@vmware.com>
CC: Jeremy Fitzhardinge <Jeremy.Fitzhardinge.citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt_32.c |  2 +-
 arch/x86/kernel/vmi_32.c      | 10 +++++++++-
 arch/x86/lguest/boot.c        |  9 +++++----
 arch/x86/xen/enlighten.c      |  8 ++++----
 include/asm-x86/desc_32.h     | 10 ++++++++--
 include/asm-x86/paravirt.h    |  9 +++++----
 6 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index f4e3a8e01cf2..13bbc99b639b 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -381,7 +381,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.load_tls = native_load_tls,
 	.write_ldt_entry = write_dt_entry,
 	.write_gdt_entry = write_dt_entry,
-	.write_idt_entry = write_dt_entry,
+	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
 	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4cfda7dbe90f..a635b22de25f 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -62,6 +62,7 @@ static struct {
 	void (*cpuid)(void /* non-c */);
 	void (*_set_ldt)(u32 selector);
 	void (*set_tr)(u32 selector);
+	void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
 	void (*set_kernel_stack)(u32 selector, u32 sp0);
 	void (*allocate_page)(u32, u32, u32, u32, u32);
 	void (*release_page)(u32, u32);
@@ -214,6 +215,12 @@ static void vmi_set_tr(void)
 	vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
 }
 
+static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
+{
+	u32 *idt_entry = (u32 *)g;
+	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+}
+
 static void vmi_load_sp0(struct tss_struct *tss,
 				   struct thread_struct *thread)
 {
@@ -792,7 +799,8 @@ static inline int __init activate_vmi(void)
 	pv_cpu_ops.load_tls = vmi_load_tls;
 	para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
 	para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
-	para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
+	para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
+		  write_idt_entry, WriteIDTEntry);
 	para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index aa0bdd5fc4bb..b50c8ad25ab4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -217,13 +217,14 @@ static void irq_enable(void)
  * address of the handler, and... well, who cares?  The Guest just asks the
  * Host to make the change anyway, because the Host controls the real IDT.
  */
-static void lguest_write_idt_entry(struct desc_struct *dt,
-				   int entrynum, u32 low, u32 high)
+static void lguest_write_idt_entry(gate_desc *dt,
+				   int entrynum, const gate_desc *g)
 {
+	u32 *desc = (u32 *)g;
 	/* Keep the local copy up to date. */
-	write_dt_entry(dt, entrynum, low, high);
+	native_write_idt_entry(dt, entrynum, g);
 	/* Tell Host about this new entry. */
-	hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
+	hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
 }
 
 /* Changing to a different IDT is very rare: we keep the IDT up-to-date every
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c32e0fd0f838..b7b7346d8cdc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -399,8 +399,7 @@ static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
 
 /* Set an IDT entry.  If the entry is part of the current IDT, then
    also update Xen. */
-static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
-				u32 low, u32 high)
+static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 {
 	unsigned long p = (unsigned long)&dt[entrynum];
 	unsigned long start, end;
@@ -412,14 +411,15 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
 
 	xen_mc_flush();
 
-	write_dt_entry(dt, entrynum, low, high);
+	native_write_idt_entry(dt, entrynum, g);
 
 	if (p >= start && (p + 8) <= end) {
 		struct trap_info info[2];
+		u32 *desc = (u32 *)g;
 
 		info[1].address = 0;
 
-		if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
+		if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
 			if (HYPERVISOR_set_trap_table(info))
 				BUG();
 	}
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 77f1e5a4ad7c..54b2314f2ddf 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -70,9 +70,15 @@ static inline void pack_gate(gate_desc *gate,
 
 #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
 #endif
 
+static inline void native_write_idt_entry(gate_desc *idt, int entry,
+					  const gate_desc *gate)
+{
+	memcpy(&idt[entry], gate, sizeof(*gate));
+}
+
 static inline void write_dt_entry(struct desc_struct *dt,
 				  int entry, u32 entry_low, u32 entry_high)
 {
@@ -142,7 +148,7 @@ static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned s
 {
 	gate_desc g;
 	pack_gate(&g, (unsigned long)addr, seg, type, 0);
-	write_idt_entry(idt_table, gate, g.a, g.b);
+	write_idt_entry(idt_table, gate, &g);
 }
 
 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0333fb6988b5..86a9d7b0920f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/cpumask.h>
 #include <asm/kmap_types.h>
+#include <asm/desc_defs.h>
 
 struct page;
 struct thread_struct;
@@ -99,8 +100,8 @@ struct pv_cpu_ops {
 				int entrynum, u32 low, u32 high);
 	void (*write_gdt_entry)(struct desc_struct *,
 				int entrynum, u32 low, u32 high);
-	void (*write_idt_entry)(struct desc_struct *,
-				int entrynum, u32 low, u32 high);
+	void (*write_idt_entry)(gate_desc *,
+				int entrynum, const gate_desc *gate);
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);
@@ -667,9 +668,9 @@ static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
 {
 	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
 }
-static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
+static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
-	PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
+	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
 }
 static inline void set_iopl_mask(unsigned mask)
 {

From 25deca53e9b74a899efd3ae1f4493f4f2ce36846 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 285/890] x86: introduce ldt_desc type.

this patch introduces ldt_desc type to account for the differences
in the ldt descriptor in x86_64 and i386

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_64.h   | 2 +-
 include/asm-x86/desc_defs.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 887393f044b1..c49f928ed8b6 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -113,7 +113,7 @@ static inline void store_idt(struct desc_ptr *dtr)
 static inline void set_tssldt_descriptor(void *ptr, unsigned long tss,
 					 unsigned type, unsigned size)
 {
-	struct ldttss_desc d;
+	struct ldttss_desc64 d;
 
 	memset(&d, 0, sizeof(d));
 	d.limit0 = size & 0xFFFF;
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index 9732285116af..09ea05243497 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -58,7 +58,7 @@ enum {
 };
 
 // LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc {
+struct ldttss_desc64 {
 	u16 limit0;
 	u16 base0;
 	unsigned base1 : 8, type : 5, dpl : 2, p : 1;
@@ -69,8 +69,10 @@ struct ldttss_desc {
 
 #ifdef CONFIG_X86_64
 typedef struct gate_struct64 gate_desc;
+typedef struct ldttss_desc64 ldt_desc;
 #else
 typedef struct desc_struct gate_desc;
+typedef struct desc_struct ldt_desc;
 #endif
 
 struct desc_ptr {

From 5af725026fe902bf81f1b90b1b9d9ee4b9e1eb6a Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 286/890] x86: modify write_ldt function

This patch modifies the write_ldt() function to make use
of the new struct desc_struct instead of entry_1 and entry_2
entries

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ldt.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index a8cdca3615bf..7eb0c8a45734 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -186,7 +186,7 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
-	__u32 entry_1, entry_2;
+	struct desc_struct ldt;
 	int error;
 	struct user_desc ldt_info;
 
@@ -218,21 +218,20 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	/* Allow LDTs to be cleared by the user. */
 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
-			entry_1 = 0;
-			entry_2 = 0;
+			memset(&ldt, 0, sizeof(ldt));
 			goto install;
 		}
 	}
 
-	entry_1 = LDT_entry_a(&ldt_info);
-	entry_2 = LDT_entry_b(&ldt_info);
+	ldt.a = LDT_entry_a(&ldt_info);
+	ldt.b = LDT_entry_b(&ldt_info);
 	if (oldmode)
-		entry_2 &= ~(1 << 20);
+		ldt.avl = 0;
 
 	/* Install the new entry ...  */
 install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1,
-			entry_2);
+	write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
+			ldt.a, ldt.b);
 	error = 0;
 
 out_unlock:

From 80fbb69a8d1268ef48dfe21da80e68cb01922f31 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 287/890] x86: introduce fill_ldt

This patch introduces fill_ldt(), which populates a ldt descriptor
from a user_desc in once, instead of relying in the LDT_entry_a and
LDT_entry_b macros

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ldt.c        |  3 +--
 arch/x86/kernel/process_64.c |  3 +--
 arch/x86/kernel/tls.c        |  7 +++----
 include/asm-x86/desc.h       | 29 +++++++++++++++++++++++++++++
 include/asm-x86/desc_32.h    | 15 ---------------
 include/asm-x86/desc_64.h    | 17 -----------------
 6 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 7eb0c8a45734..3e872b468533 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -223,8 +223,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		}
 	}
 
-	ldt.a = LDT_entry_a(&ldt_info);
-	ldt.b = LDT_entry_b(&ldt_info);
+	fill_ldt(&ldt, &ldt_info);
 	if (oldmode)
 		ldt.avl = 0;
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 86c310acc989..f91521e26335 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -457,8 +457,7 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 	};
 	struct desc_struct *desc = (void *)t->thread.tls_array;
 	desc += tls;
-	desc->a = LDT_entry_a(&ud);
-	desc->b = LDT_entry_b(&ud);
+	fill_ldt(desc, &ud);
 }
 
 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 67a377621b12..74d2b65a82eb 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -67,10 +67,9 @@ int do_set_thread_area(struct task_struct *p, int idx,
 	if (LDT_empty(&info)) {
 		desc[0] = 0;
 		desc[1] = 0;
-	} else {
-		desc[0] = LDT_entry_a(&info);
-		desc[1] = LDT_entry_b(&info);
-	}
+	} else
+		fill_ldt((struct desc_struct *)desc, &info);
+
 	if (t == &current->thread)
 		load_TLS(t, cpu);
 
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 6065c5092265..47086d2d9298 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -1,5 +1,34 @@
+#ifndef _ASM_DESC_H_
+#define _ASM_DESC_H_
+
+#ifndef __ASSEMBLY__
+#include <asm/desc_defs.h>
+#include <asm/ldt.h>
+
+static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
+{
+	desc->limit0 = info->limit & 0x0ffff;
+	desc->base0 = info->base_addr & 0x0000ffff;
+
+	desc->base1 = (info->base_addr & 0x00ff0000) >> 16;
+	desc->type = (info->read_exec_only ^ 1) << 1;
+	desc->type |= info->contents << 2;
+	desc->s = 1;
+	desc->dpl = 0x3;
+	desc->p = info->seg_not_present ^ 1;
+	desc->limit = (info->limit & 0xf0000) >> 16;
+	desc->avl = info->useable;
+	desc->d = info->seg_32bit;
+	desc->g = info->limit_in_pages;
+	desc->base2 = (info->base_addr & 0xff000000) >> 24;
+}
+
+#endif
+
 #ifdef CONFIG_X86_32
 # include "desc_32.h"
 #else
 # include "desc_64.h"
 #endif
+
+#endif
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 54b2314f2ddf..03700991c5db 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -163,21 +163,6 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo
 
 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
 
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
-	(((info)->base_addr & 0xff000000) | \
-	(((info)->base_addr & 0x00ff0000) >> 16) | \
-	((info)->limit & 0xf0000) | \
-	(((info)->read_exec_only ^ 1) << 9) | \
-	((info)->contents << 10) | \
-	(((info)->seg_not_present ^ 1) << 15) | \
-	((info)->seg_32bit << 22) | \
-	((info)->limit_in_pages << 23) | \
-	((info)->useable << 20) | \
-	0x7000)
-
 #define LDT_empty(info) (\
 	(info)->base_addr	== 0	&& \
 	(info)->limit		== 0	&& \
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index c49f928ed8b6..ba7fb87d10f3 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -147,23 +147,6 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
 			     (unsigned long)addr, DESC_LDT, size * 8 - 1);
 }
 
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-/* Don't allow setting of the lm bit. It is useless anyways because
-   64bit system calls require __USER_CS. */
-#define LDT_entry_b(info) \
-	(((info)->base_addr & 0xff000000) | \
-	(((info)->base_addr & 0x00ff0000) >> 16) | \
-	((info)->limit & 0xf0000) | \
-	(((info)->read_exec_only ^ 1) << 9) | \
-	((info)->contents << 10) | \
-	(((info)->seg_not_present ^ 1) << 15) | \
-	((info)->seg_32bit << 22) | \
-	((info)->limit_in_pages << 23) | \
-	((info)->useable << 20) | \
-	/* ((info)->lm << 21) | */ \
-	0x7000)
-
 #define LDT_empty(info) (\
 	(info)->base_addr	== 0	&& \
 	(info)->limit		== 0	&& \

From 18245d5b6e54ca76b60b687e43eb893f9e9dd611 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 288/890] x86: provide tss_desc

Provide a new type, tss_desc, to represent the tss descriptor
in a unified way accross x86_64 and i386

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_defs.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index 09ea05243497..c811f1e0829f 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -70,9 +70,11 @@ struct ldttss_desc64 {
 #ifdef CONFIG_X86_64
 typedef struct gate_struct64 gate_desc;
 typedef struct ldttss_desc64 ldt_desc;
+typedef struct ldttss_desc64 tss_desc;
 #else
 typedef struct desc_struct gate_desc;
 typedef struct desc_struct ldt_desc;
+typedef struct desc_struct tss_desc;
 #endif
 
 struct desc_ptr {

From 014b15be30c04622d130946ab7c0a9101b523a8a Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 289/890] x86: change write_gdt_entry signature.

This patch changes the write_gdt_entry function signature.
Instead of the old "a" and "b" parameters, it now receives
a pointer to a desc_struct, and the size of the entry being
handled. This is because x86_64 can have some 16-byte entries
as well as 8-byte ones.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
CC: Zachary Amsden <zach@vmware.com>
CC: Jeremy Fitzhardinge <Jeremy.Fitzhardinge.citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt_32.c  |  2 +-
 arch/x86/kernel/smpcommon_32.c |  7 ++++---
 arch/x86/kernel/vmi_32.c       | 21 +++++++++++++++------
 arch/x86/lguest/boot.c         |  6 +++---
 arch/x86/xen/enlighten.c       | 11 +++++------
 include/asm-x86/desc_32.h      | 34 ++++++++++++++++++++++------------
 include/asm-x86/paravirt.h     |  9 ++++++---
 7 files changed, 56 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 13bbc99b639b..77602c1252d7 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -380,7 +380,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.store_tr = native_store_tr,
 	.load_tls = native_load_tls,
 	.write_ldt_entry = write_dt_entry,
-	.write_gdt_entry = write_dt_entry,
+	.write_gdt_entry = native_write_gdt_entry,
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
index bbfe85a0f699..8bc38af29aef 100644
--- a/arch/x86/kernel/smpcommon_32.c
+++ b/arch/x86/kernel/smpcommon_32.c
@@ -14,10 +14,11 @@ __cpuinit void init_gdt(int cpu)
 {
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
 
-	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
-			(u32 *)&gdt[GDT_ENTRY_PERCPU].b,
+	pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
 			__per_cpu_offset[cpu], 0xFFFFF,
-			0x80 | DESCTYPE_S | 0x2, 0x8);
+			0x2 | DESCTYPE_S, 0x8);
+
+	gdt[GDT_ENTRY_PERCPU].s = 1;
 
 	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
 	per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index a635b22de25f..21edd0d6eae5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -63,6 +63,7 @@ static struct {
 	void (*_set_ldt)(u32 selector);
 	void (*set_tr)(u32 selector);
 	void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
+	void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
 	void (*set_kernel_stack)(u32 selector, u32 sp0);
 	void (*allocate_page)(u32, u32, u32, u32, u32);
 	void (*release_page)(u32, u32);
@@ -187,7 +188,7 @@ static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
 static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
 {
 	if (gdt[nr].a != new->a || gdt[nr].b != new->b)
-		write_gdt_entry(gdt, nr, new->a, new->b);
+		write_gdt_entry(gdt, nr, new, 0);
 }
 
 static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
@@ -201,12 +202,12 @@ static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
 static void vmi_set_ldt(const void *addr, unsigned entries)
 {
 	unsigned cpu = smp_processor_id();
-	u32 low, high;
+	struct desc_struct desc;
 
-	pack_descriptor(&low, &high, (unsigned long)addr,
+	pack_descriptor(&desc, (unsigned long)addr,
 			entries * sizeof(struct desc_struct) - 1,
-			DESCTYPE_LDT, 0);
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
+			DESC_LDT, 0);
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
 	vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
 }
 
@@ -221,6 +222,13 @@ static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
 }
 
+static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
+				const void *desc, int type)
+{
+	u32 *gdt_entry = (u32 *)desc;
+	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+}
+
 static void vmi_load_sp0(struct tss_struct *tss,
 				   struct thread_struct *thread)
 {
@@ -798,7 +806,8 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.store_tr, GetTR);
 	pv_cpu_ops.load_tls = vmi_load_tls;
 	para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
-	para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
+	para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
+		  write_gdt_entry, WriteGDTEntry);
 	para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
 		  write_idt_entry, WriteIDTEntry);
 	para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index b50c8ad25ab4..a63373759f08 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -262,10 +262,10 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
 /* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
  * then tell the Host to reload the entire thing.  This operation is so rare
  * that this naive implementation is reasonable. */
-static void lguest_write_gdt_entry(struct desc_struct *dt,
-				   int entrynum, u32 low, u32 high)
+static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
+				   const void *desc, int type)
 {
-	write_dt_entry(dt, entrynum, low, high);
+	native_write_gdt_entry(dt, entrynum, desc, type);
 	hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b7b7346d8cdc..7f98c63f6381 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -475,22 +475,21 @@ static void xen_load_idt(const struct desc_ptr *desc)
 /* Write a GDT descriptor entry.  Ignore LDT descriptors, since
    they're handled differently. */
 static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
-				u32 low, u32 high)
+				const void *desc, int type)
 {
 	preempt_disable();
 
-	switch ((high >> 8) & 0xff) {
-	case DESCTYPE_LDT:
-	case DESCTYPE_TSS:
+	switch (type) {
+	case DESC_LDT:
+	case DESC_TSS:
 		/* ignore */
 		break;
 
 	default: {
 		xmaddr_t maddr = virt_to_machine(&dt[entry]);
-		u64 desc = (u64)high << 32 | low;
 
 		xen_mc_flush();
-		if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
+		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
 			BUG();
 	}
 
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 03700991c5db..41d8214c6173 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -28,12 +28,13 @@ extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
-static inline void pack_descriptor(__u32 *a, __u32 *b,
+static inline void pack_descriptor(struct desc_struct *desc,
 	unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
 {
-	*a = ((base & 0xffff) << 16) | (limit & 0xffff);
-	*b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+	desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
+	desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
 		(limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
+	desc->p = 1;
 }
 
 static inline void pack_gate(gate_desc *gate,
@@ -69,7 +70,8 @@ static inline void pack_gate(gate_desc *gate,
 #define set_ldt native_set_ldt
 
 #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_gdt_entry(dt, entry, desc, type) \
+				native_write_gdt_entry(dt, entry, desc, type)
 #define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
 #endif
 
@@ -79,6 +81,12 @@ static inline void native_write_idt_entry(gate_desc *idt, int entry,
 	memcpy(&idt[entry], gate, sizeof(*gate));
 }
 
+static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
+					  const void *desc, int type)
+{
+	memcpy(&gdt[entry], desc, sizeof(struct desc_struct));
+}
+
 static inline void write_dt_entry(struct desc_struct *dt,
 				  int entry, u32 entry_low, u32 entry_high)
 {
@@ -86,18 +94,20 @@ static inline void write_dt_entry(struct desc_struct *dt,
 	dt[entry].b = entry_high;
 }
 
+
 static inline void native_set_ldt(const void *addr, unsigned int entries)
 {
 	if (likely(entries == 0))
 		__asm__ __volatile__("lldt %w0"::"q" (0));
 	else {
 		unsigned cpu = smp_processor_id();
-		__u32 a, b;
+		ldt_desc ldt;
 
-		pack_descriptor(&a, &b, (unsigned long)addr,
+		pack_descriptor(&ldt, (unsigned long)addr,
 				entries * sizeof(struct desc_struct) - 1,
-				DESCTYPE_LDT, 0);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
+				DESC_LDT, 0);
+		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+				&ldt, DESC_LDT);
 		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
 }
@@ -153,11 +163,11 @@ static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned s
 
 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
 {
-	__u32 a, b;
-	pack_descriptor(&a, &b, (unsigned long)addr,
+	tss_desc tss;
+	pack_descriptor(&tss, (unsigned long)addr,
 			offsetof(struct tss_struct, __cacheline_filler) - 1,
-			DESCTYPE_TSS, 0);
-	write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
+			DESC_TSS, 0);
+	write_gdt_entry(get_cpu_gdt_table(cpu), entry, &tss, DESC_TSS);
 }
 
 
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 86a9d7b0920f..3f2abf295e2a 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -99,7 +99,7 @@ struct pv_cpu_ops {
 	void (*write_ldt_entry)(struct desc_struct *,
 				int entrynum, u32 low, u32 high);
 	void (*write_gdt_entry)(struct desc_struct *,
-				int entrynum, u32 low, u32 high);
+				int entrynum, const void *desc, int size);
 	void (*write_idt_entry)(gate_desc *,
 				int entrynum, const gate_desc *gate);
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
@@ -664,10 +664,13 @@ static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
 {
 	PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
 }
-static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
+
+static inline void write_gdt_entry(struct desc_struct *dt, int entry,
+				   void *desc, int type)
 {
-	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
+	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
 }
+
 static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);

From 75b8bb3e56ca09a467fbbe5229bc68627f7445be Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:13 +0100
Subject: [PATCH 290/890] x86: change write_ldt_entry signature

this patch changes the signature of write_ldt_entry.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
CC: Zachary Amsden <zach@vmware.com>
CC: Jeremy Fitzhardinge <Jeremy.Fitzhardinge.citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ldt.c         |  3 +--
 arch/x86/kernel/paravirt_32.c |  2 +-
 arch/x86/kernel/vmi_32.c      | 11 ++++++++++-
 arch/x86/xen/enlighten.c      |  4 ++--
 include/asm-x86/desc_32.h     |  9 ++++++++-
 include/asm-x86/desc_64.h     |  7 ++-----
 include/asm-x86/paravirt.h    | 10 ++++++----
 7 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3e872b468533..b8ef46270e24 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,8 +229,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
 	/* Install the new entry ...  */
 install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
-			ldt.a, ldt.b);
+	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
 	error = 0;
 
 out_unlock:
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 77602c1252d7..dd063fba2b21 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -379,7 +379,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.store_idt = native_store_idt,
 	.store_tr = native_store_tr,
 	.load_tls = native_load_tls,
-	.write_ldt_entry = write_dt_entry,
+	.write_ldt_entry = native_write_ldt_entry,
 	.write_gdt_entry = native_write_gdt_entry,
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 21edd0d6eae5..10c46419d35d 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -64,6 +64,7 @@ static struct {
 	void (*set_tr)(u32 selector);
 	void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
 	void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
+	void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
 	void (*set_kernel_stack)(u32 selector, u32 sp0);
 	void (*allocate_page)(u32, u32, u32, u32, u32);
 	void (*release_page)(u32, u32);
@@ -229,6 +230,13 @@ static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
 	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
 }
 
+static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
+				const void *desc)
+{
+	u32 *ldt_entry = (u32 *)desc;
+	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+}
+
 static void vmi_load_sp0(struct tss_struct *tss,
 				   struct thread_struct *thread)
 {
@@ -805,7 +813,8 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.store_idt, GetIDT);
 	para_fill(pv_cpu_ops.store_tr, GetTR);
 	pv_cpu_ops.load_tls = vmi_load_tls;
-	para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
+	para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
+		  write_ldt_entry, WriteLDTEntry);
 	para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
 		  write_gdt_entry, WriteGDTEntry);
 	para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7f98c63f6381..72dd14d0685c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -357,11 +357,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 }
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
-				u32 low, u32 high)
+				const void *ptr)
 {
 	unsigned long lp = (unsigned long)&dt[entrynum];
 	xmaddr_t mach_lp = virt_to_machine(lp);
-	u64 entry = (u64)high << 32 | low;
+	u64 entry = *(u64 *)ptr;
 
 	preempt_disable();
 
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 41d8214c6173..92a72b0381e2 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -69,12 +69,19 @@ static inline void pack_gate(gate_desc *gate,
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
 #define set_ldt native_set_ldt
 
-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_ldt_entry(dt, entry, desc) \
+				native_write_ldt_entry(dt, entry, desc)
 #define write_gdt_entry(dt, entry, desc, type) \
 				native_write_gdt_entry(dt, entry, desc, type)
 #define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
 #endif
 
+static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry,
+					  const void *desc)
+{
+	memcpy(&ldt[entry], desc, sizeof(struct desc_struct));
+}
+
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
 					  const gate_desc *gate)
 {
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index ba7fb87d10f3..7cdd3f0420fd 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -34,12 +34,9 @@ extern gate_desc idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 
 static inline void write_ldt_entry(struct desc_struct *ldt,
-				   int entry, u32 entry_low, u32 entry_high)
+				   int entry, void *ptr)
 {
-	__u32 *lp = (__u32 *)((entry << 3) + (char *)ldt);
-
-	lp[0] = entry_low;
-	lp[1] = entry_high;
+	memcpy(&ldt[entry], ptr, 8);
 }
 
 /* the cpu gdt accessor */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3f2abf295e2a..4f23f434a1f3 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -96,8 +96,8 @@ struct pv_cpu_ops {
 	void (*set_ldt)(const void *desc, unsigned entries);
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
-	void (*write_ldt_entry)(struct desc_struct *,
-				int entrynum, u32 low, u32 high);
+	void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
+				const void *desc);
 	void (*write_gdt_entry)(struct desc_struct *,
 				int entrynum, const void *desc, int size);
 	void (*write_idt_entry)(gate_desc *,
@@ -660,9 +660,11 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
 {
 	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
 }
-static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
+
+static inline void write_ldt_entry(struct desc_struct *dt, int entry,
+				   const void *desc)
 {
-	PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
+	PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
 }
 
 static inline void write_gdt_entry(struct desc_struct *dt, int entry,

From 59fbed775aa5a6f352bfbde6b40508b541086b7e Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 291/890] x86: move constants to desc_defs.h

this patch moves constant definitions regarding descriptor types
from desc_32.h to desc_defs.h. The change from defines to enum
to comply with previous versions in desc_defs.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_32.h   | 8 --------
 include/asm-x86/desc_defs.h | 5 +++++
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 92a72b0381e2..14238df87c7d 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -44,14 +44,6 @@ static inline void pack_gate(gate_desc *gate,
 	gate->b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
 }
 
-#define DESCTYPE_LDT 	0x82	/* present, system, DPL-0, LDT */
-#define DESCTYPE_TSS 	0x89	/* present, system, DPL-0, 32-bit TSS */
-#define DESCTYPE_TASK	0x85	/* present, system, DPL-0, task gate */
-#define DESCTYPE_INT	0x8e	/* present, system, DPL-0, interrupt gate */
-#define DESCTYPE_TRAP	0x8f	/* present, system, DPL-0, trap gate */
-#define DESCTYPE_DPL3	0x60	/* DPL-3 */
-#define DESCTYPE_S	0x10	/* !system */
-
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index c811f1e0829f..79fe59fc50ec 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -55,6 +55,11 @@ struct gate_struct64 {
 enum {
 	DESC_TSS = 0x9,
 	DESC_LDT = 0x2,
+	DESCTYPE_TASK = 0x85,   /* present, system, DPL-0, task gate */
+	DESCTYPE_INT =  0x8e,   /* present, system, DPL-0, interrupt gate */
+	DESCTYPE_TRAP = 0x8f,   /* present, system, DPL-0, trap gate */
+	DESCTYPE_DPL3 = 0x60,   /* DPL-3 */
+	DESCTYPE_S =	0x10,	/* !system */
 };
 
 // LDT or TSS descriptor in the GDT. 16 bytes.

From 881c2975d02b6575c417a6eee40868d60a9ecba9 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 292/890] x86: unify non-paravirt parts of desc.h

This patch unifies the non-paravirt part of desc_{32,64}.h into
desc.h. Most of it, is simply common code, that is moved to
the shared header. The only exception is the set_ldt_desc in desc_64.h,
which is changed - included its name - to accomodate for the way
the ldt is set up in i386.

Also, constant definitions used in desc_32.h are moved to desc_defs.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc.h           | 65 +++++++++++++++++++++++++++++++-
 include/asm-x86/desc_32.h        | 58 ----------------------------
 include/asm-x86/desc_64.h        | 55 +++++----------------------
 include/asm-x86/mmu_context_64.h |  4 +-
 4 files changed, 76 insertions(+), 106 deletions(-)

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 47086d2d9298..99c4adc851e2 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -4,6 +4,7 @@
 #ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
+#include <asm/mmu.h>
 
 static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
 {
@@ -23,7 +24,8 @@ static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
 	desc->base2 = (info->base_addr & 0xff000000) >> 24;
 }
 
-#endif
+extern struct desc_ptr idt_descr;
+extern gate_desc idt_table[];
 
 #ifdef CONFIG_X86_32
 # include "desc_32.h"
@@ -31,4 +33,65 @@ static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
 # include "desc_64.h"
 #endif
 
+#define _LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0)
+
+#ifdef CONFIG_X86_64
+#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+#else
+#define LDT_empty(info) (_LDT_empty(info))
+#endif
+
+static inline void clear_LDT(void)
+{
+	set_ldt(NULL, 0);
+}
+
+/*
+ * load one particular LDT into the current CPU
+ */
+static inline void load_LDT_nolock(mm_context_t *pc)
+{
+	set_ldt(pc->ldt, pc->size);
+}
+
+static inline void load_LDT(mm_context_t *pc)
+{
+	preempt_disable();
+	load_LDT_nolock(pc);
+	preempt_enable();
+}
+
+#else
+/*
+ * GET_DESC_BASE reads the descriptor base of the specified segment.
+ *
+ * Args:
+ *    idx - descriptor index
+ *    gdt - GDT pointer
+ *    base - 32bit register to which the base will be written
+ *    lo_w - lo word of the "base" register
+ *    lo_b - lo byte of the "base" register
+ *    hi_b - hi byte of the low word of the "base" register
+ *
+ * Example:
+ *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+ *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+ */
+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+	movb idx*8+4(gdt), lo_b; \
+	movb idx*8+7(gdt), hi_b; \
+	shll $16, base; \
+	movw idx*8+2(gdt), lo_w;
+
+
+#endif /* __ASSEMBLY__ */
+
 #endif
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 14238df87c7d..01415ad35fdf 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -11,8 +11,6 @@
 #include <linux/smp.h>
 #include <linux/percpu.h>
 
-#include <asm/mmu.h>
-
 struct gdt_page
 {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -24,8 +22,6 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
-extern struct desc_ptr idt_descr;
-extern gate_desc idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
 static inline void pack_descriptor(struct desc_struct *desc,
@@ -172,36 +168,6 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo
 
 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
 
-#define LDT_empty(info) (\
-	(info)->base_addr	== 0	&& \
-	(info)->limit		== 0	&& \
-	(info)->contents	== 0	&& \
-	(info)->read_exec_only	== 1	&& \
-	(info)->seg_32bit	== 0	&& \
-	(info)->limit_in_pages	== 0	&& \
-	(info)->seg_not_present	== 1	&& \
-	(info)->useable		== 0	)
-
-static inline void clear_LDT(void)
-{
-	set_ldt(NULL, 0);
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-	set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-	preempt_disable();
-	load_LDT_nolock(pc);
-	preempt_enable();
-}
-
 static inline unsigned long get_desc_base(unsigned long *desc)
 {
 	unsigned long base;
@@ -210,30 +176,6 @@ static inline unsigned long get_desc_base(unsigned long *desc)
 		(desc[1] & 0xff000000);
 	return base;
 }
-
-#else /* __ASSEMBLY__ */
-
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- *    idx - descriptor index
- *    gdt - GDT pointer
- *    base - 32bit register to which the base will be written
- *    lo_w - lo word of the "base" register
- *    lo_b - lo byte of the "base" register
- *    hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
-	movb idx*8+4(gdt), lo_b; \
-	movb idx*8+7(gdt), hi_b; \
-	shll $16, base; \
-	movw idx*8+2(gdt), lo_w;
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 7cdd3f0420fd..c037c7d55b92 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -9,16 +9,13 @@
 
 #include <linux/string.h>
 #include <linux/smp.h>
-#include <asm/desc_defs.h>
 
 #include <asm/segment.h>
-#include <asm/mmu.h>
 
 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 
 #define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
 #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
-#define clear_LDT()  asm volatile("lldt %w0"::"r" (0))
 
 static inline unsigned long __store_tr(void)
 {
@@ -30,7 +27,6 @@ static inline unsigned long __store_tr(void)
 
 #define store_tr(tr) (tr) = __store_tr()
 
-extern gate_desc idt_table[];
 extern struct desc_ptr cpu_gdt_descr[];
 
 static inline void write_ldt_entry(struct desc_struct *ldt,
@@ -138,22 +134,18 @@ static inline void set_tss_desc(unsigned cpu, void *addr)
 		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
 }
 
-static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
+static inline void set_ldt(void *addr, int entries)
 {
-	set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
-			     (unsigned long)addr, DESC_LDT, size * 8 - 1);
-}
+	if (likely(entries == 0))
+		__asm__ __volatile__("lldt %w0"::"q" (0));
+	else {
+		unsigned cpu = smp_processor_id();
 
-#define LDT_empty(info) (\
-	(info)->base_addr	== 0	&& \
-	(info)->limit		== 0	&& \
-	(info)->contents	== 0	&& \
-	(info)->read_exec_only	== 1	&& \
-	(info)->seg_32bit	== 0	&& \
-	(info)->limit_in_pages	== 0	&& \
-	(info)->seg_not_present	== 1	&& \
-	(info)->useable		== 0	&& \
-	(info)->lm		== 0)
+		set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
+			     (unsigned long)addr, DESC_LDT, entries * 8 - 1);
+		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+	}
+}
 
 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
@@ -164,32 +156,6 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 		gdt[i] = t->tls_array[i];
 }
 
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
-{
-	int count = pc->size;
-
-	if (likely(!count)) {
-		clear_LDT();
-		return;
-	}
-
-	set_ldt_desc(cpu, pc->ldt, count);
-	load_LDT_desc();
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-	int cpu = get_cpu();
-
-	load_LDT_nolock(pc, cpu);
-	put_cpu();
-}
-
-extern struct desc_ptr idt_descr;
-
 static inline unsigned long get_desc_base(const void *ptr)
 {
 	const u32 *desc = ptr;
@@ -199,7 +165,6 @@ static inline unsigned long get_desc_base(const void *ptr)
 		(desc[1] & 0xff000000);
 	return base;
 }
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 29f95c3acc28..98bfe43dd806 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -43,7 +43,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		load_cr3(next->pgd);
 
 		if (unlikely(next->context.ldt != prev->context.ldt)) 
-			load_LDT_nolock(&next->context, cpu);
+			load_LDT_nolock(&next->context);
 	}
 #ifdef CONFIG_SMP
 	else {
@@ -56,7 +56,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * to make sure to use no freed page tables.
 			 */
 			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context, cpu);
+			load_LDT_nolock(&next->context);
 		}
 	}
 #endif

From 26048d75e8d6c840742468667f4a7ab8c2df74c9 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 293/890] x86: use the same data type for tls_array.

This patch changes the type of tls_array in x86_64 to
a desc_struct. Now, both i386 and x86_64 tls_array have
the same type, and code accessing it can be shared.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc_64.h      | 2 +-
 include/asm-x86/processor_64.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index c037c7d55b92..448f96ed973f 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -150,7 +150,7 @@ static inline void set_ldt(void *addr, int entries)
 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
 	unsigned int i;
-	u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
+	struct desc_struct *gdt = (get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
 		gdt[i] = t->tls_array[i];
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 7b7f8a142e20..a0a9e5515097 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -19,6 +19,7 @@
 #include <linux/personality.h>
 #include <linux/cpumask.h>
 #include <asm/processor-flags.h>
+#include <asm/desc_defs.h>
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -244,7 +245,7 @@ struct thread_struct {
  *               goes into MSR_IA32_DS_AREA */
 	unsigned long	ds_area_msr;
 /* cached TLS descriptors. */
-	u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
+	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
 } __attribute__((aligned(16)));
 
 #define INIT_THREAD  { \

From cc6978528cbd475d952e0eb5073375839dfb600e Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 294/890] x86: modify get_desc_base

This patch makes get_desc_base() receive a struct desc_struct,
and then uses its internal fields to compute the base address.
This is done at both i386 and x86_64, and then it is moved
to common header

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tls.c     | 2 +-
 arch/x86/mm/fault_32.c    | 2 +-
 include/asm-x86/desc.h    | 5 +++++
 include/asm-x86/desc_32.h | 8 --------
 include/asm-x86/desc_64.h | 9 ---------
 5 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 74d2b65a82eb..98f428be8e8c 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -112,7 +112,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
 
 	memset(&info, 0, sizeof(struct user_desc));
 	info.entry_number = idx;
-	info.base_addr = get_desc_base((void *)desc);
+	info.base_addr = get_desc_base((struct desc_struct *)desc);
 	info.limit = GET_LIMIT(desc);
 	info.seg_32bit = GET_32BIT(desc);
 	info.contents = GET_CONTENTS(desc);
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 6056c6d71835..ef5ab2b925c4 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -115,7 +115,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	}
 
 	/* Decode the code segment base from the descriptor */
-	base = get_desc_base((unsigned long *)desc);
+	base = get_desc_base((struct desc_struct *)desc);
 
 	if (seg & (1<<2)) { 
 		mutex_unlock(&current->mm->context.lock);
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 99c4adc851e2..a6fdd7c7b6b2 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -69,6 +69,11 @@ static inline void load_LDT(mm_context_t *pc)
 	preempt_enable();
 }
 
+static inline unsigned long get_desc_base(struct desc_struct *desc)
+{
+	return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+}
+
 #else
 /*
  * GET_DESC_BASE reads the descriptor base of the specified segment.
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 01415ad35fdf..8450c2a99c3a 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -168,14 +168,6 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo
 
 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
 
-static inline unsigned long get_desc_base(unsigned long *desc)
-{
-	unsigned long base;
-	base = ((desc[0] >> 16)  & 0x0000ffff) |
-		((desc[1] << 16) & 0x00ff0000) |
-		(desc[1] & 0xff000000);
-	return base;
-}
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 448f96ed973f..a7a6c301c6bc 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -156,15 +156,6 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 		gdt[i] = t->tls_array[i];
 }
 
-static inline unsigned long get_desc_base(const void *ptr)
-{
-	const u32 *desc = ptr;
-	unsigned long base;
-	base = ((desc[0] >> 16)  & 0x0000ffff) |
-		((desc[1] << 16) & 0x00ff0000) |
-		(desc[1] & 0xff000000);
-	return base;
-}
 #endif /* !__ASSEMBLY__ */
 
 #endif

From 54cd0eac7286b83ef1a657d2dddd74e0556209e7 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 295/890] x86: unify paravirt pieces of descriptor handling

With the types used to access descriptors in x86_64 and i386
now being the same, the code that effectively handles them can
now be easily shared. This patch moves the paravirt part of
desc_32.h into desc.h, and then, we get paravirt support in x86_64
for free.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc.h      | 169 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/desc_32.h   | 130 ---------------------------
 include/asm-x86/desc_64.h   | 104 +++-------------------
 include/asm-x86/desc_defs.h |   6 +-
 4 files changed, 183 insertions(+), 226 deletions(-)

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a6fdd7c7b6b2..e61a5a38caba 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -5,6 +5,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <linux/smp.h>
 
 static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
 {
@@ -27,6 +28,174 @@ static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
 
+#ifdef CONFIG_X86_64
+extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
+extern struct desc_ptr cpu_gdt_descr[];
+/* the cpu gdt accessor */
+#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
+#else
+struct gdt_page {
+	struct desc_struct gdt[GDT_ENTRIES];
+} __attribute__((aligned(PAGE_SIZE)));
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
+
+static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+{
+	return per_cpu(gdt_page, cpu).gdt;
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define load_TR_desc() native_load_tr_desc()
+#define load_gdt(dtr) native_load_gdt(dtr)
+#define load_idt(dtr) native_load_idt(dtr)
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
+
+#define store_gdt(dtr) native_store_gdt(dtr)
+#define store_idt(dtr) native_store_idt(dtr)
+#define store_tr(tr) (tr = native_store_tr())
+#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
+
+#define load_TLS(t, cpu) native_load_tls(t, cpu)
+#define set_ldt native_set_ldt
+
+#define write_ldt_entry(dt, entry, desc) \
+				native_write_ldt_entry(dt, entry, desc)
+#define write_gdt_entry(dt, entry, desc, type) \
+				native_write_gdt_entry(dt, entry, desc, type)
+#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
+#endif
+
+static inline void native_write_idt_entry(gate_desc *idt, int entry,
+					  const gate_desc *gate)
+{
+	memcpy(&idt[entry], gate, sizeof(*gate));
+}
+
+static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry,
+					  const void *desc)
+{
+	memcpy(&ldt[entry], desc, 8);
+}
+
+static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
+					  const void *desc, int type)
+{
+	unsigned int size;
+	switch (type) {
+	case DESC_TSS:
+		size = sizeof(tss_desc);
+		break;
+	case DESC_LDT:
+		size = sizeof(ldt_desc);
+		break;
+	default:
+		size = sizeof(struct desc_struct);
+		break;
+	}
+	memcpy(&gdt[entry], desc, size);
+}
+
+static inline void set_tssldt_descriptor(struct ldttss_desc64 *d,
+					 unsigned long tss, unsigned type,
+					 unsigned size)
+{
+	memset(d, 0, sizeof(*d));
+	d->limit0 = size & 0xFFFF;
+	d->base0 = PTR_LOW(tss);
+	d->base1 = PTR_MIDDLE(tss) & 0xFF;
+	d->type = type;
+	d->p = 1;
+	d->limit1 = (size >> 16) & 0xF;
+	d->base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
+	d->base3 = PTR_HIGH(tss);
+}
+
+static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
+				   unsigned long limit, unsigned char type,
+				   unsigned char flags)
+{
+	desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
+	desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+		  (limit & 0x000f0000) | ((type & 0xff) << 8) |
+		  ((flags & 0xf) << 20);
+	desc->p = 1;
+}
+
+static inline void pack_ldt(ldt_desc *ldt, unsigned long addr,
+			   unsigned size)
+{
+
+#ifdef CONFIG_X86_64
+		set_tssldt_descriptor(ldt,
+			     addr, DESC_LDT, size);
+#else
+		pack_descriptor(ldt, (unsigned long)addr,
+				size,
+				0x80 | DESC_LDT, 0);
+#endif
+}
+
+static inline void native_set_ldt(const void *addr, unsigned int entries)
+{
+	if (likely(entries == 0))
+		__asm__ __volatile__("lldt %w0"::"q" (0));
+	else {
+		unsigned cpu = smp_processor_id();
+		ldt_desc ldt;
+
+		pack_ldt(&ldt, (unsigned long)addr,
+				entries * sizeof(ldt) - 1);
+		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+				&ldt, DESC_LDT);
+		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+	}
+}
+
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+	asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+	asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+	asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+	asm volatile("sidt %0":"=m" (*dtr));
+}
+
+static inline unsigned long native_store_tr(void)
+{
+	unsigned long tr;
+	asm volatile("str %0":"=r" (tr));
+	return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	unsigned int i;
+	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
+
 #ifdef CONFIG_X86_32
 # include "desc_32.h"
 #else
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 8450c2a99c3a..4bf20b7dd741 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -8,31 +8,10 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/preempt.h>
-#include <linux/smp.h>
 #include <linux/percpu.h>
 
-struct gdt_page
-{
-	struct desc_struct gdt[GDT_ENTRIES];
-} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
-
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
-{
-	return per_cpu(gdt_page, cpu).gdt;
-}
-
 extern void set_intr_gate(unsigned int irq, void * addr);
 
-static inline void pack_descriptor(struct desc_struct *desc,
-	unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
-{
-	desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
-	desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
-		(limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
-	desc->p = 1;
-}
-
 static inline void pack_gate(gate_desc *gate,
 	unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
 {
@@ -40,115 +19,6 @@ static inline void pack_gate(gate_desc *gate,
 	gate->b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
 }
 
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define load_TR_desc() native_load_tr_desc()
-#define load_gdt(dtr) native_load_gdt(dtr)
-#define load_idt(dtr) native_load_idt(dtr)
-#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
-
-#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
-#define store_tr(tr) (tr = native_store_tr())
-#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
-
-#define load_TLS(t, cpu) native_load_tls(t, cpu)
-#define set_ldt native_set_ldt
-
-#define write_ldt_entry(dt, entry, desc) \
-				native_write_ldt_entry(dt, entry, desc)
-#define write_gdt_entry(dt, entry, desc, type) \
-				native_write_gdt_entry(dt, entry, desc, type)
-#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
-#endif
-
-static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry,
-					  const void *desc)
-{
-	memcpy(&ldt[entry], desc, sizeof(struct desc_struct));
-}
-
-static inline void native_write_idt_entry(gate_desc *idt, int entry,
-					  const gate_desc *gate)
-{
-	memcpy(&idt[entry], gate, sizeof(*gate));
-}
-
-static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
-					  const void *desc, int type)
-{
-	memcpy(&gdt[entry], desc, sizeof(struct desc_struct));
-}
-
-static inline void write_dt_entry(struct desc_struct *dt,
-				  int entry, u32 entry_low, u32 entry_high)
-{
-	dt[entry].a = entry_low;
-	dt[entry].b = entry_high;
-}
-
-
-static inline void native_set_ldt(const void *addr, unsigned int entries)
-{
-	if (likely(entries == 0))
-		__asm__ __volatile__("lldt %w0"::"q" (0));
-	else {
-		unsigned cpu = smp_processor_id();
-		ldt_desc ldt;
-
-		pack_descriptor(&ldt, (unsigned long)addr,
-				entries * sizeof(struct desc_struct) - 1,
-				DESC_LDT, 0);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
-				&ldt, DESC_LDT);
-		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
-	}
-}
-
-
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
-	asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
-	asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
-	asm ("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
-	asm ("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
-	unsigned long tr;
-	asm ("str %0":"=r" (tr));
-	return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-	unsigned int i;
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-
-	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
-		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
 {
 	gate_desc g;
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index a7a6c301c6bc..b0290c45319c 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -8,47 +8,10 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/string.h>
-#include <linux/smp.h>
 
 #include <asm/segment.h>
 
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
-
-#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
-#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
-
-static inline unsigned long __store_tr(void)
-{
-       unsigned long tr;
-
-       asm volatile ("str %w0":"=r" (tr));
-       return tr;
-}
-
-#define store_tr(tr) (tr) = __store_tr()
-
-extern struct desc_ptr cpu_gdt_descr[];
-
-static inline void write_ldt_entry(struct desc_struct *ldt,
-				   int entry, void *ptr)
-{
-	memcpy(&ldt[entry], ptr, 8);
-}
-
-/* the cpu gdt accessor */
-#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
-
-static inline void load_gdt(const struct desc_ptr *ptr)
-{
-	asm volatile("lgdt %w0"::"m" (*ptr));
-}
-
-static inline void store_gdt(struct desc_ptr *ptr)
-{
-       asm("sgdt %w0":"=m" (*ptr));
-}
-
-static inline void _set_gate(void *adr, unsigned type, unsigned long func,
+static inline void _set_gate(int gate, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist)
 {
 	gate_desc s;
@@ -67,61 +30,37 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func,
 	 * does not need to be atomic because it is only done once at
 	 * setup time
 	 */
-	memcpy(adr, &s, 16);
+	write_idt_entry(idt_table, gate, &s);
 }
 
 static inline void set_intr_gate(int nr, void *func)
 {
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0);
+	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 0, 0);
 }
 
 static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
 {
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist);
+	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 0, ist);
 }
 
 static inline void set_system_gate(int nr, void *func)
 {
 	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
+	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 3, 0);
 }
 
 static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
 {
-	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
-}
-
-static inline void load_idt(const struct desc_ptr *ptr)
-{
-	asm volatile("lidt %w0"::"m" (*ptr));
-}
-
-static inline void store_idt(struct desc_ptr *dtr)
-{
-       asm("sidt %w0":"=m" (*dtr));
-}
-
-static inline void set_tssldt_descriptor(void *ptr, unsigned long tss,
-					 unsigned type, unsigned size)
-{
-	struct ldttss_desc64 d;
-
-	memset(&d, 0, sizeof(d));
-	d.limit0 = size & 0xFFFF;
-	d.base0 = PTR_LOW(tss);
-	d.base1 = PTR_MIDDLE(tss) & 0xFF;
-	d.type = type;
-	d.p = 1;
-	d.limit1 = (size >> 16) & 0xF;
-	d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
-	d.base3 = PTR_HIGH(tss);
-	memcpy(ptr, &d, 16);
+	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 3, ist);
 }
 
 static inline void set_tss_desc(unsigned cpu, void *addr)
 {
+	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	tss_desc tss;
+
 	/*
 	 * sizeof(unsigned long) coming from an extra "long" at the end
 	 * of the iobitmap. See tss_struct definition in processor.h
@@ -129,31 +68,10 @@ static inline void set_tss_desc(unsigned cpu, void *addr)
 	 * -1? seg base+limit should be pointing to the address of the
 	 * last valid byte
 	 */
-	set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
+	set_tssldt_descriptor(&tss,
 		(unsigned long)addr, DESC_TSS,
 		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
-}
-
-static inline void set_ldt(void *addr, int entries)
-{
-	if (likely(entries == 0))
-		__asm__ __volatile__("lldt %w0"::"q" (0));
-	else {
-		unsigned cpu = smp_processor_id();
-
-		set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
-			     (unsigned long)addr, DESC_LDT, entries * 8 - 1);
-		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
-	}
-}
-
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
-{
-	unsigned int i;
-	struct desc_struct *gdt = (get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
-
-	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
-		gdt[i] = t->tls_array[i];
+	write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
 }
 
 #endif /* !__ASSEMBLY__ */
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index 79fe59fc50ec..ebb64fe3a450 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -48,9 +48,9 @@ struct gate_struct64 {
 	u32 zero1;
 } __attribute__((packed));
 
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
+#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
 
 enum {
 	DESC_TSS = 0x9,

From 507f90c9f92592e7630b1c1e87bf92d2c9858cc6 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 296/890] x86: move _set_gate and its users to a common
 location

This patch moves _set_gate and its users to desc.h. We can now
use common code for x86_64 and i386.

[ mingo@elte.hu: set_system_gate() fixes for nasty crashes. ]

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c  | 34 --------------
 include/asm-x86/desc.h      | 92 +++++++++++++++++++++++++++++++++++++
 include/asm-x86/desc_32.h   | 16 -------
 include/asm-x86/desc_64.h   | 45 ------------------
 include/asm-x86/desc_defs.h |  5 +-
 5 files changed, 93 insertions(+), 99 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index c70c41fd710b..3065b3f41928 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1102,40 +1102,6 @@ asmlinkage void math_emulate(long arg)
 
 #endif /* CONFIG_MATH_EMULATION */
 
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-void set_intr_gate(unsigned int n, void *addr)
-{
-	_set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
-}
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
-	_set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
-}
-
-static void __init set_trap_gate(unsigned int n, void *addr)
-{
-	_set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
-}
-
-static void __init set_system_gate(unsigned int n, void *addr)
-{
-	_set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
-}
-
-static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
-	_set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
-}
-
 
 void __init trap_init(void)
 {
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index e61a5a38caba..d75bc0634313 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -33,6 +33,22 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 extern struct desc_ptr cpu_gdt_descr[];
 /* the cpu gdt accessor */
 #define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
+
+static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
+			     unsigned dpl, unsigned ist, unsigned seg)
+{
+	gate->offset_low = PTR_LOW(func);
+	gate->segment = __KERNEL_CS;
+	gate->ist = ist;
+	gate->p = 1;
+	gate->dpl = dpl;
+	gate->zero0 = 0;
+	gate->zero1 = 0;
+	gate->type = type;
+	gate->offset_middle = PTR_MIDDLE(func);
+	gate->offset_high = PTR_HIGH(func);
+}
+
 #else
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -43,6 +59,16 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
+
+static inline void pack_gate(gate_desc *gate, unsigned char type,
+       unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
+
+{
+	gate->a = (seg << 16) | (base & 0xffff);
+	gate->b = (base & 0xffff0000) |
+		  (((0x80 | type | (dpl << 5)) & 0xff) << 8);
+}
+
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -242,6 +268,72 @@ static inline unsigned long get_desc_base(struct desc_struct *desc)
 {
 	return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
 }
+static inline void _set_gate(int gate, unsigned type, void *addr,
+			      unsigned dpl, unsigned ist, unsigned seg)
+{
+	gate_desc s;
+	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+	/*
+	 * does not need to be atomic because it is only done once at
+	 * setup time
+	 */
+	write_idt_entry(idt_table, gate, &s);
+}
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+static inline void set_intr_gate(unsigned int n, void *addr)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
+}
+
+/*
+ * This routine sets up an interrupt gate at directory privilege level 3.
+ */
+static inline void set_system_intr_gate(unsigned int n, void *addr)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
+}
+
+static inline void set_trap_gate(unsigned int n, void *addr)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
+}
+
+static inline void set_system_gate(unsigned int n, void *addr)
+{
+	BUG_ON((unsigned)n > 0xFF);
+#ifdef CONFIG_X86_32
+	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
+#else
+	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
+#endif
+}
+
+static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
+}
+
+static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
+}
+
+static inline void set_system_gate_ist(int n, void *addr, unsigned ist)
+{
+	BUG_ON((unsigned)n > 0xFF);
+	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
+}
 
 #else
 /*
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
index 4bf20b7dd741..3b112ec186a0 100644
--- a/include/asm-x86/desc_32.h
+++ b/include/asm-x86/desc_32.h
@@ -10,22 +10,6 @@
 #include <linux/preempt.h>
 #include <linux/percpu.h>
 
-extern void set_intr_gate(unsigned int irq, void * addr);
-
-static inline void pack_gate(gate_desc *gate,
-	unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
-{
-	gate->a = (seg << 16) | (base & 0xffff);
-	gate->b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
-}
-
-static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
-{
-	gate_desc g;
-	pack_gate(&g, (unsigned long)addr, seg, type, 0);
-	write_idt_entry(idt_table, gate, &g);
-}
-
 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
 {
 	tss_desc tss;
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index b0290c45319c..6bc92e6e5cc3 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -11,51 +11,6 @@
 
 #include <asm/segment.h>
 
-static inline void _set_gate(int gate, unsigned type, unsigned long func,
-			     unsigned dpl, unsigned ist)
-{
-	gate_desc s;
-
-	s.offset_low = PTR_LOW(func);
-	s.segment = __KERNEL_CS;
-	s.ist = ist;
-	s.p = 1;
-	s.dpl = dpl;
-	s.zero0 = 0;
-	s.zero1 = 0;
-	s.type = type;
-	s.offset_middle = PTR_MIDDLE(func);
-	s.offset_high = PTR_HIGH(func);
-	/*
-	 * does not need to be atomic because it is only done once at
-	 * setup time
-	 */
-	write_idt_entry(idt_table, gate, &s);
-}
-
-static inline void set_intr_gate(int nr, void *func)
-{
-	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 0, 0);
-}
-
-static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
-{
-	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 0, ist);
-}
-
-static inline void set_system_gate(int nr, void *func)
-{
-	BUG_ON((unsigned)nr > 0xFF);
-	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 3, 0);
-}
-
-static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
-{
-	_set_gate(nr, GATE_INTERRUPT, (unsigned long) func, 3, ist);
-}
-
 static inline void set_tss_desc(unsigned cpu, void *addr)
 {
 	struct desc_struct *d = get_cpu_gdt_table(cpu);
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index ebb64fe3a450..e33f078b3e54 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -36,6 +36,7 @@ enum {
 	GATE_INTERRUPT = 0xE,
 	GATE_TRAP = 0xF,
 	GATE_CALL = 0xC,
+	GATE_TASK = 0x5,
 };
 
 // 16byte gate
@@ -55,10 +56,6 @@ struct gate_struct64 {
 enum {
 	DESC_TSS = 0x9,
 	DESC_LDT = 0x2,
-	DESCTYPE_TASK = 0x85,   /* present, system, DPL-0, task gate */
-	DESCTYPE_INT =  0x8e,   /* present, system, DPL-0, interrupt gate */
-	DESCTYPE_TRAP = 0x8f,   /* present, system, DPL-0, trap gate */
-	DESCTYPE_DPL3 = 0x60,   /* DPL-3 */
 	DESCTYPE_S =	0x10,	/* !system */
 };
 

From c81c6ca45a69478c7877b729af1942d2b80ef582 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 297/890] x86: unify set_tss_desc

This patch unifies the set_tss_desc between i386 and x86_64,
which can now have a common implementation. After the old
functions are removed from desc_{32,64}.h, nothing important is
left, and the files can be removed.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc.h    | 39 +++++++++++++++++++++++++++++++++------
 include/asm-x86/desc_32.h | 27 ---------------------------
 include/asm-x86/desc_64.h | 33 ---------------------------------
 3 files changed, 33 insertions(+), 66 deletions(-)
 delete mode 100644 include/asm-x86/desc_32.h

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index d75bc0634313..c31715cf2218 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -165,6 +165,39 @@ static inline void pack_ldt(ldt_desc *ldt, unsigned long addr,
 #endif
 }
 
+static inline void pack_tss(tss_desc *tss, unsigned long addr,
+			   unsigned size, unsigned entry)
+{
+#ifdef CONFIG_X86_64
+		set_tssldt_descriptor(tss,
+			     addr, entry, size);
+#else
+		pack_descriptor(tss, (unsigned long)addr,
+				size,
+				0x80 | entry, 0);
+#endif
+}
+
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+{
+	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	tss_desc tss;
+
+	/*
+	 * sizeof(unsigned long) coming from an extra "long" at the end
+	 * of the iobitmap. See tss_struct definition in processor.h
+	 *
+	 * -1? seg base+limit should be pointing to the address of the
+	 * last valid byte
+	 */
+	pack_tss(&tss, (unsigned long)addr,
+		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1,
+		DESC_TSS);
+	write_gdt_entry(d, entry, &tss, DESC_TSS);
+}
+
+#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
+
 static inline void native_set_ldt(const void *addr, unsigned int entries)
 {
 	if (likely(entries == 0))
@@ -222,12 +255,6 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
 }
 
-#ifdef CONFIG_X86_32
-# include "desc_32.h"
-#else
-# include "desc_64.h"
-#endif
-
 #define _LDT_empty(info) (\
 	(info)->base_addr	== 0	&& \
 	(info)->limit		== 0	&& \
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
deleted file mode 100644
index 3b112ec186a0..000000000000
--- a/include/asm-x86/desc_32.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-
-#include <asm/ldt.h>
-#include <asm/segment.h>
-#include <asm/desc_defs.h>
-
-#ifndef __ASSEMBLY__
-
-#include <linux/preempt.h>
-#include <linux/percpu.h>
-
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
-{
-	tss_desc tss;
-	pack_descriptor(&tss, (unsigned long)addr,
-			offsetof(struct tss_struct, __cacheline_filler) - 1,
-			DESC_TSS, 0);
-	write_gdt_entry(get_cpu_gdt_table(cpu), entry, &tss, DESC_TSS);
-}
-
-
-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 6bc92e6e5cc3..8b137891791f 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -1,34 +1 @@
-/* Written 2000 by Andi Kleen */
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
 
-#include <linux/threads.h>
-#include <asm/ldt.h>
-
-#ifndef __ASSEMBLY__
-
-#include <linux/string.h>
-
-#include <asm/segment.h>
-
-static inline void set_tss_desc(unsigned cpu, void *addr)
-{
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
-	tss_desc tss;
-
-	/*
-	 * sizeof(unsigned long) coming from an extra "long" at the end
-	 * of the iobitmap. See tss_struct definition in processor.h
-	 *
-	 * -1? seg base+limit should be pointing to the address of the
-	 * last valid byte
-	 */
-	set_tssldt_descriptor(&tss,
-		(unsigned long)addr, DESC_TSS,
-		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
-	write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif

From 332540d5fe7f3370bb860090e4b906340a2e2afb Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:14 +0100
Subject: [PATCH 298/890] UML: remove remaining FASTCALL uses

With the x86 removal, FASTCALL is always empty now.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/um/kernel/ksyms.c   | 4 ++--
 include/asm-um/linkage.h | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 1b388b41d95d..7c7142ba3bd7 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -71,10 +71,10 @@ EXPORT_SYMBOL(dump_thread);
 
 /* required for SMP */
 
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void __write_lock_failed(rwlock_t *rw);
 EXPORT_SYMBOL(__write_lock_failed);
 
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+extern void __read_lock_failed(rwlock_t *rw);
 EXPORT_SYMBOL(__read_lock_failed);
 
 #endif
diff --git a/include/asm-um/linkage.h b/include/asm-um/linkage.h
index 78b862472b36..cdb3024a699a 100644
--- a/include/asm-um/linkage.h
+++ b/include/asm-um/linkage.h
@@ -6,7 +6,6 @@
 
 /* <linux/linkage.h> will pick sane defaults */
 #ifdef CONFIG_GPROF
-#undef FASTCALL
 #undef fastcall
 #endif
 

From 6b7d190b14d478c759be22dcb7ee4695a711b562 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:16 +0100
Subject: [PATCH 299/890] x86: remove last users of FASTCALL

FASTCALL() is always empty.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/sleep_32.c | 2 +-
 arch/x86/kernel/vm86_32.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c
index 10699489cfe7..09f820d920b1 100644
--- a/arch/x86/kernel/acpi/sleep_32.c
+++ b/arch/x86/kernel/acpi/sleep_32.c
@@ -17,7 +17,7 @@ unsigned long acpi_wakeup_address = 0;
 unsigned long acpi_realmode_flags;
 extern char wakeup_start, wakeup_end;
 
-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 
 /**
  * acpi_save_state_mem - save kernel state
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e85bb44265cb..5aa5e2f9f0ad 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -120,7 +120,7 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
 	return ret;
 }
 
-struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
+struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs);
 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 {
 	struct tss_struct *tss;

From 56c4da454de1264e381256f658f61b9ef690dd21 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 300/890] core: remove last users of empty FASTCALL macro

FASTCALL is always empty after the x86 removal.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/aio.c                  | 2 +-
 include/asm-m32r/signal.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 9dec7d2d546e..8a37dbbf3437 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -397,7 +397,7 @@ void fastcall __put_ioctx(struct kioctx *ctx)
  * This prevents races between the aio code path referencing the
  * req (after submitting it) and aio_complete() freeing the req.
  */
-static struct kiocb *FASTCALL(__aio_get_req(struct kioctx *ctx));
+static struct kiocb *__aio_get_req(struct kioctx *ctx);
 static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req = NULL;
diff --git a/include/asm-m32r/signal.h b/include/asm-m32r/signal.h
index 937258686ba5..1a607066bc64 100644
--- a/include/asm-m32r/signal.h
+++ b/include/asm-m32r/signal.h
@@ -157,7 +157,7 @@ typedef struct sigaltstack {
 #undef __HAVE_ARCH_SIG_BITOPS
 
 struct pt_regs;
-extern int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
+extern int do_signal(struct pt_regs *regs, sigset_t *oldset);
 
 #define ptrace_signal_deliver(regs, cookie)	do { } while (0)
 

From 75604d7f7f1ee93e4d19d9e19f4497b7ed842f2a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 301/890] x86: remove all definitions with fastcall

fastcall is always defined to be empty, remove it from arch/x86

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c            |  2 +-
 arch/x86/kernel/cpu/mcheck/k7.c      |  2 +-
 arch/x86/kernel/cpu/mcheck/mce.h     |  2 +-
 arch/x86/kernel/cpu/mcheck/mce_32.c  |  4 ++--
 arch/x86/kernel/cpu/mcheck/p4.c      |  4 ++--
 arch/x86/kernel/cpu/mcheck/p5.c      |  2 +-
 arch/x86/kernel/cpu/mcheck/p6.c      |  2 +-
 arch/x86/kernel/cpu/mcheck/winchip.c |  2 +-
 arch/x86/kernel/io_apic_32.c         |  2 +-
 arch/x86/kernel/irq_32.c             |  2 +-
 arch/x86/kernel/kprobes_32.c         |  2 +-
 arch/x86/kernel/process_32.c         |  2 +-
 arch/x86/kernel/signal_32.c          |  2 +-
 arch/x86/kernel/smp_32.c             |  8 ++++----
 arch/x86/kernel/traps_32.c           | 24 ++++++++++++------------
 arch/x86/kernel/vm86_32.c            |  3 +--
 arch/x86/mach-voyager/voyager_smp.c  | 20 ++++++++++----------
 arch/x86/mm/fault_32.c               |  5 ++---
 arch/x86/xen/events.c                |  2 +-
 19 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 1ee443a8e61b..69a13d127da3 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -578,7 +578,7 @@ static void local_apic_timer_interrupt(void)
  *   interrupt as well. Thus we cannot inline the local irq ... ]
  */
 
-void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
+void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index eef63e3630c2..39fa76fd3851 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -16,7 +16,7 @@
 #include "mce.h"
 
 /* Machine Check Handler For AMD Athlon/Duron */
-static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
+static void k7_machine_check(struct pt_regs * regs, long error_code)
 {
 	int recover=1;
 	u32 alow, ahigh, high, low;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index 81fb6e2d35f3..ae9f628838f1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -8,7 +8,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
 /* Call the installed machine check handler for this CPU setup. */
-extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
 extern int nr_mce_banks;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 34c781eddee4..a5182dcd94ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -22,13 +22,13 @@ int nr_mce_banks;
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
 /* Handle unconfigured int18 (should never happen) */
-static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
+static void unexpected_machine_check(struct pt_regs * regs, long error_code)
 {	
 	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
 }
 
 /* Call the installed machine check handler for this CPU setup. */
-void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
 
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index be4dabfee1f5..16a6238dbc2a 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -57,7 +57,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 /* Thermal interrupt handler for this CPU setup */
 static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
 
-fastcall void smp_thermal_interrupt(struct pt_regs *regs)
+void smp_thermal_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
 	vendor_thermal_interrupt(regs);
@@ -141,7 +141,7 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
 }
 
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs * regs, long error_code)
 {
 	int recover=1;
 	u32 alow, ahigh, high, low;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 94bc43d950cf..a18310aaae0c 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -16,7 +16,7 @@
 #include "mce.h"
 
 /* Machine check handler for Pentium class Intel */
-static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
+static void pentium_machine_check(struct pt_regs * regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index deeae42ce199..cb3829e07987 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -16,7 +16,7 @@
 #include "mce.h"
 
 /* Machine Check Handler For PII/PIII */
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs * regs, long error_code)
 {
 	int recover=1;
 	u32 alow, ahigh, high, low;
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 9e424b6c293d..3d428d5afc52 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
 #include "mce.h"
 
 /* Machine check handler for WinChip C6 */
-static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
+static void winchip_machine_check(struct pt_regs * regs, long error_code)
 {
 	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
 	add_taint(TAINT_MACHINE_CHECK);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 75bf8dc77650..76f11c3e3906 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -725,7 +725,7 @@ late_initcall(balanced_irq_init);
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
-void fastcall send_IPI_self(int vector)
+void send_IPI_self(int vector)
 {
 	unsigned int cfg;
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b49616bcc16b..cef054b09d27 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -66,7 +66,7 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+unsigned int do_IRQ(struct pt_regs *regs)
 {	
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index d708cd4f956f..8eccd2d04709 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -403,7 +403,7 @@ no_kprobe:
 /*
  * Called from kretprobe_trampoline
  */
-fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
+void *__kprobes trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2b9db9371060..48e92e3758c2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -716,7 +716,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  * the task-switch, and shows up in ret_from_fork in entry.S,
  * for example.
  */
-struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 2bf5c9aed106..74df55895c8c 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -575,7 +575,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-static void fastcall do_signal(struct pt_regs *regs)
+static void do_signal(struct pt_regs *regs)
 {
 	siginfo_t info;
 	int signr;
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index d4c01a4aca60..070816ac79e1 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -159,7 +159,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
 	apic_write_around(APIC_ICR, cfg);
 }
 
-void fastcall send_IPI_self(int vector)
+void send_IPI_self(int vector)
 {
 	__send_IPI_shortcut(APIC_DEST_SELF, vector);
 }
@@ -310,7 +310,7 @@ void leave_mm(unsigned long cpu)
  * 2) Leave the mm if we are in the lazy tlb mode.
  */
 
-fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
+void smp_invalidate_interrupt(struct pt_regs *regs)
 {
 	unsigned long cpu;
 
@@ -638,13 +638,13 @@ static void native_smp_send_stop(void)
  * all the work is done automatically when
  * we return from the interrupt.
  */
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	__get_cpu_var(irq_stat).irq_resched_count++;
 }
 
-fastcall void smp_call_function_interrupt(struct pt_regs *regs)
+void smp_call_function_interrupt(struct pt_regs *regs)
 {
 	void (*func) (void *info) = call_data->func;
 	void *info = call_data->info;
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3065b3f41928..2eb6ca0ef672 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -501,7 +501,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
 						== NOTIFY_STOP) \
@@ -510,7 +510,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	siginfo_t info; \
 	if (irq) \
@@ -526,7 +526,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
 }
 
 #define DO_VM86_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
 						== NOTIFY_STOP) \
@@ -535,7 +535,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
 }
 
 #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	siginfo_t info; \
 	info.si_signo = signr; \
@@ -563,7 +563,7 @@ DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
 DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
 
-fastcall void __kprobes do_general_protection(struct pt_regs * regs,
+void __kprobes do_general_protection(struct pt_regs * regs,
 					      long error_code)
 {
 	int cpu = get_cpu();
@@ -764,7 +764,7 @@ static __kprobes void default_do_nmi(struct pt_regs * regs)
 
 static int ignore_nmis;
 
-fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+__kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
 	int cpu;
 
@@ -793,7 +793,7 @@ void restart_nmi(void)
 }
 
 #ifdef CONFIG_KPROBES
-fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
+void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
 	trace_hardirqs_fixup();
 
@@ -829,7 +829,7 @@ fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
  * find every occurrence of the TF bit that could be saved away even
  * by user code)
  */
-fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
+void __kprobes do_debug(struct pt_regs * regs, long error_code)
 {
 	unsigned int condition;
 	struct task_struct *tsk = current;
@@ -961,7 +961,7 @@ void math_error(void __user *ip)
 	force_sig_info(SIGFPE, &info, task);
 }
 
-fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
+void do_coprocessor_error(struct pt_regs * regs, long error_code)
 {
 	ignore_fpu_irq = 1;
 	math_error((void __user *)regs->ip);
@@ -1015,7 +1015,7 @@ static void simd_math_error(void __user *ip)
 	force_sig_info(SIGFPE, &info, task);
 }
 
-fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
+void do_simd_coprocessor_error(struct pt_regs * regs,
 					  long error_code)
 {
 	if (cpu_has_xmm) {
@@ -1039,7 +1039,7 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
 	}
 }
 
-fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
+void do_spurious_interrupt_bug(struct pt_regs * regs,
 					  long error_code)
 {
 #if 0
@@ -1048,7 +1048,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
 #endif
 }
 
-fastcall unsigned long patch_espfix_desc(unsigned long uesp,
+unsigned long patch_espfix_desc(unsigned long uesp,
 					  unsigned long kesp)
 {
 	struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5aa5e2f9f0ad..c9f67effbc42 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -120,8 +120,7 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
 	return ret;
 }
 
-struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs);
-struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
+struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
 {
 	struct tss_struct *tss;
 	struct pt_regs *ret;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index f2c13482acc0..751777241881 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -755,7 +755,7 @@ void __init initialize_secondary(void)
  * System interrupts occur because some problem was detected on the
  * various busses.  To find out what you have to probe all the
  * hardware via the CAT bus.  FIXME: At the moment we do nothing. */
-fastcall void smp_vic_sys_interrupt(struct pt_regs *regs)
+void smp_vic_sys_interrupt(struct pt_regs *regs)
 {
 	ack_CPI(VIC_SYS_INT);
 	printk("Voyager SYSTEM INTERRUPT\n");
@@ -764,7 +764,7 @@ fastcall void smp_vic_sys_interrupt(struct pt_regs *regs)
 /* Handle a voyager CMN_INT; These interrupts occur either because of
  * a system status change or because a single bit memory error
  * occurred.  FIXME: At the moment, ignore all this. */
-fastcall void smp_vic_cmn_interrupt(struct pt_regs *regs)
+void smp_vic_cmn_interrupt(struct pt_regs *regs)
 {
 	static __u8 in_cmn_int = 0;
 	static DEFINE_SPINLOCK(cmn_int_lock);
@@ -1086,7 +1086,7 @@ voyager_smp_call_function_mask(cpumask_t cpumask,
  * no local APIC, so I can't do this
  *
  * This function is currently a placeholder and is unused in the code */
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	wrapper_smp_local_timer_interrupt();
@@ -1094,7 +1094,7 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
 }
 
 /* All of the QUAD interrupt GATES */
-fastcall void smp_qic_timer_interrupt(struct pt_regs *regs)
+void smp_qic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	ack_QIC_CPI(QIC_TIMER_CPI);
@@ -1102,31 +1102,31 @@ fastcall void smp_qic_timer_interrupt(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-fastcall void smp_qic_invalidate_interrupt(struct pt_regs *regs)
+void smp_qic_invalidate_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_INVALIDATE_CPI);
 	smp_invalidate_interrupt();
 }
 
-fastcall void smp_qic_reschedule_interrupt(struct pt_regs *regs)
+void smp_qic_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_RESCHEDULE_CPI);
 	smp_reschedule_interrupt();
 }
 
-fastcall void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
+void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
 	smp_enable_irq_interrupt();
 }
 
-fastcall void smp_qic_call_function_interrupt(struct pt_regs *regs)
+void smp_qic_call_function_interrupt(struct pt_regs *regs)
 {
 	ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
 	smp_call_function_interrupt();
 }
 
-fastcall void smp_vic_cpi_interrupt(struct pt_regs *regs)
+void smp_vic_cpi_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	__u8 cpu = smp_processor_id();
@@ -1333,7 +1333,7 @@ int setup_profiling_timer(unsigned int multiplier)
 /* This is a bit of a mess, but forced on us by the genirq changes
  * there's no genirq handler that really does what voyager wants
  * so hack it up with the simple IRQ handler */
-static void fastcall handle_vic_irq(unsigned int irq, struct irq_desc *desc)
+static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
 {
 	before_handle_vic_irq(irq);
 	handle_simple_irq(irq, desc);
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index ef5ab2b925c4..8aed912b04ec 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -214,7 +214,7 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 	force_sig_info(si_signo, &info, tsk);
 }
 
-fastcall void do_invalid_op(struct pt_regs *, unsigned long);
+void do_invalid_op(struct pt_regs *, unsigned long);
 
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
@@ -293,8 +293,7 @@ int show_unhandled_signals = 1;
  *	bit 3 == 1 means use of reserved bit detected
  *	bit 4 == 1 means fault was an instruction fetch
  */
-fastcall void __kprobes do_page_fault(struct pt_regs *regs,
-				      unsigned long error_code)
+void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index aebab9704dd7..dcf613e17581 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -465,7 +465,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
  * a bitset of words which contain pending event bits.  The second
  * level is a bitset of pending events themselves.
  */
-fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
+void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	int cpu = get_cpu();
 	struct shared_info *s = HYPERVISOR_shared_info;

From 341d8854d38fe6e7655f7748babae5c25a51aa70 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 302/890] x86: remove fastcall from include/asm-x86

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/apic.h         |  6 +++---
 include/asm-x86/hw_irq_32.h    | 14 +++++++-------
 include/asm-x86/mutex_32.h     |  7 +++----
 include/asm-x86/semaphore_32.h |  8 ++++----
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 423022759cb2..72bf09cf13ab 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -59,17 +59,17 @@ extern unsigned boot_cpu_id;
 #define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
-static inline fastcall void native_apic_write(unsigned long reg, u32 v)
+static inline void native_apic_write(unsigned long reg, u32 v)
 {
 	*((volatile u32 *)(APIC_BASE + reg)) = v;
 }
 
-static inline fastcall void native_apic_write_atomic(unsigned long reg, u32 v)
+static inline void native_apic_write_atomic(unsigned long reg, u32 v)
 {
 	(void) xchg((u32*)(APIC_BASE + reg), v);
 }
 
-static inline fastcall u32 native_apic_read(unsigned long reg)
+static inline u32 native_apic_read(unsigned long reg)
 {
 	return *((volatile u32 *)(APIC_BASE + reg));
 }
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index 0bedbdf5e907..b93e35a708ac 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -29,16 +29,16 @@
 extern void (*interrupt[NR_IRQS])(void);
 
 #ifdef CONFIG_SMP
-fastcall void reschedule_interrupt(void);
-fastcall void invalidate_interrupt(void);
-fastcall void call_function_interrupt(void);
+void reschedule_interrupt(void);
+void invalidate_interrupt(void);
+void call_function_interrupt(void);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-fastcall void apic_timer_interrupt(void);
-fastcall void error_interrupt(void);
-fastcall void spurious_interrupt(void);
-fastcall void thermal_interrupt(void);
+void apic_timer_interrupt(void);
+void error_interrupt(void);
+void spurious_interrupt(void);
+void thermal_interrupt(void);
 #define platform_legacy_irq(irq)	((irq) < 16)
 #endif
 
diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h
index 7a17d9e58ad6..bbeefb96ddfd 100644
--- a/include/asm-x86/mutex_32.h
+++ b/include/asm-x86/mutex_32.h
@@ -26,7 +26,7 @@ do {									\
 	unsigned int dummy;						\
 									\
 	typecheck(atomic_t *, count);					\
-	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+	typecheck_fn(void (*)(atomic_t *), fail_fn);		\
 									\
 	__asm__ __volatile__(						\
 		LOCK_PREFIX "   decl (%%eax)	\n"			\
@@ -51,8 +51,7 @@ do {									\
  * or anything the slow path function returns
  */
 static inline int
-__mutex_fastpath_lock_retval(atomic_t *count,
-			     int fastcall (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		return fail_fn(count);
@@ -78,7 +77,7 @@ do {									\
 	unsigned int dummy;						\
 									\
 	typecheck(atomic_t *, count);					\
-	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+	typecheck_fn(void (*)(atomic_t *), fail_fn);		\
 									\
 	__asm__ __volatile__(						\
 		LOCK_PREFIX "   incl (%%eax)	\n"			\
diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h
index 835c1d751a9f..cc826e85323f 100644
--- a/include/asm-x86/semaphore_32.h
+++ b/include/asm-x86/semaphore_32.h
@@ -83,10 +83,10 @@ static inline void init_MUTEX_LOCKED (struct semaphore *sem)
 	sema_init(sem, 0);
 }
 
-fastcall void __down_failed(void /* special register calling convention */);
-fastcall int  __down_failed_interruptible(void  /* params in registers */);
-fastcall int  __down_failed_trylock(void  /* params in registers */);
-fastcall void __up_wakeup(void /* special register calling convention */);
+void __down_failed(void /* special register calling convention */);
+int  __down_failed_interruptible(void  /* params in registers */);
+int  __down_failed_trylock(void  /* params in registers */);
+void __up_wakeup(void /* special register calling convention */);
 
 /*
  * This is ugly, but we want the default case to fall through.

From 1e160cc3f3b5c38bba79c58172e82c1e24934546 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 303/890] x86: __vdso_getcpu() warning fix

arch/x86/vdso/vgetcpu.c: In function '__vdso_getcpu':
arch/x86/vdso/vgetcpu.c:22: warning: pointer targets in passing argument 1 of 'native_read_tscp' differ in signedness

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 040d3910d891..088652a62d58 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -13,7 +13,7 @@
 #include <asm/asm.h>
 #include <asm/errno.h>
 
-static inline unsigned long long native_read_tscp(int *aux)
+static inline unsigned long long native_read_tscp(unsigned int *aux)
 {
 	unsigned long low, high;
 	asm volatile (".byte 0x0f,0x01,0xf9"

From 634e49723b08d15f4cb9860ea86547fbd2eeae18 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 304/890] uml: add asm-um/asm.h

git-x86.patch introduces asm-x86/asm.h.  UML needs to follow this with
an asm/asm.h of its own.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-um/asm.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/asm-um/asm.h

diff --git a/include/asm-um/asm.h b/include/asm-um/asm.h
new file mode 100644
index 000000000000..af1269a1e9eb
--- /dev/null
+++ b/include/asm-um/asm.h
@@ -0,0 +1,6 @@
+#ifndef __UM_ASM_H
+#define __UM_ASM_H
+
+#include "asm/arch/asm.h"
+
+#endif

From 5f561d3be8f0db54f9b4fc5cb5db05343f372431 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 305/890] x86: dummy placeholder for acpi/reboot.h

dummy placeholder for acpi/reboot.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/acpi/reboot.h | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 include/acpi/reboot.h

diff --git a/include/acpi/reboot.h b/include/acpi/reboot.h
new file mode 100644
index 000000000000..8857f57e0b78
--- /dev/null
+++ b/include/acpi/reboot.h
@@ -0,0 +1,9 @@
+
+/*
+ * Dummy placeholder to make the EFI patches apply to the x86 tree.
+ * Andrew/Len, please just kill this file if you encounter it.
+ */
+#ifndef acpi_reboot
+# define acpi_reboot() do { } while (0)
+#endif
+

From fa20efd2fcd9349770113c6f72fc76ce437b62f5 Mon Sep 17 00:00:00 2001
From: Aaron Durbin <adurbin@google.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 306/890] x86: add ACPI reboot option

Add the ability to reboot an x86_64 based machine using the RESET_REG in the
FADT ACPI table.

Signed-off-by: Aaron Durbin <adurbin@google.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/x86_64/boot-options.txt |  5 ++++-
 arch/x86/kernel/reboot_64.c           | 11 ++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 945311840a10..d7a333258682 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -110,12 +110,15 @@ Idle loop
 
 Rebooting
 
-   reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old]
+   reboot=b[ios] | t[riple] | k[bd] | a[cpi] [, [w]arm | [c]old]
    bios	  Use the CPU reboot vector for warm reset
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
    triple Force a triple fault (init)
    kbd    Use the keyboard controller. cold reset (default)
+   acpi   Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
+          ACPI reset does not work, the reboot path attempts the reset using
+          the keyboard controller.
 
    Using warm reset will be much faster especially on big memory
    systems because the BIOS will not go through the memory check.
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
index 53620a92a8fd..307f996a3933 100644
--- a/arch/x86/kernel/reboot_64.c
+++ b/arch/x86/kernel/reboot_64.c
@@ -9,6 +9,7 @@
 #include <linux/pm.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
+#include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/desc.h>
@@ -29,7 +30,8 @@ EXPORT_SYMBOL(pm_power_off);
 static long no_idt[3];
 static enum { 
 	BOOT_TRIPLE = 't',
-	BOOT_KBD = 'k'
+	BOOT_KBD = 'k',
+	BOOT_ACPI = 'a'
 } reboot_type = BOOT_KBD;
 static int reboot_mode = 0;
 int reboot_force;
@@ -39,6 +41,7 @@ int reboot_force;
    cold   Set the cold reboot flag
    triple Force a triple fault (init)
    kbd    Use the keyboard controller. cold reset (default)
+   acpi   Use the RESET_REG in the FADT
    force  Avoid anything that could hang.
  */ 
 static int __init reboot_setup(char *str)
@@ -54,6 +57,7 @@ static int __init reboot_setup(char *str)
 			break;
 
 		case 't':
+		case 'a':
 		case 'b':
 		case 'k':
 			reboot_type = *str;
@@ -146,6 +150,11 @@ void machine_emergency_restart(void)
 
 			reboot_type = BOOT_KBD;
 			break;
+
+		case BOOT_ACPI:
+			acpi_reboot();
+			reboot_type = BOOT_KBD;
+			break;
 		}      
 	}      
 }

From 8c8b8859b64baf6d7c33900e8720c7bafe775b2c Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Wed, 30 Jan 2008 13:31:17 +0100
Subject: [PATCH 307/890] mcheck mce_64: mce_read_sem to mutex

Converted to a mutex, and changed the name to mce_read_mutex.

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 0adad772d0da..8cd47fe2ef2c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -564,7 +564,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			loff_t *off)
 {
 	unsigned long *cpu_tsc;
-	static DECLARE_MUTEX(mce_read_sem);
+	static DEFINE_MUTEX(mce_read_mutex);
 	unsigned next;
 	char __user *buf = ubuf;
 	int i, err;
@@ -573,12 +573,12 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	if (!cpu_tsc)
 		return -ENOMEM;
 
-	down(&mce_read_sem);
+	mutex_lock(&mce_read_mutex);
 	next = rcu_dereference(mcelog.next);
 
 	/* Only supports full reads right now */
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
-		up(&mce_read_sem);
+		mutex_unlock(&mce_read_mutex);
 		kfree(cpu_tsc);
 		return -EINVAL;
 	}
@@ -621,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			memset(&mcelog.entry[i], 0, sizeof(struct mce));
 		}
 	}
-	up(&mce_read_sem);
+	mutex_unlock(&mce_read_mutex);
 	kfree(cpu_tsc);
 	return err ? -EFAULT : buf - ubuf;
 }

From 5b83683f32b113d07edfb67a33ce389fc624423d Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:31:19 +0100
Subject: [PATCH 308/890] x86: EFI runtime service support

This patch adds basic runtime services support for EFI x86_64 system.  The
main file of the patch is the addition of efi_64.c for x86_64.  This file is
modeled after the EFI IA32 avatar.  EFI runtime services initialization are
implemented in efi_64.c.  Some x86_64 specifics are worth noting here.  On
x86_64, parameters passed to EFI firmware services need to follow the EFI
calling convention.  For this purpose, a set of functions named efi_call<x>
(<x> is the number of parameters) are implemented.  EFI function calls are
wrapped before calling the firmware service.  The duplicated code between
efi_32.c and efi_64.c is placed in efi.c to remove them from efi_32.c.

Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig              |   2 +-
 arch/x86/kernel/Makefile_64   |   1 +
 arch/x86/kernel/efi.c         | 480 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/efi_64.c      | 164 ++++++++++++
 arch/x86/kernel/efi_stub_64.S | 109 ++++++++
 arch/x86/kernel/setup_64.c    |  17 +-
 include/asm-x86/bootparam.h   |   5 +-
 include/asm-x86/efi.h         |  70 +++++
 include/asm-x86/fixmap_64.h   |   3 +
 9 files changed, 847 insertions(+), 4 deletions(-)
 create mode 100644 arch/x86/kernel/efi.c
 create mode 100644 arch/x86/kernel/efi_64.c
 create mode 100644 arch/x86/kernel/efi_stub_64.S
 create mode 100644 include/asm-x86/efi.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fef944bb920e..23936301db56 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -959,7 +959,7 @@ config MTRR
 config EFI
 	def_bool n
 	prompt "Boot from EFI support"
-	depends on X86_32 && ACPI
+	depends on ACPI
 	---help---
 	This enables the kernel to boot on EFI platforms using
 	system configuration information passed to it from the firmware.
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 2ec96acf6486..e5093dd8cf01 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -39,6 +39,7 @@ obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_AUDIT)		+= audit_64.o
+obj-$(CONFIG_EFI)		+= efi.o efi_64.o efi_stub_64.o
 
 obj-$(CONFIG_MODULES)		+= module_64.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
new file mode 100644
index 000000000000..0a61522e85c7
--- /dev/null
+++ b/arch/x86/kernel/efi.c
@@ -0,0 +1,480 @@
+/*
+ * Common EFI (Extensible Firmware Interface) support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2005-2008 Intel Co.
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Bibo Mao <bibo.mao@intel.com>
+ *	Chandramouli Narayanan <mouli@linux.intel.com>
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * Copied from efi_32.c to eliminate the duplicated code between EFI
+ * 32/64 support code. --ying 2007-10-26
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version.  --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls.  --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ *	Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/efi.h>
+#include <linux/bootmem.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+#include <linux/bcd.h>
+
+#include <asm/setup.h>
+#include <asm/efi.h>
+#include <asm/time.h>
+
+#define EFI_DEBUG	1
+#define PFX 		"EFI: "
+
+int efi_enabled;
+EXPORT_SYMBOL(efi_enabled);
+
+struct efi efi;
+EXPORT_SYMBOL(efi);
+
+struct efi_memory_map memmap;
+
+struct efi efi_phys __initdata;
+static efi_system_table_t efi_systab __initdata;
+
+static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+	return efi_call_virt2(get_time, tm, tc);
+}
+
+static efi_status_t virt_efi_set_time(efi_time_t *tm)
+{
+	return efi_call_virt1(set_time, tm);
+}
+
+static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
+					     efi_bool_t *pending,
+					     efi_time_t *tm)
+{
+	return efi_call_virt3(get_wakeup_time,
+			      enabled, pending, tm);
+}
+
+static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+	return efi_call_virt2(set_wakeup_time,
+			      enabled, tm);
+}
+
+static efi_status_t virt_efi_get_variable(efi_char16_t *name,
+					  efi_guid_t *vendor,
+					  u32 *attr,
+					  unsigned long *data_size,
+					  void *data)
+{
+	return efi_call_virt5(get_variable,
+			      name, vendor, attr,
+			      data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
+					       efi_char16_t *name,
+					       efi_guid_t *vendor)
+{
+	return efi_call_virt3(get_next_variable,
+			      name_size, name, vendor);
+}
+
+static efi_status_t virt_efi_set_variable(efi_char16_t *name,
+					  efi_guid_t *vendor,
+					  unsigned long attr,
+					  unsigned long data_size,
+					  void *data)
+{
+	return efi_call_virt5(set_variable,
+			      name, vendor, attr,
+			      data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
+{
+	return efi_call_virt1(get_next_high_mono_count, count);
+}
+
+static void virt_efi_reset_system(int reset_type,
+				  efi_status_t status,
+				  unsigned long data_size,
+				  efi_char16_t *data)
+{
+	efi_call_virt4(reset_system, reset_type, status,
+		       data_size, data);
+}
+
+static efi_status_t virt_efi_set_virtual_address_map(
+	unsigned long memory_map_size,
+	unsigned long descriptor_size,
+	u32 descriptor_version,
+	efi_memory_desc_t *virtual_map)
+{
+	return efi_call_virt4(set_virtual_address_map,
+			      memory_map_size, descriptor_size,
+			      descriptor_version, virtual_map);
+}
+
+static efi_status_t __init phys_efi_set_virtual_address_map(
+	unsigned long memory_map_size,
+	unsigned long descriptor_size,
+	u32 descriptor_version,
+	efi_memory_desc_t *virtual_map)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog();
+	status = efi_call_phys4(efi_phys.set_virtual_address_map,
+				memory_map_size, descriptor_size,
+				descriptor_version, virtual_map);
+	efi_call_phys_epilog();
+	return status;
+}
+
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+					     efi_time_cap_t *tc)
+{
+	efi_status_t status;
+
+	efi_call_phys_prelog();
+	status = efi_call_phys2(efi_phys.get_time, tm, tc);
+	efi_call_phys_epilog();
+	return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
+{
+	int real_seconds, real_minutes;
+	efi_status_t 	status;
+	efi_time_t 	eft;
+	efi_time_cap_t 	cap;
+
+	status = efi.get_time(&eft, &cap);
+	if (status != EFI_SUCCESS) {
+		printk(KERN_ERR "Oops: efitime: can't read time!\n");
+		return -1;
+	}
+
+	real_seconds = nowtime % 60;
+	real_minutes = nowtime / 60;
+	if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
+		real_minutes += 30;
+	real_minutes %= 60;
+	eft.minute = real_minutes;
+	eft.second = real_seconds;
+
+	status = efi.set_time(&eft);
+	if (status != EFI_SUCCESS) {
+		printk(KERN_ERR "Oops: efitime: can't write time!\n");
+		return -1;
+	}
+	return 0;
+}
+
+unsigned long efi_get_time(void)
+{
+	efi_status_t status;
+	efi_time_t eft;
+	efi_time_cap_t cap;
+
+	status = efi.get_time(&eft, &cap);
+	if (status != EFI_SUCCESS)
+		printk(KERN_ERR "Oops: efitime: can't read time!\n");
+
+	return mktime(eft.year, eft.month, eft.day, eft.hour,
+		      eft.minute, eft.second);
+}
+
+#if EFI_DEBUG
+static void __init print_efi_memmap(void)
+{
+	efi_memory_desc_t *md;
+	void *p;
+	int i;
+
+	for (p = memmap.map, i = 0;
+	     p < memmap.map_end;
+	     p += memmap.desc_size, i++) {
+		md = p;
+		printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
+			"range=[0x%016llx-0x%016llx) (%lluMB)\n",
+			i, md->type, md->attribute, md->phys_addr,
+			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+	}
+}
+#endif  /*  EFI_DEBUG  */
+
+void __init efi_init(void)
+{
+	efi_config_table_t *config_tables;
+	efi_runtime_services_t *runtime;
+	efi_char16_t *c16;
+	char vendor[100] = "unknown";
+	int i = 0;
+	void *tmp;
+
+#ifdef CONFIG_X86_32
+	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
+	memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
+#else
+	efi_phys.systab = (efi_system_table_t *)
+		(boot_params.efi_info.efi_systab |
+		 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+	memmap.phys_map = (void *)
+		(boot_params.efi_info.efi_memmap |
+		 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
+#endif
+	memmap.nr_map = boot_params.efi_info.efi_memmap_size /
+		boot_params.efi_info.efi_memdesc_size;
+	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
+
+	efi.systab = efi_early_ioremap((unsigned long)efi_phys.systab,
+				       sizeof(efi_system_table_t));
+	if (efi.systab == NULL)
+		printk(KERN_ERR "Couldn't map the EFI system table!\n");
+	memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
+	efi_early_iounmap(efi.systab, sizeof(efi_system_table_t));
+	efi.systab = &efi_systab;
+
+	/*
+	 * Verify the EFI Table
+	 */
+	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		printk(KERN_ERR "EFI system table signature incorrect!\n");
+	if ((efi.systab->hdr.revision >> 16) == 0)
+		printk(KERN_ERR "Warning: EFI system table version "
+		       "%d.%02d, expected 1.00 or greater!\n",
+		       efi.systab->hdr.revision >> 16,
+		       efi.systab->hdr.revision & 0xffff);
+
+	/*
+	 * Show what we know for posterity
+	 */
+	c16 = tmp = efi_early_ioremap(efi.systab->fw_vendor, 2);
+	if (c16) {
+		for (i = 0; i < sizeof(vendor) && *c16; ++i)
+			vendor[i] = *c16++;
+		vendor[i] = '\0';
+	} else
+		printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
+	efi_early_iounmap(tmp, 2);
+
+	printk(KERN_INFO "EFI v%u.%.02u by %s \n",
+	       efi.systab->hdr.revision >> 16,
+	       efi.systab->hdr.revision & 0xffff, vendor);
+
+	/*
+	 * Let's see what config tables the firmware passed to us.
+	 */
+	config_tables = efi_early_ioremap(
+		efi.systab->tables,
+		efi.systab->nr_tables * sizeof(efi_config_table_t));
+	if (config_tables == NULL)
+		printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+
+	printk(KERN_INFO);
+	for (i = 0; i < efi.systab->nr_tables; i++) {
+		if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
+			efi.mps = config_tables[i].table;
+			printk(" MPS=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					ACPI_20_TABLE_GUID)) {
+			efi.acpi20 = config_tables[i].table;
+			printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					ACPI_TABLE_GUID)) {
+			efi.acpi = config_tables[i].table;
+			printk(" ACPI=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					SMBIOS_TABLE_GUID)) {
+			efi.smbios = config_tables[i].table;
+			printk(" SMBIOS=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					HCDP_TABLE_GUID)) {
+			efi.hcdp = config_tables[i].table;
+			printk(" HCDP=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					UGA_IO_PROTOCOL_GUID)) {
+			efi.uga = config_tables[i].table;
+			printk(" UGA=0x%lx ", config_tables[i].table);
+		}
+	}
+	printk("\n");
+	efi_early_iounmap(config_tables,
+			  efi.systab->nr_tables * sizeof(efi_config_table_t));
+
+	/*
+	 * Check out the runtime services table. We need to map
+	 * the runtime services table so that we can grab the physical
+	 * address of several of the EFI runtime functions, needed to
+	 * set the firmware into virtual mode.
+	 */
+	runtime = efi_early_ioremap((unsigned long)efi.systab->runtime,
+				    sizeof(efi_runtime_services_t));
+	if (runtime != NULL) {
+		/*
+		 * We will only need *early* access to the following
+		 * two EFI runtime services before set_virtual_address_map
+		 * is invoked.
+		 */
+		efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+		efi_phys.set_virtual_address_map =
+			(efi_set_virtual_address_map_t *)
+			runtime->set_virtual_address_map;
+		/*
+		 * Make efi_get_time can be called before entering
+		 * virtual mode.
+		 */
+		efi.get_time = phys_efi_get_time;
+	} else
+		printk(KERN_ERR "Could not map the EFI runtime service "
+		       "table!\n");
+	efi_early_iounmap(runtime, sizeof(efi_runtime_services_t));
+
+	/* Map the EFI memory map */
+	memmap.map = efi_early_ioremap((unsigned long)memmap.phys_map,
+				       memmap.nr_map * memmap.desc_size);
+	if (memmap.map == NULL)
+		printk(KERN_ERR "Could not map the EFI memory map!\n");
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+	if (memmap.desc_size != sizeof(efi_memory_desc_t))
+		printk(KERN_WARNING "Kernel-defined memdesc"
+		       "doesn't match the one from EFI!\n");
+
+#ifdef CONFIG_X86_64
+	/* Setup for EFI runtime service */
+	reboot_type = BOOT_EFI;
+
+#endif
+#if EFI_DEBUG
+	print_efi_memmap();
+#endif
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor and update
+ * that memory descriptor with the virtual address obtained from ioremap().
+ * This enables the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+	efi_memory_desc_t *md;
+	efi_status_t status;
+	unsigned long end;
+	void *p;
+
+	efi.systab = NULL;
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if ((md->attribute & EFI_MEMORY_WB) &&
+		    (((md->phys_addr + (md->num_pages<<EFI_PAGE_SHIFT)) >>
+		      PAGE_SHIFT) < end_pfn_map))
+			md->virt_addr = (unsigned long)__va(md->phys_addr);
+		else
+			md->virt_addr = (unsigned long)
+				efi_ioremap(md->phys_addr,
+					    md->num_pages << EFI_PAGE_SHIFT);
+		if (!md->virt_addr)
+			printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
+			       (unsigned long long)md->phys_addr);
+		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+		if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
+		    ((unsigned long)efi_phys.systab < end))
+			efi.systab = (efi_system_table_t *)(unsigned long)
+				(md->virt_addr - md->phys_addr +
+				 (unsigned long)efi_phys.systab);
+	}
+
+	BUG_ON(!efi.systab);
+
+	status = phys_efi_set_virtual_address_map(
+		memmap.desc_size * memmap.nr_map,
+		memmap.desc_size,
+		memmap.desc_version,
+		memmap.phys_map);
+
+	if (status != EFI_SUCCESS) {
+		printk(KERN_ALERT "Unable to switch EFI into virtual mode "
+		       "(status=%lx)!\n", status);
+		panic("EFI call to SetVirtualAddressMap() failed!");
+	}
+
+	/*
+	 * Now that EFI is in virtual mode, update the function
+	 * pointers in the runtime service table to the new virtual addresses.
+	 *
+	 * Call EFI services through wrapper functions.
+	 */
+	efi.get_time = virt_efi_get_time;
+	efi.set_time = virt_efi_set_time;
+	efi.get_wakeup_time = virt_efi_get_wakeup_time;
+	efi.set_wakeup_time = virt_efi_set_wakeup_time;
+	efi.get_variable = virt_efi_get_variable;
+	efi.get_next_variable = virt_efi_get_next_variable;
+	efi.set_variable = virt_efi_set_variable;
+	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
+	efi.reset_system = virt_efi_reset_system;
+	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
+#ifdef CONFIG_X86_64
+	runtime_code_page_mkexec();
+#endif
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+u32 efi_mem_type(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	void *p;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if ((md->phys_addr <= phys_addr) &&
+		    (phys_addr < (md->phys_addr +
+				  (md->num_pages << EFI_PAGE_SHIFT))))
+			return md->type;
+	}
+	return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+	efi_memory_desc_t *md;
+	void *p;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if ((md->phys_addr <= phys_addr) &&
+		    (phys_addr < (md->phys_addr +
+				  (md->num_pages << EFI_PAGE_SHIFT))))
+			return md->attribute;
+	}
+	return 0;
+}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
new file mode 100644
index 000000000000..f2000dbc7195
--- /dev/null
+++ b/arch/x86/kernel/efi_64.c
@@ -0,0 +1,164 @@
+/*
+ * x86_64 specific EFI support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 2005-2008 Intel Co.
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Bibo Mao <bibo.mao@intel.com>
+ *	Chandramouli Narayanan <mouli@linux.intel.com>
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * Code to convert EFI to E820 map has been implemented in elilo bootloader
+ * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
+ * is setup appropriately for EFI runtime code.
+ * - mouli 06/14/2007.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/proto.h>
+#include <asm/efi.h>
+
+static pgd_t save_pgd __initdata;
+static unsigned long efi_flags __initdata;
+
+static int __init setup_noefi(char *arg)
+{
+	efi_enabled = 0;
+	return 0;
+}
+early_param("noefi", setup_noefi);
+
+static void __init early_mapping_set_exec(unsigned long start,
+					  unsigned long end,
+					  int executable)
+{
+	pte_t *kpte;
+	int level;
+
+	while (start < end) {
+		kpte = lookup_address((unsigned long)__va(start), &level);
+		BUG_ON(!kpte);
+		if (executable)
+			set_pte(kpte, pte_mkexec(*kpte));
+		else
+			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
+					    __supported_pte_mask));
+		if (pte_huge(*kpte))
+			start = (start + PMD_SIZE) & PMD_MASK;
+		else
+			start = (start + PAGE_SIZE) & PAGE_MASK;
+	}
+}
+
+static void __init early_runtime_code_mapping_set_exec(int executable)
+{
+	efi_memory_desc_t *md;
+	void *p;
+
+	/* Make EFI runtime service code area executable */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
+			unsigned long end;
+			end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+			early_mapping_set_exec(md->phys_addr, end, executable);
+		}
+	}
+}
+
+void __init efi_call_phys_prelog(void)
+{
+	unsigned long vaddress;
+
+	local_irq_save(efi_flags);
+	early_runtime_code_mapping_set_exec(1);
+	vaddress = (unsigned long)__va(0x0UL);
+	pgd_val(save_pgd) = pgd_val(*pgd_offset_k(0x0UL));
+	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+	__flush_tlb_all();
+}
+
+void __init efi_call_phys_epilog(void)
+{
+	/*
+	 * After the lock is released, the original page table is restored.
+	 */
+	set_pgd(pgd_offset_k(0x0UL), save_pgd);
+	early_runtime_code_mapping_set_exec(0);
+	__flush_tlb_all();
+	local_irq_restore(efi_flags);
+}
+
+/*
+ * We need to map the EFI memory map again after init_memory_mapping().
+ */
+void __init efi_map_memmap(void)
+{
+	memmap.map = __va(memmap.phys_map);
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+}
+
+void __init efi_reserve_bootmem(void)
+{
+	reserve_bootmem_generic((unsigned long)memmap.phys_map,
+				memmap.nr_map * memmap.desc_size);
+}
+
+void __init runtime_code_page_mkexec(void)
+{
+	efi_memory_desc_t *md;
+	void *p;
+
+	/* Make EFI runtime service code area executable */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		if (md->type == EFI_RUNTIME_SERVICES_CODE)
+			change_page_attr_addr(md->virt_addr,
+					      md->num_pages,
+					      PAGE_KERNEL_EXEC);
+	}
+	__flush_tlb_all();
+}
+
+void __iomem * __init efi_ioremap(unsigned long offset,
+				  unsigned long size)
+{
+	static unsigned pages_mapped;
+	unsigned long last_addr;
+	unsigned i, pages;
+
+	last_addr = offset + size - 1;
+	offset &= PAGE_MASK;
+	pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+		return NULL;
+
+	for (i = 0; i < pages; i++) {
+		set_fixmap_nocache(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
+				   offset);
+		offset += PAGE_SIZE;
+		pages_mapped++;
+	}
+
+	return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
+					     (pages_mapped - pages));
+}
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
new file mode 100644
index 000000000000..99b47d48c9f4
--- /dev/null
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -0,0 +1,109 @@
+/*
+ * Function calling ABI conversion from Linux to EFI for x86_64
+ *
+ * Copyright (C) 2007 Intel Corp
+ *	Bibo Mao <bibo.mao@intel.com>
+ *	Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/linkage.h>
+
+#define SAVE_XMM			\
+	mov %rsp, %rax;			\
+	subq $0x70, %rsp;		\
+	and $~0xf, %rsp;		\
+	mov %rax, (%rsp);		\
+	mov %cr0, %rax;			\
+	clts;				\
+	mov %rax, 0x8(%rsp);		\
+	movaps %xmm0, 0x60(%rsp);	\
+	movaps %xmm1, 0x50(%rsp);	\
+	movaps %xmm2, 0x40(%rsp);	\
+	movaps %xmm3, 0x30(%rsp);	\
+	movaps %xmm4, 0x20(%rsp);	\
+	movaps %xmm5, 0x10(%rsp)
+
+#define RESTORE_XMM			\
+	movaps 0x60(%rsp), %xmm0;	\
+	movaps 0x50(%rsp), %xmm1;	\
+	movaps 0x40(%rsp), %xmm2;	\
+	movaps 0x30(%rsp), %xmm3;	\
+	movaps 0x20(%rsp), %xmm4;	\
+	movaps 0x10(%rsp), %xmm5;	\
+	mov 0x8(%rsp), %rsi;		\
+	mov %rsi, %cr0;			\
+	mov (%rsp), %rsp
+
+ENTRY(efi_call0)
+	SAVE_XMM
+	subq $32, %rsp
+	call *%rdi
+	addq $32, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call1)
+	SAVE_XMM
+	subq $32, %rsp
+	mov  %rsi, %rcx
+	call *%rdi
+	addq $32, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call2)
+	SAVE_XMM
+	subq $32, %rsp
+	mov  %rsi, %rcx
+	call *%rdi
+	addq $32, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call3)
+	SAVE_XMM
+	subq $32, %rsp
+	mov  %rcx, %r8
+	mov  %rsi, %rcx
+	call *%rdi
+	addq $32, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call4)
+	SAVE_XMM
+	subq $32, %rsp
+	mov %r8, %r9
+	mov %rcx, %r8
+	mov %rsi, %rcx
+	call *%rdi
+	addq $32, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call5)
+	SAVE_XMM
+	subq $48, %rsp
+	mov %r9, 32(%rsp)
+	mov %r8, %r9
+	mov %rcx, %r8
+	mov %rsi, %rcx
+	call *%rdi
+	addq $48, %rsp
+	RESTORE_XMM
+	ret
+
+ENTRY(efi_call6)
+	SAVE_XMM
+	mov (%rsp), %rax
+	mov 8(%rax), %rax
+	subq $48, %rsp
+	mov %r9, 32(%rsp)
+	mov %rax, 40(%rsp)
+	mov %r8, %r9
+	mov %rcx, %r8
+	mov %rsi, %rcx
+	call *%rdi
+	addq $48, %rsp
+	RESTORE_XMM
+	ret
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 8dd110d93e73..90b8bb4748b9 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -30,6 +30,7 @@
 #include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/pci.h>
+#include <linux/efi.h>
 #include <linux/acpi.h>
 #include <linux/kallsyms.h>
 #include <linux/edd.h>
@@ -299,6 +300,11 @@ void __init setup_arch(char **cmdline_p)
 	rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
 	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
 #endif
+#ifdef CONFIG_EFI
+	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+		     "EL64", 4))
+		efi_enabled = 1;
+#endif
 
 	ARCH_SETUP
 
@@ -341,6 +347,8 @@ void __init setup_arch(char **cmdline_p)
 	discover_ebda();
 
 	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+	if (efi_enabled)
+		efi_init();
 
 	dmi_scan_machine();
 
@@ -414,6 +422,12 @@ void __init setup_arch(char **cmdline_p)
 	 */
        acpi_reserve_bootmem();
 #endif
+
+	if (efi_enabled) {
+		efi_map_memmap();
+		efi_reserve_bootmem();
+	}
+
        /*
 	* Find and reserve possible boot-time SMP configuration:
 	*/
@@ -479,7 +493,8 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-	conswitchp = &vga_con;
+	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+		conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
 	conswitchp = &dummy_con;
 #endif
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 19f3ddf2df4b..51151356840f 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -54,13 +54,14 @@ struct sys_desc_table {
 };
 
 struct efi_info {
-	__u32 _pad1;
+	__u32 efi_loader_signature;
 	__u32 efi_systab;
 	__u32 efi_memdesc_size;
 	__u32 efi_memdesc_version;
 	__u32 efi_memmap;
 	__u32 efi_memmap_size;
-	__u32 _pad2[2];
+	__u32 efi_systab_hi;
+	__u32 efi_memmap_hi;
 };
 
 /* The so-called "zeropage" */
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
new file mode 100644
index 000000000000..1459d3d58653
--- /dev/null
+++ b/include/asm-x86/efi.h
@@ -0,0 +1,70 @@
+#ifndef _ASM_X86_EFI_H
+#define _ASM_X86_EFI_H
+
+#ifdef CONFIG_X86_32
+#else /* !CONFIG_X86_32 */
+
+#define MAX_EFI_IO_PAGES	100
+
+extern u64 efi_call0(void *fp);
+extern u64 efi_call1(void *fp, u64 arg1);
+extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
+extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3);
+extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
+extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3,
+		     u64 arg4, u64 arg5);
+extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
+		     u64 arg4, u64 arg5, u64 arg6);
+
+#define efi_call_phys0(f)			\
+	efi_call0((void *)(f))
+#define efi_call_phys1(f, a1)			\
+	efi_call1((void *)(f), (u64)(a1))
+#define efi_call_phys2(f, a1, a2)			\
+	efi_call2((void *)(f), (u64)(a1), (u64)(a2))
+#define efi_call_phys3(f, a1, a2, a3)				\
+	efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3))
+#define efi_call_phys4(f, a1, a2, a3, a4)				\
+	efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+		  (u64)(a4))
+#define efi_call_phys5(f, a1, a2, a3, a4, a5)				\
+	efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+		  (u64)(a4), (u64)(a5))
+#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6)			\
+	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+		  (u64)(a4), (u64)(a5), (u64)(a6))
+
+#define efi_call_virt0(f)				\
+	efi_call0((void *)(efi.systab->runtime->f))
+#define efi_call_virt1(f, a1)					\
+	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+#define efi_call_virt2(f, a1, a2)					\
+	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+#define efi_call_virt3(f, a1, a2, a3)					\
+	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+		  (u64)(a3))
+#define efi_call_virt4(f, a1, a2, a3, a4)				\
+	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+		  (u64)(a3), (u64)(a4))
+#define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
+	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+		  (u64)(a3), (u64)(a4), (u64)(a5))
+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
+	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+
+#define efi_early_ioremap(addr, size)		early_ioremap(addr, size)
+#define efi_early_iounmap(vaddr, size)		early_iounmap(vaddr, size)
+
+extern void *efi_ioremap(unsigned long offset, unsigned long size);
+
+extern int efi_time;
+
+#endif /* CONFIG_X86_32 */
+
+extern void efi_reserve_bootmem(void);
+extern void efi_call_phys_prelog(void);
+extern void efi_call_phys_epilog(void);
+extern void runtime_code_page_mkexec(void);
+
+#endif
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index cdfbe4a6ae6f..8f44782e5fe5 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -15,6 +15,7 @@
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <asm/vsyscall.h>
+#include <asm/efi.h>
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -41,6 +42,8 @@ enum fixed_addresses {
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+	FIX_EFI_IO_MAP_LAST_PAGE,
+	FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
 	__end_of_fixed_addresses
 };
 

From de18c850af701ac9512b7239e88fa45e4c168771 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:31:19 +0100
Subject: [PATCH 309/890] x86: EFI runtime service support: EFI runtime
 services

This patch adds support for several EFI runtime services for EFI x86_64
system.

The EFI support for emergency_restart is added.

Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot_64.c         | 20 +++++++++++++-------
 include/asm-x86/emergency-restart.h |  9 +++++++++
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
index 307f996a3933..d6bdf93ffca9 100644
--- a/arch/x86/kernel/reboot_64.c
+++ b/arch/x86/kernel/reboot_64.c
@@ -9,6 +9,7 @@
 #include <linux/pm.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
+#include <linux/efi.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/delay.h>
@@ -28,20 +29,17 @@ void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
 static long no_idt[3];
-static enum { 
-	BOOT_TRIPLE = 't',
-	BOOT_KBD = 'k',
-	BOOT_ACPI = 'a'
-} reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_KBD;
 static int reboot_mode = 0;
 int reboot_force;
 
-/* reboot=t[riple] | k[bd] [, [w]arm | [c]old]
+/* reboot=t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
    triple Force a triple fault (init)
    kbd    Use the keyboard controller. cold reset (default)
    acpi   Use the RESET_REG in the FADT
+   efi    Use efi reset_system runtime service
    force  Avoid anything that could hang.
  */ 
 static int __init reboot_setup(char *str)
@@ -60,6 +58,7 @@ static int __init reboot_setup(char *str)
 		case 'a':
 		case 'b':
 		case 'k':
+		case 'e':
 			reboot_type = *str;
 			break;
 		case 'f':
@@ -155,7 +154,14 @@ void machine_emergency_restart(void)
 			acpi_reboot();
 			reboot_type = BOOT_KBD;
 			break;
-		}      
+
+		case BOOT_EFI:
+			if (efi_enabled)
+				efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+						 EFI_SUCCESS, 0, NULL);
+			reboot_type = BOOT_KBD;
+			break;
+		}
 	}      
 }
 
diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 680c39563345..54189084462a 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h
@@ -1,6 +1,15 @@
 #ifndef _ASM_EMERGENCY_RESTART_H
 #define _ASM_EMERGENCY_RESTART_H
 
+enum reboot_type {
+	BOOT_TRIPLE = 't',
+	BOOT_KBD = 'k',
+	BOOT_ACPI = 'a',
+	BOOT_EFI = 'e'
+};
+
+extern enum reboot_type reboot_type;
+
 extern void machine_emergency_restart(void);
 
 #endif /* _ASM_EMERGENCY_RESTART_H */

From 9ad65e4748f55e3159283d7fa9d54fb30c086113 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:31:19 +0100
Subject: [PATCH 310/890] x86: EFI runtime service support: document for EFI
 runtime services

This patch adds document for EFI x86_64 runtime services support.

Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/x86_64/boot-options.txt | 9 ++++++++-
 Documentation/x86_64/uefi.txt         | 9 +++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index d7a333258682..638bf46ca059 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -110,7 +110,7 @@ Idle loop
 
 Rebooting
 
-   reboot=b[ios] | t[riple] | k[bd] | a[cpi] [, [w]arm | [c]old]
+   reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
    bios	  Use the CPU reboot vector for warm reset
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
@@ -119,6 +119,9 @@ Rebooting
    acpi   Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
           ACPI reset does not work, the reboot path attempts the reset using
           the keyboard controller.
+   efi    Use efi reset_system runtime service. If EFI is not configured or the
+          EFI reset does not work, the reboot path attempts the reset using
+          the keyboard controller.
 
    Using warm reset will be much faster especially on big memory
    systems because the BIOS will not go through the memory check.
@@ -303,4 +306,8 @@ Debugging
 		newfallback: use new unwinder but fall back to old if it gets
 			stuck (default)
 
+EFI
+
+  noefi		Disable EFI support
+
 Miscellaneous
diff --git a/Documentation/x86_64/uefi.txt b/Documentation/x86_64/uefi.txt
index 91a98edfb588..7d77120a5184 100644
--- a/Documentation/x86_64/uefi.txt
+++ b/Documentation/x86_64/uefi.txt
@@ -19,6 +19,10 @@ Mechanics:
 - Build the kernel with the following configuration.
 	CONFIG_FB_EFI=y
 	CONFIG_FRAMEBUFFER_CONSOLE=y
+  If EFI runtime services are expected, the following configuration should
+  be selected.
+	CONFIG_EFI=y
+	CONFIG_EFI_VARS=y or m		# optional
 - Create a VFAT partition on the disk
 - Copy the following to the VFAT partition:
 	elilo bootloader with x86_64 support, elilo configuration file,
@@ -27,3 +31,8 @@ Mechanics:
 	can be found in the elilo sourceforge project.
 - Boot to EFI shell and invoke elilo choosing the kernel image built
   in first step.
+- If some or all EFI runtime services don't work, you can try following
+  kernel command line parameters to turn off some or all EFI runtime
+  services.
+	noefi		turn off all EFI runtime services
+	reboot_type=k	turn off EFI reboot runtime service

From e429795c68d3001ecae74f6465420c9f043b0ece Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:31:19 +0100
Subject: [PATCH 311/890] x86: EFI runtime service support: remove duplicated
 code from efi_32.c

This patch removes the duplicated code between efi_32.c and efi.c.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32 |   2 +-
 arch/x86/kernel/e820_32.c   |   5 -
 arch/x86/kernel/efi_32.c    | 430 +-----------------------------------
 arch/x86/kernel/setup_32.c  |  11 +-
 include/asm-x86/efi.h       |  42 ++++
 5 files changed, 47 insertions(+), 443 deletions(-)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index cc2651bcc07f..a854e23eac0b 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -38,7 +38,7 @@ obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
 obj-$(CONFIG_KPROBES)		+= kprobes_32.o
 obj-$(CONFIG_MODULES)		+= module_32.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
-obj-$(CONFIG_EFI) 		+= efi_32.o efi_stub_32.o
+obj-$(CONFIG_EFI) 		+= efi.o efi_32.o efi_stub_32.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_VM86)		+= vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 87cadc86d5ee..56335a85a15a 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -17,11 +17,6 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
-#ifdef CONFIG_EFI
-int efi_enabled = 0;
-EXPORT_SYMBOL(efi_enabled);
-#endif
-
 struct e820map e820;
 struct change_member {
 	struct e820entry *pbios; /* pointer to original bios entry */
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 863e8926f2bb..1df13725e519 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -39,21 +39,8 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 
-#define EFI_DEBUG	0
 #define PFX 		"EFI: "
 
-extern efi_status_t asmlinkage efi_call_phys(void *, ...);
-
-struct efi efi;
-EXPORT_SYMBOL(efi);
-static struct efi efi_phys;
-struct efi_memory_map memmap;
-
-/*
- * We require an early boot_ioremap mapping mechanism initially
- */
-extern void * boot_ioremap(unsigned long, unsigned long);
-
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
  * prelog/epilog before/after the invocation to disable interrupt, to
@@ -65,7 +52,7 @@ static unsigned long efi_rt_eflags;
 static DEFINE_SPINLOCK(efi_rt_lock);
 static pgd_t efi_bak_pg_dir_pointer[2];
 
-static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
+void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
 {
 	unsigned long cr4;
 	unsigned long temp;
@@ -108,7 +95,7 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
 	load_gdt(&gdt_descr);
 }
 
-static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
+void efi_call_phys_epilog(void) __releases(efi_rt_lock)
 {
 	unsigned long cr4;
 	struct desc_ptr gdt_descr;
@@ -138,87 +125,6 @@ static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
 	spin_unlock(&efi_rt_lock);
 }
 
-static efi_status_t
-phys_efi_set_virtual_address_map(unsigned long memory_map_size,
-				 unsigned long descriptor_size,
-				 u32 descriptor_version,
-				 efi_memory_desc_t *virtual_map)
-{
-	efi_status_t status;
-
-	efi_call_phys_prelog();
-	status = efi_call_phys(efi_phys.set_virtual_address_map,
-				     memory_map_size, descriptor_size,
-				     descriptor_version, virtual_map);
-	efi_call_phys_epilog();
-	return status;
-}
-
-static efi_status_t
-phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
-	efi_status_t status;
-
-	efi_call_phys_prelog();
-	status = efi_call_phys(efi_phys.get_time, tm, tc);
-	efi_call_phys_epilog();
-	return status;
-}
-
-inline int efi_set_rtc_mmss(unsigned long nowtime)
-{
-	int real_seconds, real_minutes;
-	efi_status_t 	status;
-	efi_time_t 	eft;
-	efi_time_cap_t 	cap;
-
-	spin_lock(&efi_rt_lock);
-	status = efi.get_time(&eft, &cap);
-	spin_unlock(&efi_rt_lock);
-	if (status != EFI_SUCCESS)
-		panic("Ooops, efitime: can't read time!\n");
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-
-	if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
-		real_minutes += 30;
-	real_minutes %= 60;
-
-	eft.minute = real_minutes;
-	eft.second = real_seconds;
-
-	if (status != EFI_SUCCESS) {
-		printk("Ooops: efitime: can't read time!\n");
-		return -1;
-	}
-	return 0;
-}
-/*
- * This is used during kernel init before runtime
- * services have been remapped and also during suspend, therefore,
- * we'll need to call both in physical and virtual modes.
- */
-inline unsigned long efi_get_time(void)
-{
-	efi_status_t status;
-	efi_time_t eft;
-	efi_time_cap_t cap;
-
-	if (efi.get_time) {
-		/* if we are in virtual mode use remapped function */
- 		status = efi.get_time(&eft, &cap);
-	} else {
-		/* we are in physical mode */
-		status = phys_efi_get_time(&eft, &cap);
-	}
-
-	if (status != EFI_SUCCESS)
-		printk("Oops: efitime: can't read time status: 0x%lx\n",status);
-
-	return mktime(eft.year, eft.month, eft.day, eft.hour,
-			eft.minute, eft.second);
-}
-
 int is_available_memory(efi_memory_desc_t * md)
 {
 	if (!(md->attribute & EFI_MEMORY_WB))
@@ -250,24 +156,6 @@ void __init efi_map_memmap(void)
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
 }
 
-#if EFI_DEBUG
-static void __init print_efi_memmap(void)
-{
-	efi_memory_desc_t *md;
-	void *p;
-	int i;
-
-	for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
-		md = p;
-		printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
-			"range=[0x%016llx-0x%016llx) (%lluMB)\n",
-			i, md->type, md->attribute, md->phys_addr,
-			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
-	}
-}
-#endif  /*  EFI_DEBUG  */
-
 /*
  * Walks the EFI memory map and calls CALLBACK once for each EFI
  * memory descriptor that has memory that is available for kernel use.
@@ -319,288 +207,6 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
 	}
 }
 
-void __init efi_init(void)
-{
-	efi_config_table_t *config_tables;
-	efi_runtime_services_t *runtime;
-	efi_char16_t *c16;
-	char vendor[100] = "unknown";
-	unsigned long num_config_tables;
-	int i = 0;
-
-	memset(&efi, 0, sizeof(efi) );
-	memset(&efi_phys, 0, sizeof(efi_phys));
-
-	efi_phys.systab =
-		(efi_system_table_t *)boot_params.efi_info.efi_systab;
-	memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
-	memmap.nr_map = boot_params.efi_info.efi_memmap_size/
-		boot_params.efi_info.efi_memdesc_size;
-	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
-	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
-
-	efi.systab = (efi_system_table_t *)
-		boot_ioremap((unsigned long) efi_phys.systab,
-			sizeof(efi_system_table_t));
-	/*
-	 * Verify the EFI Table
-	 */
-	if (efi.systab == NULL)
-		printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
-	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
-	if ((efi.systab->hdr.revision >> 16) == 0)
-		printk(KERN_ERR PFX "Warning: EFI system table version "
-		       "%d.%02d, expected 1.00 or greater\n",
-		       efi.systab->hdr.revision >> 16,
-		       efi.systab->hdr.revision & 0xffff);
-
-	/*
-	 * Grab some details from the system table
-	 */
-	num_config_tables = efi.systab->nr_tables;
-	config_tables = (efi_config_table_t *)efi.systab->tables;
-	runtime = efi.systab->runtime;
-
-	/*
-	 * Show what we know for posterity
-	 */
-	c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
-	if (c16) {
-		for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i)
-			vendor[i] = *c16++;
-		vendor[i] = '\0';
-	} else
-		printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
-
-	printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n",
-	       efi.systab->hdr.revision >> 16,
-	       efi.systab->hdr.revision & 0xffff, vendor);
-
-	/*
-	 * Let's see what config tables the firmware passed to us.
-	 */
-	config_tables = (efi_config_table_t *)
-				boot_ioremap((unsigned long) config_tables,
-			        num_config_tables * sizeof(efi_config_table_t));
-
-	if (config_tables == NULL)
-		printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
-
-	efi.mps        = EFI_INVALID_TABLE_ADDR;
-	efi.acpi       = EFI_INVALID_TABLE_ADDR;
-	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
-	efi.smbios     = EFI_INVALID_TABLE_ADDR;
-	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
-	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
-	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
-	efi.uga        = EFI_INVALID_TABLE_ADDR;
-
-	for (i = 0; i < num_config_tables; i++) {
-		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
-			efi.mps = config_tables[i].table;
-			printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
-		} else
-		    if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
-			efi.acpi20 = config_tables[i].table;
-			printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
-		} else
-		    if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
-			efi.acpi = config_tables[i].table;
-			printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
-		} else
-		    if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
-			efi.smbios = config_tables[i].table;
-			printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
-		} else
-		    if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
-			efi.hcdp = config_tables[i].table;
-			printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
-		} else
-		    if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
-			efi.uga = config_tables[i].table;
-			printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
-		}
-	}
-	printk("\n");
-
-	/*
-	 * Check out the runtime services table. We need to map
-	 * the runtime services table so that we can grab the physical
-	 * address of several of the EFI runtime functions, needed to
-	 * set the firmware into virtual mode.
-	 */
-
-	runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
-						runtime,
-				      		sizeof(efi_runtime_services_t));
-	if (runtime != NULL) {
-		/*
-	 	 * We will only need *early* access to the following
-		 * two EFI runtime services before set_virtual_address_map
-		 * is invoked.
- 	 	 */
-		efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
-		efi_phys.set_virtual_address_map =
-			(efi_set_virtual_address_map_t *)
-				runtime->set_virtual_address_map;
-	} else
-		printk(KERN_ERR PFX "Could not map the runtime service table!\n");
-
-	/* Map the EFI memory map for use until paging_init() */
-	memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap,
-				  boot_params.efi_info.efi_memmap_size);
-	if (memmap.map == NULL)
-		printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
-
-	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-
-#if EFI_DEBUG
-	print_efi_memmap();
-#endif
-}
-
-static inline void __init check_range_for_systab(efi_memory_desc_t *md)
-{
-	if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
-		((unsigned long)efi_phys.systab < md->phys_addr +
-		((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
-		unsigned long addr;
-
-		addr = md->virt_addr - md->phys_addr +
-			(unsigned long)efi_phys.systab;
-		efi.systab = (efi_system_table_t *)addr;
-	}
-}
-
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-
-#define efi_call_virt(f, args...) \
-     ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
-
-static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
-	return efi_call_virt(get_time, tm, tc);
-}
-
-static efi_status_t virt_efi_set_time (efi_time_t *tm)
-{
-	return efi_call_virt(set_time, tm);
-}
-
-static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled,
-					      efi_bool_t *pending,
-					      efi_time_t *tm)
-{
-	return efi_call_virt(get_wakeup_time, enabled, pending, tm);
-}
-
-static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled,
-					      efi_time_t *tm)
-{
-	return efi_call_virt(set_wakeup_time, enabled, tm);
-}
-
-static efi_status_t virt_efi_get_variable (efi_char16_t *name,
-					   efi_guid_t *vendor, u32 *attr,
-					   unsigned long *data_size, void *data)
-{
-	return efi_call_virt(get_variable, name, vendor, attr, data_size, data);
-}
-
-static efi_status_t virt_efi_get_next_variable (unsigned long *name_size,
-						efi_char16_t *name,
-						efi_guid_t *vendor)
-{
-	return efi_call_virt(get_next_variable, name_size, name, vendor);
-}
-
-static efi_status_t virt_efi_set_variable (efi_char16_t *name,
-					   efi_guid_t *vendor,
-					   unsigned long attr,
-					   unsigned long data_size, void *data)
-{
-	return efi_call_virt(set_variable, name, vendor, attr, data_size, data);
-}
-
-static efi_status_t virt_efi_get_next_high_mono_count (u32 *count)
-{
-	return efi_call_virt(get_next_high_mono_count, count);
-}
-
-static void virt_efi_reset_system (int reset_type, efi_status_t status,
-				   unsigned long data_size,
-				   efi_char16_t *data)
-{
-	efi_call_virt(reset_system, reset_type, status, data_size, data);
-}
-
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- */
-
-void __init efi_enter_virtual_mode(void)
-{
-	efi_memory_desc_t *md;
-	efi_status_t status;
-	void *p;
-
-	efi.systab = NULL;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-
-		if (!(md->attribute & EFI_MEMORY_RUNTIME))
-			continue;
-
-		md->virt_addr = (unsigned long)ioremap(md->phys_addr,
-			md->num_pages << EFI_PAGE_SHIFT);
-		if (!(unsigned long)md->virt_addr) {
-			printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
-				(unsigned long)md->phys_addr);
-		}
-		/* update the virtual address of the EFI system table */
-		check_range_for_systab(md);
-	}
-
-	BUG_ON(!efi.systab);
-
-	status = phys_efi_set_virtual_address_map(
-			memmap.desc_size * memmap.nr_map,
-			memmap.desc_size,
-			memmap.desc_version,
-		       	memmap.phys_map);
-
-	if (status != EFI_SUCCESS) {
-		printk (KERN_ALERT "You are screwed! "
-			"Unable to switch EFI into virtual mode "
-			"(status=%lx)\n", status);
-		panic("EFI call to SetVirtualAddressMap() failed!");
-	}
-
-	/*
-	 * Now that EFI is in virtual mode, update the function
-	 * pointers in the runtime service table to the new virtual addresses.
-	 */
-
-	efi.get_time = virt_efi_get_time;
-	efi.set_time = virt_efi_set_time;
-	efi.get_wakeup_time = virt_efi_get_wakeup_time;
-	efi.set_wakeup_time = virt_efi_set_wakeup_time;
-	efi.get_variable = virt_efi_get_variable;
-	efi.get_next_variable = virt_efi_get_next_variable;
-	efi.set_variable = virt_efi_set_variable;
-	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
-	efi.reset_system = virt_efi_reset_system;
-}
-
 void __init
 efi_initialize_iomem_resources(struct resource *code_resource,
 			       struct resource *data_resource,
@@ -683,35 +289,3 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 		}
 	}
 }
-
-/*
- * Convenience functions to obtain memory types and attributes
- */
-
-u32 efi_mem_type(unsigned long phys_addr)
-{
-	efi_memory_desc_t *md;
-	void *p;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-		if ((md->phys_addr <= phys_addr) && (phys_addr <
-			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
-			return md->type;
-	}
-	return 0;
-}
-
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
-	efi_memory_desc_t *md;
-	void *p;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-		if ((md->phys_addr <= phys_addr) && (phys_addr <
-			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
-			return md->attribute;
-	}
-	return 0;
-}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e9ede0fc585a..32fc87adc4a3 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -618,16 +618,9 @@ void __init setup_arch(char **cmdline_p)
 	pre_setup_arch_hook();
 	early_cpu_init();
 
-	/*
-	 * FIXME: This isn't an official loader_type right
-	 * now but does currently work with elilo.
-	 * If we were configured as an EFI kernel, check to make
-	 * sure that we were loaded correctly from elilo and that
-	 * the system table is valid.  If not, then initialize normally.
-	 */
 #ifdef CONFIG_EFI
-	if ((boot_params.hdr.type_of_loader == 0x50) &&
-	    boot_params.efi_info.efi_systab)
+	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+		     "EL32", 4))
 		efi_enabled = 1;
 #endif
 
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 1459d3d58653..6d54502755aa 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -2,6 +2,48 @@
 #define _ASM_X86_EFI_H
 
 #ifdef CONFIG_X86_32
+
+extern unsigned long asmlinkage efi_call_phys(void *, ...);
+
+#define efi_call_phys0(f)		efi_call_phys(f)
+#define efi_call_phys1(f, a1)		efi_call_phys(f, a1)
+#define efi_call_phys2(f, a1, a2)	efi_call_phys(f, a1, a2)
+#define efi_call_phys3(f, a1, a2, a3)	efi_call_phys(f, a1, a2, a3)
+#define efi_call_phys4(f, a1, a2, a3, a4)	\
+	efi_call_phys(f, a1, a2, a3, a4)
+#define efi_call_phys5(f, a1, a2, a3, a4, a5)	\
+	efi_call_phys(f, a1, a2, a3, a4, a5)
+#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6)	\
+	efi_call_phys(f, a1, a2, a3, a4, a5, a6)
+/*
+ * Wrap all the virtual calls in a way that forces the parameters on the stack.
+ */
+
+#define efi_call_virt(f, args...) \
+     ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
+
+#define efi_call_virt0(f)		efi_call_virt(f)
+#define efi_call_virt1(f, a1)		efi_call_virt(f, a1)
+#define efi_call_virt2(f, a1, a2)	efi_call_virt(f, a1, a2)
+#define efi_call_virt3(f, a1, a2, a3)	efi_call_virt(f, a1, a2, a3)
+#define efi_call_virt4(f, a1, a2, a3, a4)	\
+	efi_call_virt(f, a1, a2, a3, a4)
+#define efi_call_virt5(f, a1, a2, a3, a4, a5)	\
+	efi_call_virt(f, a1, a2, a3, a4, a5)
+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
+	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
+/*
+ * We require an early boot_ioremap mapping mechanism initially
+ */
+extern void *boot_ioremap(unsigned long, unsigned long);
+
+#define efi_early_ioremap(addr, size)		boot_ioremap(addr, size)
+#define efi_early_iounmap(vaddr, size)
+
+#define efi_ioremap(addr, size)			ioremap(addr, size)
+
+#define end_pfn_map				max_low_pfn
+
 #else /* !CONFIG_X86_32 */
 
 #define MAX_EFI_IO_PAGES	100

From 2215e69d2cf5024647f9a034807990590d25dd4e Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:31:19 +0100
Subject: [PATCH 312/890] x86 boot: use E820 memory map on EFI 32 platform

Because the EFI memory map are converted to e820 memory map in bootloader, the
EFI memory map handling code is removed to clean up.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_32.c  | 117 +++--------------------------
 arch/x86/kernel/efi_32.c   | 150 -------------------------------------
 arch/x86/kernel/setup_32.c |  16 ++--
 arch/x86/mm/init_32.c      |  18 -----
 include/asm-x86/e820_32.h  |   2 +-
 5 files changed, 16 insertions(+), 287 deletions(-)

diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 56335a85a15a..931934a7b353 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -7,7 +7,6 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/efi.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/suspend.h>
@@ -181,7 +180,7 @@ static void __init probe_roms(void)
  * Request address space for all standard RAM and ROM resources
  * and also for regions reported as reserved by the e820.
  */
-void __init legacy_init_iomem_resources(struct resource *code_resource,
+void __init init_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource,
 		struct resource *bss_resource)
 {
@@ -261,19 +260,17 @@ void __init add_memory_region(unsigned long long start,
 {
 	int x;
 
-	if (!efi_enabled) {
-       		x = e820.nr_map;
+	x = e820.nr_map;
 
-		if (x == E820MAX) {
-		    printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-		    return;
-		}
-
-		e820.map[x].addr = start;
-		e820.map[x].size = size;
-		e820.map[x].type = type;
-		e820.nr_map++;
+	if (x == E820MAX) {
+		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+		return;
 	}
+
+	e820.map[x].addr = start;
+	e820.map[x].size = size;
+	e820.map[x].type = type;
+	e820.nr_map++;
 } /* add_memory_region */
 
 /*
@@ -488,29 +485,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 	return 0;
 }
 
-/*
- * Callback for efi_memory_walk.
- */
-static int __init
-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
-{
-	unsigned long *max_pfn = arg, pfn;
-
-	if (start < end) {
-		pfn = PFN_UP(end -1);
-		if (pfn > *max_pfn)
-			*max_pfn = pfn;
-	}
-	return 0;
-}
-
-static int __init
-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
-{
-	memory_present(0, PFN_UP(start), PFN_DOWN(end));
-	return 0;
-}
-
 /*
  * Find the highest page frame number we have available
  */
@@ -519,11 +493,6 @@ void __init find_max_pfn(void)
 	int i;
 
 	max_pfn = 0;
-	if (efi_enabled) {
-		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
-		efi_memmap_walk(efi_memory_present_wrapper, NULL);
-		return;
-	}
 
 	for (i = 0; i < e820.nr_map; i++) {
 		unsigned long start, end;
@@ -540,24 +509,6 @@ void __init find_max_pfn(void)
 	}
 }
 
-/*
- * Free all available memory for boot time allocation.  Used
- * as a callback function by efi_memory_walk()
- */
-
-static int __init
-free_available_memory(unsigned long start, unsigned long end, void *arg)
-{
-	/* check max_low_pfn */
-	if (start >= (max_low_pfn << PAGE_SHIFT))
-		return 0;
-	if (end >= (max_low_pfn << PAGE_SHIFT))
-		end = max_low_pfn << PAGE_SHIFT;
-	if (start < end)
-		free_bootmem(start, end - start);
-
-	return 0;
-}
 /*
  * Register fully available low RAM pages with the bootmem allocator.
  */
@@ -565,10 +516,6 @@ void __init register_bootmem_low_pages(unsigned long max_low_pfn)
 {
 	int i;
 
-	if (efi_enabled) {
-		efi_memmap_walk(free_available_memory, NULL);
-		return;
-	}
 	for (i = 0; i < e820.nr_map; i++) {
 		unsigned long curr_pfn, last_pfn, size;
 		/*
@@ -676,56 +623,12 @@ void __init print_memory_map(char *who)
 	}
 }
 
-static __init __always_inline void efi_limit_regions(unsigned long long size)
-{
-	unsigned long long current_addr = 0;
-	efi_memory_desc_t *md, *next_md;
-	void *p, *p1;
-	int i, j;
-
-	j = 0;
-	p1 = memmap.map;
-	for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
-		md = p;
-		next_md = p1;
-		current_addr = md->phys_addr +
-			PFN_PHYS(md->num_pages);
-		if (is_available_memory(md)) {
-			if (md->phys_addr >= size) continue;
-			memcpy(next_md, md, memmap.desc_size);
-			if (current_addr >= size) {
-				next_md->num_pages -=
-					PFN_UP(current_addr-size);
-			}
-			p1 += memmap.desc_size;
-			next_md = p1;
-			j++;
-		} else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
-			   EFI_MEMORY_RUNTIME) {
-			/* In order to make runtime services
-			 * available we have to include runtime
-			 * memory regions in memory map */
-			memcpy(next_md, md, memmap.desc_size);
-			p1 += memmap.desc_size;
-			next_md = p1;
-			j++;
-		}
-	}
-	memmap.nr_map = j;
-	memmap.map_end = memmap.map +
-		(memmap.nr_map * memmap.desc_size);
-}
-
 void __init limit_regions(unsigned long long size)
 {
 	unsigned long long current_addr;
 	int i;
 
 	print_memory_map("limit_regions start");
-	if (efi_enabled) {
-		efi_limit_regions(size);
-		return;
-	}
 	for (i = 0; i < e820.nr_map; i++) {
 		current_addr = e820.map[i].addr + e820.map[i].size;
 		if (current_addr < size)
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 1df13725e519..30f937116288 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -125,22 +125,6 @@ void efi_call_phys_epilog(void) __releases(efi_rt_lock)
 	spin_unlock(&efi_rt_lock);
 }
 
-int is_available_memory(efi_memory_desc_t * md)
-{
-	if (!(md->attribute & EFI_MEMORY_WB))
-		return 0;
-
-	switch (md->type) {
-		case EFI_LOADER_CODE:
-		case EFI_LOADER_DATA:
-		case EFI_BOOT_SERVICES_CODE:
-		case EFI_BOOT_SERVICES_DATA:
-		case EFI_CONVENTIONAL_MEMORY:
-			return 1;
-	}
-	return 0;
-}
-
 /*
  * We need to map the EFI memory map again after paging_init().
  */
@@ -155,137 +139,3 @@ void __init efi_map_memmap(void)
 
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
 }
-
-/*
- * Walks the EFI memory map and calls CALLBACK once for each EFI
- * memory descriptor that has memory that is available for kernel use.
- */
-void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
-{
-	int prev_valid = 0;
-	struct range {
-		unsigned long start;
-		unsigned long end;
-	} uninitialized_var(prev), curr;
-	efi_memory_desc_t *md;
-	unsigned long start, end;
-	void *p;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-
-		if ((md->num_pages == 0) || (!is_available_memory(md)))
-			continue;
-
-		curr.start = md->phys_addr;
-		curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
-		if (!prev_valid) {
-			prev = curr;
-			prev_valid = 1;
-		} else {
-			if (curr.start < prev.start)
-				printk(KERN_INFO PFX "Unordered memory map\n");
-			if (prev.end == curr.start)
-				prev.end = curr.end;
-			else {
-				start =
-				    (unsigned long) (PAGE_ALIGN(prev.start));
-				end = (unsigned long) (prev.end & PAGE_MASK);
-				if ((end > start)
-				    && (*callback) (start, end, arg) < 0)
-					return;
-				prev = curr;
-			}
-		}
-	}
-	if (prev_valid) {
-		start = (unsigned long) PAGE_ALIGN(prev.start);
-		end = (unsigned long) (prev.end & PAGE_MASK);
-		if (end > start)
-			(*callback) (start, end, arg);
-	}
-}
-
-void __init
-efi_initialize_iomem_resources(struct resource *code_resource,
-			       struct resource *data_resource,
-			       struct resource *bss_resource)
-{
-	struct resource *res;
-	efi_memory_desc_t *md;
-	void *p;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-
-		if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
-		    0x100000000ULL)
-			continue;
-		res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
-		switch (md->type) {
-		case EFI_RESERVED_TYPE:
-			res->name = "Reserved Memory";
-			break;
-		case EFI_LOADER_CODE:
-			res->name = "Loader Code";
-			break;
-		case EFI_LOADER_DATA:
-			res->name = "Loader Data";
-			break;
-		case EFI_BOOT_SERVICES_DATA:
-			res->name = "BootServices Data";
-			break;
-		case EFI_BOOT_SERVICES_CODE:
-			res->name = "BootServices Code";
-			break;
-		case EFI_RUNTIME_SERVICES_CODE:
-			res->name = "Runtime Service Code";
-			break;
-		case EFI_RUNTIME_SERVICES_DATA:
-			res->name = "Runtime Service Data";
-			break;
-		case EFI_CONVENTIONAL_MEMORY:
-			res->name = "Conventional Memory";
-			break;
-		case EFI_UNUSABLE_MEMORY:
-			res->name = "Unusable Memory";
-			break;
-		case EFI_ACPI_RECLAIM_MEMORY:
-			res->name = "ACPI Reclaim";
-			break;
-		case EFI_ACPI_MEMORY_NVS:
-			res->name = "ACPI NVS";
-			break;
-		case EFI_MEMORY_MAPPED_IO:
-			res->name = "Memory Mapped IO";
-			break;
-		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
-			res->name = "Memory Mapped IO Port Space";
-			break;
-		default:
-			res->name = "Reserved";
-			break;
-		}
-		res->start = md->phys_addr;
-		res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		if (request_resource(&iomem_resource, res) < 0)
-			printk(KERN_ERR PFX "Failed to allocate res %s : "
-				"0x%llx-0x%llx\n", res->name,
-				(unsigned long long)res->start,
-				(unsigned long long)res->end);
-		/*
-		 * We don't know which region contains kernel data so we try
-		 * it repeatedly and let the resource manager test it.
-		 */
-		if (md->type == EFI_CONVENTIONAL_MEMORY) {
-			request_resource(res, code_resource);
-			request_resource(res, data_resource);
-			request_resource(res, bss_resource);
-#ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
-#endif
-		}
-	}
-}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 32fc87adc4a3..2e805da337a2 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -644,12 +644,12 @@ void __init setup_arch(char **cmdline_p)
 	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
 #endif
 	ARCH_SETUP
+
+	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+	print_memory_map(memory_setup());
+
 	if (efi_enabled)
 		efi_init();
-	else {
-		printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-		print_memory_map(memory_setup());
-	}
 
 	copy_edd();
 
@@ -769,14 +769,8 @@ static int __init request_standard_resources(void)
 	int i;
 
 	printk(KERN_INFO "Setting up standard PCI resources\n");
-	if (efi_enabled)
-		efi_initialize_iomem_resources(&code_resource,
-				&data_resource, &bss_resource);
-	else
-		legacy_init_iomem_resources(&code_resource,
-				&data_resource, &bss_resource);
+	init_iomem_resources(&code_resource, &data_resource, &bss_resource);
 
-	/* EFI systems may still have VGA */
 	request_resource(&iomem_resource, &video_ram_resource);
 
 	/* request I/O space for devices used on all i[345]86 PCs */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e6e34c7dcabf..29130970c193 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -27,7 +27,6 @@
 #include <linux/bootmem.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
-#include <linux/efi.h>
 #include <linux/memory_hotplug.h>
 #include <linux/initrd.h>
 #include <linux/cpumask.h>
@@ -216,23 +215,6 @@ int page_is_ram(unsigned long pagenr)
 	int i;
 	unsigned long addr, end;
 
-	if (efi_enabled) {
-		efi_memory_desc_t *md;
-		void *p;
-
-		for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-			md = p;
-			if (!is_available_memory(md))
-				continue;
-			addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
-			end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
-
-			if ((pagenr >= addr) && (pagenr < end))
-				return 1;
-		}
-		return 0;
-	}
-
 	for (i = 0; i < e820.nr_map; i++) {
 
 		if (e820.map[i].type != E820_RAM)	/* not usable memory */
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index ae5ea19623fa..e2faf5f3a0bb 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -28,7 +28,7 @@ extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
 extern void print_memory_map(char *who);
-extern void legacy_init_iomem_resources(struct resource *code_resource,
+extern void init_iomem_resources(struct resource *code_resource,
 			    struct resource *data_resource,
 			    struct resource *bss_resource);
 

From f6e0eba1b133192e298052d616e9e17356d32517 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 313/890] x86: avoid ifdefs in desc.h, getting rid of pack_ldt
 and pack_tss

By Andi Kleen's suggestion, this patch removes pack_ldt() and pack_tss()
wrappers in favour of a general wrapper. It saves us an ifdef and some lines
of code, but more importantly, it's more elegant.

No functional change is made, although the object code is expected to be
different.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc.h | 58 +++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 40 deletions(-)

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index c31715cf2218..e9356a852020 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -125,21 +125,6 @@ static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
 	memcpy(&gdt[entry], desc, size);
 }
 
-static inline void set_tssldt_descriptor(struct ldttss_desc64 *d,
-					 unsigned long tss, unsigned type,
-					 unsigned size)
-{
-	memset(d, 0, sizeof(*d));
-	d->limit0 = size & 0xFFFF;
-	d->base0 = PTR_LOW(tss);
-	d->base1 = PTR_MIDDLE(tss) & 0xFF;
-	d->type = type;
-	d->p = 1;
-	d->limit1 = (size >> 16) & 0xF;
-	d->base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
-	d->base3 = PTR_HIGH(tss);
-}
-
 static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
 				   unsigned long limit, unsigned char type,
 				   unsigned char flags)
@@ -151,30 +136,24 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
 	desc->p = 1;
 }
 
-static inline void pack_ldt(ldt_desc *ldt, unsigned long addr,
-			   unsigned size)
-{
 
-#ifdef CONFIG_X86_64
-		set_tssldt_descriptor(ldt,
-			     addr, DESC_LDT, size);
-#else
-		pack_descriptor(ldt, (unsigned long)addr,
-				size,
-				0x80 | DESC_LDT, 0);
-#endif
-}
-
-static inline void pack_tss(tss_desc *tss, unsigned long addr,
-			   unsigned size, unsigned entry)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+					 unsigned type, unsigned size)
 {
 #ifdef CONFIG_X86_64
-		set_tssldt_descriptor(tss,
-			     addr, entry, size);
+	struct ldttss_desc64 *desc = d;
+	memset(desc, 0, sizeof(*desc));
+	desc->limit0 = size & 0xFFFF;
+	desc->base0 = PTR_LOW(addr);
+	desc->base1 = PTR_MIDDLE(addr) & 0xFF;
+	desc->type = type;
+	desc->p = 1;
+	desc->limit1 = (size >> 16) & 0xF;
+	desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
+	desc->base3 = PTR_HIGH(addr);
 #else
-		pack_descriptor(tss, (unsigned long)addr,
-				size,
-				0x80 | entry, 0);
+
+	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
 #endif
 }
 
@@ -190,9 +169,8 @@ static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 	 * -1? seg base+limit should be pointing to the address of the
 	 * last valid byte
 	 */
-	pack_tss(&tss, (unsigned long)addr,
-		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1,
-		DESC_TSS);
+	set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
+		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
 	write_gdt_entry(d, entry, &tss, DESC_TSS);
 }
 
@@ -206,8 +184,8 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 		unsigned cpu = smp_processor_id();
 		ldt_desc ldt;
 
-		pack_ldt(&ldt, (unsigned long)addr,
-				entries * sizeof(ldt) - 1);
+		set_tssldt_descriptor(&ldt, (unsigned long)addr,
+				      DESC_LDT, entries * sizeof(ldt) - 1);
 		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));

From 2f4aaf53c21e644ba0f581ce62b985d767388c64 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 314/890] x86, ptrace: remove bad comment

Remove no longer correct comment.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f91521e26335..836a71adfa11 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -608,10 +608,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
 
-	/*
-	 * Last branch recording recofiguration of trace hardware and
-	 * disentangling of trace data per task.
-	 */
 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 

From 3c68904fee1459b6d51040864e15d19098eedef7 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 315/890] x86, ptrace: use jiffies for BTS timestamps

Replace sched_clock() with jiffies for BTS timestamps.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ds.c     | 30 ++++++++++--------------------
 arch/x86/kernel/ptrace.c |  2 +-
 include/asm-x86/ds.h     |  2 +-
 3 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 996a7c4f5963..e7855def97c3 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -167,23 +167,13 @@ static inline void set_info_type(char *base, unsigned char value)
 {
 	(*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
 }
-/*
- * The info data might overlap with the info type on some architectures.
- * We therefore read and write the exact number of bytes.
- */
-static inline unsigned long long get_info_data(char *base)
+static inline unsigned long get_info_data(char *base)
 {
-	unsigned long long value = 0;
-	memcpy(&value,
-	       base + ds_cfg.info_data.offset,
-	       ds_cfg.info_data.size);
-	return value;
+	return *(unsigned long *)(base + ds_cfg.info_data.offset);
 }
-static inline void set_info_data(char *base, unsigned long long value)
+static inline void set_info_data(char *base, unsigned long value)
 {
-	memcpy(base + ds_cfg.info_data.offset,
-	       &value,
-	       ds_cfg.info_data.size);
+	(*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
 }
 
 
@@ -282,8 +272,8 @@ int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
 
 	memset(out, 0, sizeof(*out));
 	if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
-		out->qualifier         = get_info_type(bts);
-		out->variant.timestamp = get_info_data(bts);
+		out->qualifier       = get_info_type(bts);
+		out->variant.jiffies = get_info_data(bts);
 	} else {
 		out->qualifier = BTS_BRANCH;
 		out->variant.lbr.from_ip = get_from_ip(bts);
@@ -319,7 +309,7 @@ int ds_write_bts(void *ds, const struct bts_struct *in)
 	case BTS_TASK_DEPARTS:
 		set_from_ip(bts, BTS_ESCAPE_ADDRESS);
 		set_info_type(bts, in->qualifier);
-		set_info_data(bts, in->variant.timestamp);
+		set_info_data(bts, in->variant.jiffies);
 		break;
 
 	default:
@@ -350,7 +340,7 @@ static const struct ds_configuration ds_cfg_netburst = {
 	.from_ip = { 0, 4 },
 	.to_ip = { 4, 4 },
 	.info_type = { 4, 1 },
-	.info_data = { 5, 7 },
+	.info_data = { 8, 4 },
 	.debugctl_mask = (1<<2)|(1<<3)
 };
 
@@ -364,7 +354,7 @@ static const struct ds_configuration ds_cfg_pentium_m = {
 	.from_ip = { 0, 4 },
 	.to_ip = { 4, 4 },
 	.info_type = { 4, 1 },
-	.info_data = { 5, 7 },
+	.info_data = { 8, 4 },
 	.debugctl_mask = (1<<6)|(1<<7)
 };
 #endif /* _i386_ */
@@ -379,7 +369,7 @@ static const struct ds_configuration ds_cfg_core2 = {
 	.from_ip = { 0, 8 },
 	.to_ip = { 8, 8 },
 	.info_type = { 8, 1 },
-	.info_data = { 9, 7 },
+	.info_data = { 16, 8 },
 	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
 };
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8d0dd8b5effe..ec86abaab530 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -616,7 +616,7 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
 {
 	struct bts_struct rec = {
 		.qualifier = qualifier,
-		.variant.timestamp = sched_clock()
+		.variant.jiffies = jiffies
 	};
 
 	if (ptrace_bts_get_buffer_size(tsk) <= 0)
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index edd8467740a6..c9e15381dc7f 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -48,7 +48,7 @@ struct bts_struct {
 		} lbr;
 		/* BTS_TASK_ARRIVES or
 		   BTS_TASK_DEPARTS */
-		unsigned long long timestamp;
+		unsigned long jiffies;
 	} variant;
 };
 

From e4811f2568c55e595a7bf15a3b9aba863b31fb94 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 316/890] x86, ptrace: change BTS GET ptrace interface

Change the ptrace interface to mimick an array from newst to oldest.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c     | 30 ++++++++++++++++--------------
 include/asm-x86/ptrace-abi.h | 17 ++++++-----------
 2 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ec86abaab530..3e78c124e2d2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -479,26 +479,33 @@ static int ptrace_bts_get_buffer_size(struct task_struct *child)
 	return ds_get_bts_size((void *)child->thread.ds_area_msr);
 }
 
-static int ptrace_bts_get_index(struct task_struct *child)
-{
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	return ds_get_bts_index((void *)child->thread.ds_area_msr);
-}
-
 static int ptrace_bts_read_record(struct task_struct *child,
 				  long index,
 				  struct bts_struct __user *out)
 {
 	struct bts_struct ret;
 	int retval;
+	int bts_size;
+	int bts_index;
 
 	if (!child->thread.ds_area_msr)
 		return -ENXIO;
 
+	if (index < 0)
+		return -EINVAL;
+
+	bts_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+	if (bts_size <= index)
+		return -EINVAL;
+
+	/* translate the ptrace bts index into the ds bts index */
+	bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
+	bts_index -= (index + 1);
+	if (bts_index < 0)
+		bts_index += bts_size;
+
 	retval = ds_read_bts((void *)child->thread.ds_area_msr,
-			     index, &ret);
+			     bts_index, &ret);
 	if (retval)
 		return retval;
 
@@ -813,10 +820,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = ptrace_bts_get_buffer_size(child);
 		break;
 
-	case PTRACE_BTS_GET_INDEX:
-		ret = ptrace_bts_get_index(child);
-		break;
-
 	case PTRACE_BTS_READ_RECORD:
 		ret = ptrace_bts_read_record
 			(child, data,
@@ -1017,7 +1020,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_BTS_MAX_BUFFER_SIZE:
 	case PTRACE_BTS_ALLOCATE_BUFFER:
 	case PTRACE_BTS_GET_BUFFER_SIZE:
-	case PTRACE_BTS_GET_INDEX:
 	case PTRACE_BTS_READ_RECORD:
 	case PTRACE_BTS_CONFIG:
 	case PTRACE_BTS_STATUS:
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 6fadc5214e14..b473ad45e9ca 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -99,32 +99,27 @@
    ENXIO........no buffer allocated */
 #define PTRACE_BTS_GET_BUFFER_SIZE 42
 
-/* Return the index of the next bts record to be written,
-   if successful; -1, otherwise.
-   EOPNOTSUPP...processor does not support bts tracing
-   ENXIO........no buffer allocated
-   After the first warp-around, this is the start of the circular bts buffer. */
-#define PTRACE_BTS_GET_INDEX 43
-
-/* Read the DATA'th bts record into a ptrace_bts_record buffer provided in ADDR.
+/* Read the DATA'th bts record into a ptrace_bts_record buffer
+   provided in ADDR.
+   Records are ordered from newest to oldest.
    Return 0, if successful; -1, otherwise
    EOPNOTSUPP...processor does not support bts tracing
    ENXIO........no buffer allocated
    EINVAL.......invalid index */
-#define PTRACE_BTS_READ_RECORD 44
+#define PTRACE_BTS_READ_RECORD 43
 
 /* Configure last branch trace; the configuration is given as a bit-mask of
    PTRACE_BTS_O_* options in DATA; parameter ADDR is ignored.
    Return 0, if successful; -1, otherwise
    EOPNOTSUPP...processor does not support bts tracing
    ENXIO........no buffer allocated */
-#define PTRACE_BTS_CONFIG 45
+#define PTRACE_BTS_CONFIG 44
 
 /* Return the configuration as bit-mask of PTRACE_BTS_O_* options
    if successful; -1, otherwise.
    EOPNOTSUPP...processor does not support bts tracing
    ENXIO........no buffer allocated */
-#define PTRACE_BTS_STATUS 46
+#define PTRACE_BTS_STATUS 45
 
 /* Trace configuration options */
 /* Collect last branch trace */

From a95d67f87e1a5f1b4429be3ba3bf7b4051657908 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 317/890] x86, ptrace: new ptrace BTS API

Here's the new ptrace BTS API that supports two different overflow handling mechanisms (wrap-around and buffer-full-signal) to support two different use cases (debugging and profiling).

It further combines buffer allocation and configuration.

Opens:
- memory rlimit
- overflow signal

What would be the right signal to use?

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ds.c         |  56 ++++++++-
 arch/x86/kernel/ptrace.c     | 237 ++++++++++++++++++++---------------
 include/asm-x86/ds.h         |   7 ++
 include/asm-x86/ptrace-abi.h |  90 ++++++-------
 include/asm-x86/ptrace.h     |   1 +
 5 files changed, 243 insertions(+), 148 deletions(-)

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index e7855def97c3..6eb5d49a36bb 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -177,18 +177,20 @@ static inline void set_info_data(char *base, unsigned long value)
 }
 
 
-int ds_allocate(void **dsp, size_t bts_size_in_records)
+int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 {
-	size_t bts_size_in_bytes = 0;
-	void *bts = 0;
-	void *ds = 0;
+	size_t bts_size_in_records;
+	void *bts;
+	void *ds;
 
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
 		return -EOPNOTSUPP;
 
-	if (bts_size_in_records < 0)
+	if (bts_size_in_bytes < 0)
 		return -EINVAL;
 
+	bts_size_in_records =
+		bts_size_in_bytes / ds_cfg.sizeof_bts;
 	bts_size_in_bytes =
 		bts_size_in_records * ds_cfg.sizeof_bts;
 
@@ -233,9 +235,21 @@ int ds_get_bts_size(void *ds)
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
 		return -EOPNOTSUPP;
 
+	if (!ds)
+		return 0;
+
 	size_in_bytes =
 		get_bts_absolute_maximum(ds) -
 		get_bts_buffer_base(ds);
+	return size_in_bytes;
+}
+
+int ds_get_bts_end(void *ds)
+{
+	size_t size_in_bytes = ds_get_bts_size(ds);
+
+	if (size_in_bytes <= 0)
+		return size_in_bytes;
 
 	return size_in_bytes / ds_cfg.sizeof_bts;
 }
@@ -254,6 +268,38 @@ int ds_get_bts_index(void *ds)
 	return index_offset_in_bytes / ds_cfg.sizeof_bts;
 }
 
+int ds_set_overflow(void *ds, int method)
+{
+	switch (method) {
+	case DS_O_SIGNAL:
+		return -EOPNOTSUPP;
+	case DS_O_WRAP:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+int ds_get_overflow(void *ds)
+{
+	return DS_O_WRAP;
+}
+
+int ds_clear(void *ds)
+{
+	int bts_size = ds_get_bts_size(ds);
+	void *bts_base;
+
+	if (bts_size <= 0)
+		return bts_size;
+
+	bts_base = get_bts_buffer_base(ds);
+	memset(bts_base, 0, bts_size);
+
+	set_bts_index(ds, bts_base);
+	return 0;
+}
+
 int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
 {
 	void *bts;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3e78c124e2d2..18972a305890 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -32,12 +32,6 @@
 #include <asm/ds.h>
 
 
-/*
- * The maximal size of a BTS buffer per traced task in number of BTS
- * records.
- */
-#define PTRACE_BTS_BUFFER_MAX 4000
-
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
@@ -466,17 +460,12 @@ static int ptrace_set_debugreg(struct task_struct *child,
 	return 0;
 }
 
-static int ptrace_bts_max_buffer_size(void)
-{
-	return PTRACE_BTS_BUFFER_MAX;
-}
-
-static int ptrace_bts_get_buffer_size(struct task_struct *child)
+static int ptrace_bts_get_size(struct task_struct *child)
 {
 	if (!child->thread.ds_area_msr)
 		return -ENXIO;
 
-	return ds_get_bts_size((void *)child->thread.ds_area_msr);
+	return ds_get_bts_index((void *)child->thread.ds_area_msr);
 }
 
 static int ptrace_bts_read_record(struct task_struct *child,
@@ -485,7 +474,7 @@ static int ptrace_bts_read_record(struct task_struct *child,
 {
 	struct bts_struct ret;
 	int retval;
-	int bts_size;
+	int bts_end;
 	int bts_index;
 
 	if (!child->thread.ds_area_msr)
@@ -494,15 +483,15 @@ static int ptrace_bts_read_record(struct task_struct *child,
 	if (index < 0)
 		return -EINVAL;
 
-	bts_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-	if (bts_size <= index)
+	bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
+	if (bts_end <= index)
 		return -EINVAL;
 
 	/* translate the ptrace bts index into the ds bts index */
 	bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
 	bts_index -= (index + 1);
 	if (bts_index < 0)
-		bts_index += bts_size;
+		bts_index += bts_end;
 
 	retval = ds_read_bts((void *)child->thread.ds_area_msr,
 			     bts_index, &ret);
@@ -530,19 +519,97 @@ static int ptrace_bts_write_record(struct task_struct *child,
 	return sizeof(*in);
 }
 
-static int ptrace_bts_config(struct task_struct *child,
-			     unsigned long options)
+static int ptrace_bts_clear(struct task_struct *child)
 {
-	unsigned long debugctl_mask = ds_debugctl_mask();
-	int retval;
-
-	retval = ptrace_bts_get_buffer_size(child);
-	if (retval < 0)
-		return retval;
-	if (retval == 0)
+	if (!child->thread.ds_area_msr)
 		return -ENXIO;
 
-	if (options & PTRACE_BTS_O_TRACE_TASK) {
+	return ds_clear((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_drain(struct task_struct *child,
+			    struct bts_struct __user *out)
+{
+	int end, i;
+	void *ds = (void *)child->thread.ds_area_msr;
+
+	if (!ds)
+		return -ENXIO;
+
+	end = ds_get_bts_index(ds);
+	if (end <= 0)
+		return end;
+
+	for (i = 0; i < end; i++, out++) {
+		struct bts_struct ret;
+		int retval;
+
+		retval = ds_read_bts(ds, i, &ret);
+		if (retval < 0)
+			return retval;
+
+		if (copy_to_user(out, &ret, sizeof(ret)))
+			return -EFAULT;
+	}
+
+	ds_clear(ds);
+
+	return i;
+}
+
+static int ptrace_bts_config(struct task_struct *child,
+			     const struct ptrace_bts_config __user *ucfg)
+{
+	struct ptrace_bts_config cfg;
+	unsigned long debugctl_mask;
+	int bts_size, ret;
+	void *ds;
+
+	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
+		return -EFAULT;
+
+	bts_size = 0;
+	ds = (void *)child->thread.ds_area_msr;
+	if (ds) {
+		bts_size = ds_get_bts_size(ds);
+		if (bts_size < 0)
+			return bts_size;
+	}
+
+	if (bts_size != cfg.size) {
+		ret = ds_free((void **)&child->thread.ds_area_msr);
+		if (ret < 0)
+			return ret;
+
+		if (cfg.size > 0)
+			ret = ds_allocate((void **)&child->thread.ds_area_msr,
+					  cfg.size);
+		ds = (void *)child->thread.ds_area_msr;
+		if (ds)
+			set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+		else
+			clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+		if (ret < 0)
+			return ret;
+
+		bts_size = ds_get_bts_size(ds);
+		if (bts_size <= 0)
+			return bts_size;
+	}
+
+	if (ds) {
+		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+			ret = ds_set_overflow(ds, DS_O_SIGNAL);
+		} else {
+			ret = ds_set_overflow(ds, DS_O_WRAP);
+		}
+		if (ret < 0)
+			return ret;
+	}
+
+	debugctl_mask = ds_debugctl_mask();
+	if (ds && (cfg.flags & PTRACE_BTS_O_TRACE)) {
 		child->thread.debugctlmsr |= debugctl_mask;
 		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 	} else {
@@ -555,7 +622,7 @@ static int ptrace_bts_config(struct task_struct *child,
 			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 	}
 
-	if (options & PTRACE_BTS_O_TIMESTAMPS)
+	if (ds && (cfg.flags & PTRACE_BTS_O_SCHED))
 		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
@@ -563,59 +630,32 @@ static int ptrace_bts_config(struct task_struct *child,
 	return 0;
 }
 
-static int ptrace_bts_status(struct task_struct *child)
+static int ptrace_bts_status(struct task_struct *child,
+			     struct ptrace_bts_config __user *ucfg)
 {
-	unsigned long debugctl_mask = ds_debugctl_mask();
-	int retval, status = 0;
+	void *ds = (void *)child->thread.ds_area_msr;
+	struct ptrace_bts_config cfg;
 
-	retval = ptrace_bts_get_buffer_size(child);
-	if (retval < 0)
-		return retval;
-	if (retval == 0)
-		return -ENXIO;
+	memset(&cfg, 0, sizeof(cfg));
 
-	if (ptrace_bts_get_buffer_size(child) <= 0)
-		return -ENXIO;
+	if (ds) {
+		cfg.size = ds_get_bts_size(ds);
 
-	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-	    child->thread.debugctlmsr & debugctl_mask)
-		status |= PTRACE_BTS_O_TRACE_TASK;
-	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-		status |= PTRACE_BTS_O_TIMESTAMPS;
+		if (ds_get_overflow(ds) == DS_O_SIGNAL)
+			cfg.flags |= PTRACE_BTS_O_SIGNAL;
 
-	return status;
-}
+		if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+		    child->thread.debugctlmsr & ds_debugctl_mask())
+			cfg.flags |= PTRACE_BTS_O_TRACE;
 
-static int ptrace_bts_allocate_bts(struct task_struct *child,
-				   int size_in_records)
-{
-	int retval = 0;
-	void *ds;
-
-	if (size_in_records < 0)
-		return -EINVAL;
-
-	if (size_in_records > ptrace_bts_max_buffer_size())
-		return -EINVAL;
-
-	if (size_in_records == 0) {
-		ptrace_bts_config(child, /* options = */ 0);
-	} else {
-		retval = ds_allocate(&ds, size_in_records);
-		if (retval)
-			return retval;
+		if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+			cfg.flags |= PTRACE_BTS_O_SCHED;
 	}
 
-	if (child->thread.ds_area_msr)
-		ds_free((void **)&child->thread.ds_area_msr);
+	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
+		return -EFAULT;
 
-	child->thread.ds_area_msr = (unsigned long)ds;
-	if (child->thread.ds_area_msr)
-		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-	return retval;
+	return sizeof(cfg);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -626,9 +666,6 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
 		.variant.jiffies = jiffies
 	};
 
-	if (ptrace_bts_get_buffer_size(tsk) <= 0)
-		return;
-
 	ptrace_bts_write_record(tsk, &rec);
 }
 
@@ -808,30 +845,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 #endif
 
-	case PTRACE_BTS_MAX_BUFFER_SIZE:
-		ret = ptrace_bts_max_buffer_size();
-		break;
-
-	case PTRACE_BTS_ALLOCATE_BUFFER:
-		ret = ptrace_bts_allocate_bts(child, data);
-		break;
-
-	case PTRACE_BTS_GET_BUFFER_SIZE:
-		ret = ptrace_bts_get_buffer_size(child);
-		break;
-
-	case PTRACE_BTS_READ_RECORD:
-		ret = ptrace_bts_read_record
-			(child, data,
-			 (struct bts_struct __user *) addr);
-		break;
-
 	case PTRACE_BTS_CONFIG:
-		ret = ptrace_bts_config(child, data);
+		ret = ptrace_bts_config
+			(child, (struct ptrace_bts_config __user *)addr);
 		break;
 
 	case PTRACE_BTS_STATUS:
-		ret = ptrace_bts_status(child);
+		ret = ptrace_bts_status
+			(child, (struct ptrace_bts_config __user *)addr);
+		break;
+
+	case PTRACE_BTS_SIZE:
+		ret = ptrace_bts_get_size(child);
+		break;
+
+	case PTRACE_BTS_GET:
+		ret = ptrace_bts_read_record
+			(child, data, (struct bts_struct __user *) addr);
+		break;
+
+	case PTRACE_BTS_CLEAR:
+		ret = ptrace_bts_clear(child);
+		break;
+
+	case PTRACE_BTS_DRAIN:
+		ret = ptrace_bts_drain
+			(child, (struct bts_struct __user *) addr);
 		break;
 
 	default:
@@ -1017,12 +1056,12 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_SETOPTIONS:
 	case PTRACE_SET_THREAD_AREA:
 	case PTRACE_GET_THREAD_AREA:
-	case PTRACE_BTS_MAX_BUFFER_SIZE:
-	case PTRACE_BTS_ALLOCATE_BUFFER:
-	case PTRACE_BTS_GET_BUFFER_SIZE:
-	case PTRACE_BTS_READ_RECORD:
 	case PTRACE_BTS_CONFIG:
 	case PTRACE_BTS_STATUS:
+	case PTRACE_BTS_SIZE:
+	case PTRACE_BTS_GET:
+	case PTRACE_BTS_CLEAR:
+	case PTRACE_BTS_DRAIN:
 		return sys_ptrace(request, pid, addr, data);
 
 	default:
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index c9e15381dc7f..b84040abee68 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -52,11 +52,18 @@ struct bts_struct {
 	} variant;
 };
 
+/* Overflow handling mechanisms */
+#define DS_O_SIGNAL	1 /* send overflow signal */
+#define DS_O_WRAP	2 /* wrap around */
 
 extern int ds_allocate(void **, size_t);
 extern int ds_free(void **);
 extern int ds_get_bts_size(void *);
+extern int ds_get_bts_end(void *);
 extern int ds_get_bts_index(void *);
+extern int ds_set_overflow(void *, int);
+extern int ds_get_overflow(void *);
+extern int ds_clear(void *);
 extern int ds_read_bts(void *, size_t, struct bts_struct *);
 extern int ds_write_bts(void *, const struct bts_struct *);
 extern unsigned long ds_debugctl_mask(void);
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index b473ad45e9ca..cf2fe4633ee5 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -80,51 +80,53 @@
 
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
-/* Return maximal BTS buffer size in number of records,
-   if successuf; -1, otherwise.
-   EOPNOTSUPP...processor does not support bts tracing */
-#define PTRACE_BTS_MAX_BUFFER_SIZE 40
+/* configuration/status structure used in PTRACE_BTS_CONFIG and
+   PTRACE_BTS_STATUS commands.
+*/
+struct ptrace_bts_config {
+	/* requested or actual size of BTS buffer in bytes */
+	unsigned long size;
+	/* bitmask of below flags */
+	unsigned long flags;
+};
 
-/* Allocate new bts buffer (free old one, if exists) of size DATA bts records;
-   parameter ADDR is ignored.
-   Return 0, if successful; -1, otherwise.
-   EOPNOTSUPP...processor does not support bts tracing
-   EINVAL.......invalid size in records
-   ENOMEM.......out of memory */
-#define PTRACE_BTS_ALLOCATE_BUFFER 41
+#define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
+#define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */
+#define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG? on buffer overflow
+				       instead of wrapping around */
+#define PTRACE_BTS_O_CUT_SIZE	0x8 /* cut requested size to max available
+				       instead of failing */
 
-/* Return the size of the bts buffer in number of bts records,
-   if successful; -1, otherwise.
-   EOPNOTSUPP...processor does not support bts tracing
-   ENXIO........no buffer allocated */
-#define PTRACE_BTS_GET_BUFFER_SIZE 42
-
-/* Read the DATA'th bts record into a ptrace_bts_record buffer
-   provided in ADDR.
-   Records are ordered from newest to oldest.
-   Return 0, if successful; -1, otherwise
-   EOPNOTSUPP...processor does not support bts tracing
-   ENXIO........no buffer allocated
-   EINVAL.......invalid index */
-#define PTRACE_BTS_READ_RECORD 43
-
-/* Configure last branch trace; the configuration is given as a bit-mask of
-   PTRACE_BTS_O_* options in DATA; parameter ADDR is ignored.
-   Return 0, if successful; -1, otherwise
-   EOPNOTSUPP...processor does not support bts tracing
-   ENXIO........no buffer allocated */
-#define PTRACE_BTS_CONFIG 44
-
-/* Return the configuration as bit-mask of PTRACE_BTS_O_* options
-   if successful; -1, otherwise.
-   EOPNOTSUPP...processor does not support bts tracing
-   ENXIO........no buffer allocated */
-#define PTRACE_BTS_STATUS 45
-
-/* Trace configuration options */
-/* Collect last branch trace */
-#define PTRACE_BTS_O_TRACE_TASK 0x1
-/* Take timestamps when the task arrives and departs */
-#define PTRACE_BTS_O_TIMESTAMPS 0x2
+#define PTRACE_BTS_CONFIG	40
+/* Configure branch trace recording.
+   DATA is ignored, ADDR points to a struct ptrace_bts_config.
+   A new buffer is allocated, iff the size changes.
+*/
+#define PTRACE_BTS_STATUS	41
+/* Return the current configuration.
+   DATA is ignored, ADDR points to a struct ptrace_bts_config
+   that will contain the result.
+*/
+#define PTRACE_BTS_SIZE		42
+/* Return the number of available BTS records.
+   DATA and ADDR are ignored.
+*/
+#define PTRACE_BTS_GET		43
+/* Get a single BTS record.
+   DATA defines the index into the BTS array, where 0 is the newest
+   entry, and higher indices refer to older entries.
+   ADDR is pointing to struct bts_struct (see asm/ds.h).
+*/
+#define PTRACE_BTS_CLEAR	44
+/* Clear the BTS buffer.
+   DATA and ADDR are ignored.
+*/
+#define PTRACE_BTS_DRAIN	45
+/* Read all available BTS records and clear the buffer.
+   DATA is ignored. ADDR points to an array of struct bts_struct of
+   suitable size.
+   BTS records are read from oldest to newest.
+   Returns number of BTS records drained.
+*/
 
 #endif
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index a9a1bab1451a..61946fe8c085 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -9,6 +9,7 @@
 
 #ifdef __KERNEL__
 
+/* the DS BTS struct is used for ptrace as well */
 #include <asm/ds.h>
 
 struct task_struct;

From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: [PATCH 318/890] spinlock: lockbreak cleanup

The break_lock data structure and code for spinlocks is quite nasty.
Not only does it double the size of a spinlock but it changes locking to
a potentially less optimal trylock.

Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a
__raw_spin_is_contended that uses the lock data itself to determine whether
there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is
not set.

Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to
decouple it from the spinlock implementation, and make it typesafe (rwlocks
do not have any need_lockbreak sites -- why do they even get bloated up
with that break_lock then?).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/Kconfig               |  5 +++++
 arch/ia64/Kconfig              |  5 +++++
 arch/m32r/Kconfig              |  5 +++++
 arch/mips/Kconfig              |  5 +++++
 arch/parisc/Kconfig            |  5 +++++
 arch/powerpc/Kconfig           |  5 +++++
 arch/sparc64/Kconfig           |  5 +++++
 arch/x86/Kconfig               |  4 ++++
 fs/jbd/checkpoint.c            |  3 ++-
 fs/jbd/commit.c                |  2 +-
 fs/jbd2/checkpoint.c           |  3 ++-
 fs/jbd2/commit.c               |  2 +-
 include/linux/sched.h          | 21 +++++++--------------
 include/linux/spinlock.h       |  6 ++++++
 include/linux/spinlock_types.h |  4 ++--
 include/linux/spinlock_up.h    |  2 ++
 kernel/sched.c                 | 16 ++++++----------
 kernel/spinlock.c              |  3 +--
 mm/memory.c                    |  8 +++-----
 19 files changed, 72 insertions(+), 37 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index de211ac3853e..77201d3f7479 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -91,6 +91,11 @@ config GENERIC_IRQ_PROBE
 	bool
 	default y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bef47725d4ad..4a81b7fb191a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,6 +42,11 @@ config MMU
 config SWIOTLB
        bool
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index ab9a264cb194..f7237c5f531e 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -235,6 +235,11 @@ config IRAM_SIZE
 # Define implied options from the CPU selection here
 #
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	depends on M32R
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6b0f85f02c79..4fad0a34b997 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -694,6 +694,11 @@ source "arch/mips/vr41xx/Kconfig"
 
 endmenu
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b8ef1787a191..2b649c46631c 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -19,6 +19,11 @@ config MMU
 config STACK_GROWSUP
 	def_bool y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 232c298c933f..c17a194beb0e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -53,6 +53,11 @@ config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config ARCH_HAS_ILOG2_U32
 	bool
 	default y
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 10b212a1f9f5..1e25bce0366d 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -200,6 +200,11 @@ config US2E_FREQ
 	  If in doubt, say N.
 
 # Global things across all Sun machines.
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 23936301db56..db434f8171d3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,10 @@ config X86_64
 config X86
 	def_bool y
 
+config GENERIC_LOCKBREAK
+	def_bool y
+	depends on SMP && PREEMPT
+
 config GENERIC_TIME
 	def_bool y
 
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 0f69c416eebc..a5432bbbfb88 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -347,7 +347,8 @@ restart:
 				break;
 			}
 			retry = __process_buffer(journal, jh, bhs,&batch_count);
-			if (!retry && lock_need_resched(&journal->j_list_lock)){
+			if (!retry && (need_resched() ||
+				spin_needbreak(&journal->j_list_lock))) {
 				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 610264b99a8e..31853eb65b4c 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -265,7 +265,7 @@ write_out_data:
 			put_bh(bh);
 		}
 
-		if (lock_need_resched(&journal->j_list_lock)) {
+		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
 			spin_unlock(&journal->j_list_lock);
 			goto write_out_data;
 		}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1b7f282c1ae9..6914598022ce 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -353,7 +353,8 @@ restart:
 			}
 			retry = __process_buffer(journal, jh, bhs, &batch_count,
 						 transaction);
-			if (!retry && lock_need_resched(&journal->j_list_lock)){
+			if (!retry && (need_resched() ||
+				spin_needbreak(&journal->j_list_lock))) {
 				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index da8d0eb3b7b9..4f302d279279 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -341,7 +341,7 @@ write_out_data:
 			put_bh(bh);
 		}
 
-		if (lock_need_resched(&journal->j_list_lock)) {
+		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
 			spin_unlock(&journal->j_list_lock);
 			goto write_out_data;
 		}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2d0546e884ea..9d4797609aa5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void);
 
 /*
  * Does a critical section need to be broken due to another
- * task waiting?:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
  */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
-#else
-# define need_lockbreak(lock) 0
-#endif
-
-/*
- * Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
- */
-static inline int lock_need_resched(spinlock_t *lock)
+static inline int spin_needbreak(spinlock_t *lock)
 {
-	if (need_lockbreak(lock) || need_resched())
-		return 1;
+#ifdef CONFIG_PREEMPT
+	return spin_is_contended(lock);
+#else
 	return 0;
+#endif
 }
 
 /*
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index c376f3b36c89..124449733c55 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -120,6 +120,12 @@ do {								\
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
 
+#ifdef CONFIG_GENERIC_LOCKBREAK
+#define spin_is_contended(lock) ((lock)->break_lock)
+#else
+#define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index f6a3a951b79e..68d88f71f1a2 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -19,7 +19,7 @@
 
 typedef struct {
 	raw_spinlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -35,7 +35,7 @@ typedef struct {
 
 typedef struct {
 	raw_rwlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index ea54c4c9a4ec..938234c4a996 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 # define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
+#define __raw_spin_is_contended(lock)	(((void)(lock), 0))
+
 #define __raw_read_can_lock(lock)	(((void)(lock), 1))
 #define __raw_write_can_lock(lock)	(((void)(lock), 1))
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 524285e46fa7..ba4c88088f62 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int cond_resched_lock(spinlock_t *lock)
 {
+	int resched = need_resched() && system_state == SYSTEM_RUNNING;
 	int ret = 0;
 
-	if (need_lockbreak(lock)) {
+	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		cpu_relax();
-		ret = 1;
-		spin_lock(lock);
-	}
-	if (need_resched() && system_state == SYSTEM_RUNNING) {
-		spin_release(&lock->dep_map, 1, _THIS_IP_);
-		_raw_spin_unlock(lock);
-		preempt_enable_no_resched();
-		__cond_resched();
+		if (resched && need_resched())
+			__cond_resched();
+		else
+			cpu_relax();
 		ret = 1;
 		spin_lock(lock);
 	}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index cd72424c2662..ae28c8245123 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock);
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
-	defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 4b0144b24c12..673ebbf499c7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -513,8 +513,7 @@ again:
 		if (progress >= 32) {
 			progress = 0;
 			if (need_resched() ||
-			    need_lockbreak(src_ptl) ||
-			    need_lockbreak(dst_ptl))
+			    spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
 				break;
 		}
 		if (pte_none(*src_pte)) {
@@ -853,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			tlb_finish_mmu(*tlbp, tlb_start, start);
 
 			if (need_resched() ||
-				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
 				if (i_mmap_lock) {
 					*tlbp = NULL;
 					goto out;
@@ -1768,8 +1767,7 @@ again:
 
 	restart_addr = zap_page_range(vma, start_addr,
 					end_addr - start_addr, details);
-	need_break = need_resched() ||
-			need_lockbreak(details->i_mmap_lock);
+	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
 
 	if (restart_addr >= end_addr) {
 		/* We have now completed this vma: mark it so */

From 314cdbefd1fd0a7acf3780e9628465b77ea6a836 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 319/890] x86: FIFO ticket spinlocks

Introduce ticket lock spinlocks for x86 which are FIFO. The implementation
is described in the comments. The straight-line lock/unlock instruction
sequence is slightly slower than the dec based locks on modern x86 CPUs,
however the difference is quite small on Core2 and Opteron when working out of
cache, and becomes almost insignificant even on P4 when the lock misses cache.
trylock is more significantly slower, but they are relatively rare.

On an 8 core (2 socket) Opteron, spinlock unfairness is extremely noticable,
with a userspace test having a difference of up to 2x runtime per thread, and
some threads are starved or "unfairly" granted the lock up to 1 000 000 (!)
times. After this patch, all threads appear to finish at exactly the same
time.

The memory ordering of the lock does conform to x86 standards, and the
implementation has been reviewed by Intel and AMD engineers.

The algorithm also tells us how many CPUs are contending the lock, so
lockbreak becomes trivial and we no longer have to waste 4 bytes per
spinlock for it.

After this, we can no longer spin on any locks with preempt enabled
and cannot reenable interrupts when spinning on an irq safe lock, because
at that point we have already taken a ticket and the would deadlock if
the same CPU tries to take the lock again.  These are questionable anyway:
if the lock happens to be called under a preempt or interrupt disabled section,
then it will just have the same latency problems. The real fix is to keep
critical sections short, and ensure locks are reasonably fair (which this
patch does).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                 |   3 +-
 include/asm-x86/paravirt.h       |  21 -----
 include/asm-x86/spinlock.h       | 153 ++++++++++++++++---------------
 include/asm-x86/spinlock_types.h |   2 +-
 4 files changed, 83 insertions(+), 96 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db434f8171d3..1992b8fe6a2f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -20,8 +20,7 @@ config X86
 	def_bool y
 
 config GENERIC_LOCKBREAK
-	def_bool y
-	depends on SMP && PREEMPT
+	def_bool n
 
 config GENERIC_TIME
 	def_bool y
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 4f23f434a1f3..24406703007f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1077,27 +1077,6 @@ static inline unsigned long __raw_local_irq_save(void)
 	return f;
 }
 
-#define CLI_STRING							\
-	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
-		      "call *%[paravirt_cli_opptr];"			\
-		      "popl %%edx; popl %%ecx",				\
-		      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
-
-#define STI_STRING							\
-	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
-		      "call *%[paravirt_sti_opptr];"			\
-		      "popl %%edx; popl %%ecx",				\
-		      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
-
-#define CLI_STI_CLOBBERS , "%eax"
-#define CLI_STI_INPUT_ARGS						\
-	,								\
-	[paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)),		\
-	[paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),		\
-	[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)),		\
-	[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),		\
-	paravirt_clobber(CLBR_EAX)
-
 /* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef __PVOP_CALL
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index afd4b80ff0ad..97d52b506af8 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -5,6 +5,7 @@
 #include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <linux/compiler.h>
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -12,7 +13,8 @@
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.
  *
- * We make no fairness assumptions. They have a cost.
+ * These are fair FIFO ticket locks, which are currently limited to 256
+ * CPUs.
  *
  * (the type definitions are in asm/spinlock_types.h)
  */
@@ -42,103 +44,102 @@ typedef int _slock_t;
 # define LOCK_PTR_REG "D"
 #endif
 
+#if (NR_CPUS > 256)
+#error spinlock supports a maximum of 256 CPUs
+#endif
+
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
-		"jns 3f\n"
-		"2:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0,%0\n\t"
-		"jle 2b\n\t"
+	short inc = 0x0100;
+
+	/*
+	 * Ticket locks are conceptually two bytes, one indicating the current
+	 * head of the queue, and the other indicating the current tail. The
+	 * lock is acquired by atomically noting the tail and incrementing it
+	 * by one (thus adding ourself to the queue and noting our position),
+	 * then waiting until the head becomes equal to the the initial value
+	 * of the tail.
+	 *
+	 * This uses a 16-bit xadd to increment the tail and also load the
+	 * position of the head, which takes care of memory ordering issues
+	 * and should be optimal for the uncontended case. Note the tail must
+	 * be in the high byte, otherwise the 16-bit wide increment of the low
+	 * byte would carry up and contaminate the high byte.
+	 */
+
+	__asm__ __volatile__ (
+		LOCK_PREFIX "xaddw %w0, %1\n"
+		"1:\t"
+		"cmpb %h0, %b0\n\t"
+		"je 2f\n\t"
+		"rep ; nop\n\t"
+		"movb %1, %b0\n\t"
+		/* don't need lfence here, because loads are in-order */
 		"jmp 1b\n"
-		"3:\n\t"
-		: "+m" (lock->slock) : : "memory");
+		"2:"
+		:"+Q" (inc), "+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
 
-/*
- * It is easier for the lock validator if interrupts are not re-enabled
- * in the middle of a lock-acquire. This is a performance feature anyway
- * so we turn it off:
- *
- * NOTE: there's an irqs-on section here, which normally would have to be
- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
-					 unsigned long flags)
-{
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
-		"jns 5f\n"
-		"testl $0x200, %[flags]\n\t"
-		"jz 4f\n\t"
-		STI_STRING "\n"
-		"3:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jle 3b\n\t"
-		CLI_STRING "\n\t"
-		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: [slock] "+m" (lock->slock)
-		: [flags] "r" ((u32)flags)
-		  CLI_STI_INPUT_ARGS
-		: "memory" CLI_STI_CLOBBERS);
-}
-#endif
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	_slock_t oldval;
+	int tmp;
+	short new;
 
 	asm volatile(
-		LOCK_INS_XCH " %0,%1"
-		:"=q" (oldval), "+m" (lock->slock)
-		:"0" (0) : "memory");
+		"movw %2,%w0\n\t"
+		"cmpb %h0,%b0\n\t"
+		"jne 1f\n\t"
+		"movw %w0,%w1\n\t"
+		"incb %h1\n\t"
+		"lock ; cmpxchgw %w1,%2\n\t"
+		"1:"
+		"sete %b1\n\t"
+		"movzbl %b1,%0\n\t"
+		:"=&a" (tmp), "=Q" (new), "+m" (lock->slock)
+		:
+		: "memory", "cc");
 
-	return oldval > 0;
+	return tmp;
 }
 
+#if defined(CONFIG_X86_32) && \
+	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
 /*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
+ * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
  * (PPro errata 66, 92)
  */
-#if defined(X86_64) || \
-	(!defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE))
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
-}
-
+# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
 #else
+# define UNLOCK_LOCK_PREFIX
+#endif
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	unsigned char oldval = 1;
-
-	asm volatile("xchgb %b0, %1"
-		     : "=q" (oldval), "+m" (lock->slock)
-		     : "0" (oldval) : "memory");
+	__asm__ __volatile__(
+		UNLOCK_LOCK_PREFIX "incb %0"
+		:"+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
 
-#endif
-
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
@@ -159,11 +160,19 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
  * with the high bit (sign) being the "contended" bit.
  */
 
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
 static inline int __raw_read_can_lock(raw_rwlock_t *lock)
 {
 	return (int)(lock)->lock > 0;
 }
 
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
 static inline int __raw_write_can_lock(raw_rwlock_t *lock)
 {
 	return (lock)->lock == RW_LOCK_BIAS;
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 4da9345c1500..9029cf78cf5d 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	unsigned int slock;
 } raw_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
+#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	unsigned int lock;

From aa470140e86e45723cf8387292edbce9106ddc1f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 320/890] x86: kprobe-booster for x86-64

This patch adds kprobe-booster to kprobes_64.c.

- Changes are based on x86-32.
- Add REX prefix checking code.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes_64.c | 142 ++++++++++++++++++++++++++++++++++-
 include/asm-x86/kprobes_64.h |  10 +++
 2 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index f6837cd3bed5..bf0e18473677 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -55,6 +55,105 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
 };
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static __always_inline void set_jmp_op(void *from, void *to)
+{
+	struct __arch_jmp_op {
+		char op;
+		s32 raddr;
+	} __attribute__((packed)) * jop;
+	jop = (struct __arch_jmp_op *)from;
+	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
+	jop->op = RELATIVEJUMP_INSTRUCTION;
+}
+
+/*
+ * returns non-zero if opcode is boostable
+ * RIP relative instructions are adjusted at copying time
+ */
+static __always_inline int can_boost(kprobe_opcode_t *opcodes)
+{
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
+	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
+	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
+	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
+	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
+	 << (row % 64))
+	/*
+	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
+	 * Groups, and some special opcodes can not boost.
+	 */
+	static const unsigned long twobyte_is_boostable[256 / 64] = {
+		/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+		/*      ----------------------------------------------        */
+		W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0)|/* 00 */
+		W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 10 */
+		W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 20 */
+		W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),/* 30 */
+		W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 40 */
+		W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 50 */
+		W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1)|/* 60 */
+		W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1),/* 70 */
+		W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 80 */
+		W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 90 */
+		W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* a0 */
+		W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1),/* b0 */
+		W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)|/* c0 */
+		W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* d0 */
+		W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* e0 */
+		W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
+		/*      -----------------------------------------------       */
+		/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+	};
+#undef W
+	kprobe_opcode_t opcode;
+	kprobe_opcode_t *orig_opcodes = opcodes;
+
+retry:
+	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+		return 0;
+	opcode = *(opcodes++);
+
+	/* 2nd-byte opcode */
+	if (opcode == 0x0f) {
+		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+			return 0;
+		return test_bit(*opcodes, twobyte_is_boostable);
+	}
+
+	switch (opcode & 0xf0) {
+	case 0x40:
+		goto retry; /* REX prefix is boostable */
+	case 0x60:
+		if (0x63 < opcode && opcode < 0x67)
+			goto retry; /* prefixes */
+		/* can't boost Address-size override and bound */
+		return (opcode != 0x62 && opcode != 0x67);
+	case 0x70:
+		return 0; /* can't boost conditional jump */
+	case 0xc0:
+		/* can't boost software-interruptions */
+		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
+	case 0xd0:
+		/* can boost AA* and XLAT */
+		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
+	case 0xe0:
+		/* can boost in/out and absolute jmps */
+		return ((opcode & 0x04) || opcode == 0xea);
+	case 0xf0:
+		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
+			goto retry; /* lock/rep(ne) prefix */
+		/* clear and set flags are boostable */
+		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+	default:
+		/* segment override prefixes are boostable */
+		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
+			goto retry; /* prefixes */
+		/* CS override prefix and call are not boostable */
+		return (opcode != 0x2e && opcode != 0x9a);
+	}
+}
+
 /*
  * returns non-zero if opcode modifies the interrupt flag.
  */
@@ -86,7 +185,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 /*
  * Determine if the instruction uses the %rip-relative addressing mode.
- * If it does, return the address of the 32-bit displacement word.
+ * If it does, Return the address of the 32-bit displacement word.
  * If not, return null.
  */
 static s32 __kprobes *is_riprel(u8 *insn)
@@ -210,6 +309,11 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
 		BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
 		*ripdisp = disp;
 	}
+	if (can_boost(p->addr)) {
+		p->ainsn.boostable = 0;
+	} else {
+		p->ainsn.boostable = -1;
+	}
 	p->opcode = *p->addr;
 }
 
@@ -226,7 +330,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
+	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
 	mutex_unlock(&kprobe_mutex);
 }
 
@@ -384,6 +488,15 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 		return 1;
 
 ss_probe:
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+	if (p->ainsn.boostable == 1 && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		reset_current_kprobe();
+		regs->ip = (unsigned long)p->ainsn.insn;
+		preempt_enable_no_resched();
+		return 1;
+	}
+#endif
 	prepare_singlestep(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
@@ -493,6 +606,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
  * 2) If the single-stepped instruction was a call, the return address
  * that is atop the stack is the address following the copied instruction.
  * We need to make it the address following the original instruction.
+ *
+ * If this is the first time we've single-stepped the instruction at
+ * this probepoint, and the instruction is boostable, boost it: add a
+ * jump instruction after the copied instruction, that jumps to the next
+ * instruction after the probepoint.
  */
 static void __kprobes resume_execution(struct kprobe *p,
 		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
@@ -519,6 +637,7 @@ static void __kprobes resume_execution(struct kprobe *p,
 	case 0xcf:
 	case 0xea:	/* jmp absolute -- ip is correct */
 		/* ip is already adjusted, no more changes required */
+		p->ainsn.boostable = 1;
 		goto no_change;
 	case 0xe8:	/* call relative - Fix return addr */
 		*tos = orig_rip + (*tos - copy_rip);
@@ -527,17 +646,34 @@ static void __kprobes resume_execution(struct kprobe *p,
 		if ((insn[1] & 0x30) == 0x10) {
 			/* call absolute, indirect */
 			/* Fix return addr; ip is correct. */
+			/* not boostable */
 			*tos = orig_rip + (*tos - copy_rip);
 			goto no_change;
 		} else if (((insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
 			   ((insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
-			/* ip is correct. */
+			/* ip is correct. And this is boostable */
+			p->ainsn.boostable = 1;
 			goto no_change;
 		}
 	default:
 		break;
 	}
 
+	if (p->ainsn.boostable == 0) {
+		if ((regs->ip > copy_rip) &&
+		    (regs->ip - copy_rip) + 5 < MAX_INSN_SIZE) {
+			/*
+			 * These instructions can be executed directly if it
+			 * jumps back to correct address.
+			 */
+			set_jmp_op((void *)regs->ip,
+				   (void *)orig_rip + (regs->ip - copy_rip));
+			p->ainsn.boostable = 1;
+		} else {
+			p->ainsn.boostable = -1;
+		}
+	}
+
 	regs->ip = orig_rip + (regs->ip - copy_rip);
 
 no_change:
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
index 8c919d35cdd3..e7e921dac7c2 100644
--- a/include/asm-x86/kprobes_64.h
+++ b/include/asm-x86/kprobes_64.h
@@ -34,6 +34,7 @@ struct kprobe;
 
 typedef u8 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xcc
+#define RELATIVEJUMP_INSTRUCTION 0xe9
 #define MAX_INSN_SIZE 15
 #define MAX_STACK_SIZE 64
 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
@@ -52,6 +53,15 @@ extern void arch_remove_kprobe(struct kprobe *p);
 struct arch_specific_insn {
 	/* copy of the original instruction */
 	kprobe_opcode_t *insn;
+	/*
+	 * boostable = -1: This instruction type is not boostable.
+	 * boostable = 0: This instruction type is boostable.
+	 * boostable = 1: This instruction has been boosted: we have
+	 * added a relative jump after the instruction copy in insn,
+	 * so no single-step and fixup are needed (unless there's
+	 * a post_handler or break_handler).
+	 */
+	int boostable;
 };
 
 struct prev_kprobe {

From da07ab0375897bb9e108b28129df140ecd3ee94e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 321/890] x86: return probe-booster for x86-64

This patch adds kretprobe-booster to kprobes_64.c.

- Changes are based on x86-32.
- Rewrite register saving/restoring code

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes_64.c | 92 +++++++++++++++++++++++++-----------
 1 file changed, 65 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index bf0e18473677..bc93b1dd9a01 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -28,6 +28,8 @@
  *		Fixed to handle %rip-relative addressing mode correctly.
  * 2005-May     Rusty Lynch <rusty.lynch@intel.com>
  *              Added function return probes functionality
+ * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
+ * 		and kretprobe-booster for x86-64
  */
 
 #include <linux/kprobes.h>
@@ -507,21 +509,65 @@ no_kprobe:
 }
 
 /*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ * When a retprobed function returns, this code saves registers and
+ * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
- void kretprobe_trampoline_holder(void)
+ void __kprobes kretprobe_trampoline_holder(void)
  {
  	asm volatile (  ".global kretprobe_trampoline\n"
- 			"kretprobe_trampoline: \n"
- 			"nop\n");
+			"kretprobe_trampoline: \n"
+			/* We don't bother saving the ss register */
+			"	pushq %rsp\n"
+			"	pushfq\n"
+			/*
+			 * Skip cs, ip, orig_ax.
+			 * trampoline_handler() will plug in these values
+			 */
+			"	subq $24, %rsp\n"
+			"	pushq %rdi\n"
+			"	pushq %rsi\n"
+			"	pushq %rdx\n"
+			"	pushq %rcx\n"
+			"	pushq %rax\n"
+			"	pushq %r8\n"
+			"	pushq %r9\n"
+			"	pushq %r10\n"
+			"	pushq %r11\n"
+			"	pushq %rbx\n"
+			"	pushq %rbp\n"
+			"	pushq %r12\n"
+			"	pushq %r13\n"
+			"	pushq %r14\n"
+			"	pushq %r15\n"
+			"	movq %rsp, %rdi\n"
+			"	call trampoline_handler\n"
+			/* Replace saved sp with true return address. */
+			"	movq %rax, 152(%rsp)\n"
+			"	popq %r15\n"
+			"	popq %r14\n"
+			"	popq %r13\n"
+			"	popq %r12\n"
+			"	popq %rbp\n"
+			"	popq %rbx\n"
+			"	popq %r11\n"
+			"	popq %r10\n"
+			"	popq %r9\n"
+			"	popq %r8\n"
+			"	popq %rax\n"
+			"	popq %rcx\n"
+			"	popq %rdx\n"
+			"	popq %rsi\n"
+			"	popq %rdi\n"
+			/* Skip orig_ax, ip, cs */
+			"	addq $24, %rsp\n"
+			"	popfq\n"
+			"	ret\n");
  }
 
 /*
- * Called when we hit the probe point at kretprobe_trampoline
+ * Called from kretprobe_trampoline
  */
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
@@ -532,6 +578,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	INIT_HLIST_HEAD(&empty_rp);
 	spin_lock_irqsave(&kretprobe_lock, flags);
 	head = kretprobe_inst_table_head(current);
+	/* fixup rt_regs */
+	regs->cs = __KERNEL_CS;
+	regs->ip = trampoline_address;
+	regs->orig_ax = 0xffffffffffffffff;
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -551,8 +601,12 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler)
+		if (ri->rp && ri->rp->handler) {
+			__get_cpu_var(current_kprobe) = &ri->rp->kp;
+			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
 			ri->rp->handler(ri, regs);
+			__get_cpu_var(current_kprobe) = NULL;
+		}
 
 		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri, &empty_rp);
@@ -567,22 +621,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-	regs->ip = orig_ret_address;
 
-	reset_current_kprobe();
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
-	/*
-	 * By returning a non-zero value, we are telling
-	 * kprobe_handler() that we don't want the post_handler
-	 * to run (and have re-enabled preemption)
-	 */
-	return 1;
+	return (void *)orig_ret_address;
 }
 
 /*
@@ -881,20 +927,12 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
-	.pre_handler = trampoline_probe_handler
-};
-
 int __init arch_init_kprobes(void)
 {
-	return register_kprobe(&trampoline_p);
+	return 0;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
-	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
-		return 1;
-
 	return 0;
 }

From 8533bbe9f87b01f49ff951f665ea1988252fa3c2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 322/890] x86: prepare kprobes code for x86 unification

This patch cleanup kprobes code on x86 for unification.
This patch is based on Arjan's previous work.

- Remove spurious whitespace changes
- Add harmless includes
- Make the 32/64 files more identical
 - Generalize structure fields' and local variable name.
 - Wrap accessing to stack address by macros.
 - Modify bitmap making macro.
 - Merge fixup code into is_riprel() and change its name to fix_riprel().
 - Set MAX_INSN_SIZE to 16 on both arch.
 - Use u32 for bitmaps on both architectures.
 - Clarify some comments.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes_32.c | 341 ++++++++++++++++++-----------
 arch/x86/kernel/kprobes_64.c | 409 ++++++++++++++++++-----------------
 arch/x86/mm/fault_32.c       |   1 -
 arch/x86/mm/fault_64.c       |   1 -
 include/asm-x86/kprobes_32.h |  32 ++-
 include/asm-x86/kprobes_64.h |  30 +--
 6 files changed, 457 insertions(+), 357 deletions(-)

diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index 8eccd2d04709..8e06431d8b03 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -29,10 +29,15 @@
 
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/preempt.h>
+#include <linux/module.h>
 #include <linux/kdebug.h>
+
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 
@@ -41,31 +46,17 @@ void jprobe_return_end(void);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-struct kretprobe_blackpoint kretprobe_blacklist[] = {
-	{"__switch_to", }, /* This function switches only current task, but
-			     doesn't switch kernel stack.*/
-	{NULL, NULL}	/* Terminator */
-};
-const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-
-/* insert a jmp code */
-static __always_inline void set_jmp_op(void *from, void *to)
-{
-	struct __arch_jmp_op {
-		char op;
-		long raddr;
-	} __attribute__((packed)) *jop;
-	jop = (struct __arch_jmp_op *)from;
-	jop->raddr = (long)(to) - ((long)(from) + 5);
-	jop->op = RELATIVEJUMP_INSTRUCTION;
-}
-
 /*
- * returns non-zero if opcodes can be boosted.
+ * "&regs->sp" looks wrong, but it's correct for x86_32.  x86_32 CPUs
+ * don't save the ss and esp registers if the CPU is already in kernel
+ * mode when it traps.  So for kprobes, regs->sp and regs->ss are not
+ * the [nonexistent] saved stack pointer and ss register, but rather
+ * the top 8 bytes of the pre-int3 stack.  So &regs->sp happens to
+ * point to the top of the pre-int3 stack.
  */
-static __always_inline int can_boost(kprobe_opcode_t *opcodes)
-{
-#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
+#define stack_addr(regs) ((unsigned long *)&regs->sp)
+
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
 	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
 	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
@@ -73,33 +64,103 @@ static __always_inline int can_boost(kprobe_opcode_t *opcodes)
 	 << (row % 32))
 	/*
 	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
-	 * Groups, and some special opcodes can not be boost.
+	 * Groups, and some special opcodes can not boost.
 	 */
-	static const unsigned long twobyte_is_boostable[256 / 32] = {
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-		/*      -------------------------------         */
-		W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
-		W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
-		W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
-		W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
-		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
-		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
-		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
-		W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
-		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
-		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
-		W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
-		W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
-		W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
-		W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
-		W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
-		W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0)  /* f0 */
-		/*      -------------------------------         */
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-	};
+static const u32 twobyte_is_boostable[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      ----------------------------------------------          */
+	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
+	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
+	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
+	W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+	W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
+	W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
+	W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
+static const u32 onebyte_has_modrm[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      -----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
+	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+	W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
+	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+	W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
+	W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
+	W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
+	W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
+	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
+	W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
+static const u32 twobyte_has_modrm[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      -----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
+	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
+	W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
+	W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
+	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
+	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
+	W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
+	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
+	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
+	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
+	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
 #undef W
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+	{"__switch_to", }, /* This function switches only current task, but
+			      doesn't switch kernel stack.*/
+	{NULL, NULL}	/* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static __always_inline void set_jmp_op(void *from, void *to)
+{
+	struct __arch_jmp_op {
+		char op;
+		s32 raddr;
+	} __attribute__((packed)) * jop;
+	jop = (struct __arch_jmp_op *)from;
+	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
+	jop->op = RELATIVEJUMP_INSTRUCTION;
+}
+
+/*
+ * returns non-zero if opcode is boostable.
+ */
+static __always_inline int can_boost(kprobe_opcode_t *opcodes)
+{
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
+
 retry:
 	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 		return 0;
@@ -109,7 +170,8 @@ retry:
 	if (opcode == 0x0f) {
 		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 			return 0;
-		return test_bit(*opcodes, twobyte_is_boostable);
+		return test_bit(*opcodes,
+				(unsigned long *)twobyte_is_boostable);
 	}
 
 	switch (opcode & 0xf0) {
@@ -132,12 +194,13 @@ retry:
 	case 0xf0:
 		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
 			goto retry; /* lock/rep(ne) prefix */
-		/* clear and set flags can be boost */
+		/* clear and set flags are boostable */
 		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
 	default:
+		/* segment override prefixes are boostable */
 		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
 			goto retry; /* prefixes */
-		/* can't boost CS override and call */
+		/* CS override prefix and call are not boostable */
 		return (opcode != 0x2e && opcode != 0x9a);
 	}
 }
@@ -145,9 +208,9 @@ retry:
 /*
  * returns non-zero if opcode modifies the interrupt flag.
  */
-static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
+static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 {
-	switch (opcode) {
+	switch (*insn) {
 	case 0xfa:		/* cli */
 	case 0xfb:		/* sti */
 	case 0xcf:		/* iret/iretd */
@@ -157,20 +220,24 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
 	return 0;
 }
 
+static void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (can_boost(p->addr))
+		p->ainsn.boostable = 0;
+	else
+		p->ainsn.boostable = -1;
+
+	p->opcode = *p->addr;
+}
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
-	/* insn: must be on special executable page on i386. */
+	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)
 		return -ENOMEM;
-
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-	p->opcode = *p->addr;
-	if (can_boost(p->addr)) {
-		p->ainsn.boostable = 0;
-	} else {
-		p->ainsn.boostable = -1;
-	}
+	arch_copy_kprobe(p);
 	return 0;
 }
 
@@ -195,26 +262,26 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	kcb->prev_kprobe.kp = kprobe_running();
 	kcb->prev_kprobe.status = kcb->kprobe_status;
-	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
-	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
+	kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
+	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 }
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
 	kcb->kprobe_status = kcb->prev_kprobe.status;
-	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
-	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
+	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
+	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 }
 
 static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 				struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = p;
-	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
+	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
 		= (regs->flags & (TF_MASK | IF_MASK));
-	if (is_IF_modifier(p->opcode))
-		kcb->kprobe_saved_eflags &= ~IF_MASK;
+	if (is_IF_modifier(p->ainsn.insn))
+		kcb->kprobe_saved_flags &= ~IF_MASK;
 }
 
 static __always_inline void clear_btf(void)
@@ -245,7 +312,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	unsigned long *sara = (unsigned long *)&regs->sp;
+	unsigned long *sara = stack_addr(regs);
 
 	ri->ret_addr = (kprobe_opcode_t *) *sara;
 
@@ -280,7 +347,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
 				regs->flags &= ~TF_MASK;
-				regs->flags |= kcb->kprobe_saved_eflags;
+				regs->flags |= kcb->kprobe_saved_flags;
 				goto no_kprobe;
 			}
 			/* We have reentered the kprobe_handler(), since
@@ -301,7 +368,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * another cpu right after we hit, no further
 			 * handling of this interrupt is appropriate
 			 */
-				regs->ip -= sizeof(kprobe_opcode_t);
+				regs->ip = (unsigned long)addr;
 				ret = 1;
 				goto no_kprobe;
 			}
@@ -325,7 +392,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * Back up over the (now missing) int3 and run
 			 * the original instruction.
 			 */
-			regs->ip -= sizeof(kprobe_opcode_t);
+			regs->ip = (unsigned long)addr;
 			ret = 1;
 		}
 		/* Not one of ours: let kernel handle it */
@@ -341,7 +408,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 
 ss_probe:
 #if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
-	if (p->ainsn.boostable == 1 && !p->post_handler){
+	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
 		regs->ip = (unsigned long)p->ainsn.insn;
@@ -359,16 +426,18 @@ no_kprobe:
 }
 
 /*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ * When a retprobed function returns, this code saves registers and
+ * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
  void __kprobes kretprobe_trampoline_holder(void)
  {
 	asm volatile ( ".global kretprobe_trampoline\n"
 			"kretprobe_trampoline: \n"
 			"	pushf\n"
-			/* skip cs, ip, orig_ax */
+			/*
+			 * Skip cs, ip, orig_ax.
+			 * trampoline_handler() will plug in these values
+			 */
 			"	subl $12, %esp\n"
 			"	pushl %fs\n"
 			"	pushl %ds\n"
@@ -382,10 +451,10 @@ no_kprobe:
 			"	pushl %ebx\n"
 			"	movl %esp, %eax\n"
 			"	call trampoline_handler\n"
-			/* move flags to cs */
+			/* Move flags to cs */
 			"	movl 52(%esp), %edx\n"
 			"	movl %edx, 48(%esp)\n"
-			/* save true return address on flags */
+			/* Replace saved flags with true return address. */
 			"	movl %eax, 52(%esp)\n"
 			"	popl %ebx\n"
 			"	popl %ecx\n"
@@ -394,16 +463,16 @@ no_kprobe:
 			"	popl %edi\n"
 			"	popl %ebp\n"
 			"	popl %eax\n"
-			/* skip ip, orig_ax, es, ds, fs */
+			/* Skip ip, orig_ax, es, ds, fs */
 			"	addl $20, %esp\n"
 			"	popf\n"
 			"	ret\n");
-}
+ }
 
 /*
  * Called from kretprobe_trampoline
  */
-void *__kprobes trampoline_handler(struct pt_regs *regs)
+void * __kprobes trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
@@ -417,27 +486,27 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
 	/* fixup registers */
 	regs->cs = __KERNEL_CS | get_kernel_rpl();
 	regs->ip = trampoline_address;
-	regs->orig_ax = 0xffffffff;
+	regs->orig_ax = ~0UL;
 
 	/*
 	 * It is possible to have multiple instances associated with a given
-	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * task either because multiple functions in the call path have
+	 * return probes installed on them, and/or more then one
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
-	 *     - instances are always inserted at the head of the list
+	 *     - instances are always pushed into the head of the list
 	 *     - when multiple return probes are registered for the same
-	 *       function, the first instance's ret_addr will point to the
-	 *       real return address, and all the rest will point to
-	 *       kretprobe_trampoline
+	 *	 function, the (chronologically) first instance's ret_addr
+	 *	 will be the real return address, and all the rest will
+	 *	 point to kretprobe_trampoline.
 	 */
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler){
+		if (ri->rp && ri->rp->handler) {
 			__get_cpu_var(current_kprobe) = &ri->rp->kp;
 			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
 			ri->rp->handler(ri, regs);
@@ -457,13 +526,14 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
 	spin_unlock_irqrestore(&kretprobe_lock, flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
-	return (void*)orig_ret_address;
+	return (void *)orig_ret_address;
 }
 
 /*
@@ -488,48 +558,55 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
  * that is atop the stack is the address following the copied instruction.
  * We need to make it the address following the original instruction.
  *
- * This function also checks instruction size for preparing direct execution.
+ * If this is the first time we've single-stepped the instruction at
+ * this probepoint, and the instruction is boostable, boost it: add a
+ * jump instruction after the copied instruction, that jumps to the next
+ * instruction after the probepoint.
  */
 static void __kprobes resume_execution(struct kprobe *p,
 		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
-	unsigned long *tos = (unsigned long *)&regs->sp;
-	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
-	unsigned long orig_eip = (unsigned long)p->addr;
+	unsigned long *tos = stack_addr(regs);
+	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+	unsigned long orig_ip = (unsigned long)p->addr;
+	kprobe_opcode_t *insn = p->ainsn.insn;
 
 	regs->flags &= ~TF_MASK;
-	switch (p->ainsn.insn[0]) {
-	case 0x9c:		/* pushfl */
+	switch (*insn) {
+	case 0x9c:	/* pushfl */
 		*tos &= ~(TF_MASK | IF_MASK);
-		*tos |= kcb->kprobe_old_eflags;
+		*tos |= kcb->kprobe_old_flags;
 		break;
-	case 0xc2:		/* iret/ret/lret */
+	case 0xc2:	/* iret/ret/lret */
 	case 0xc3:
 	case 0xca:
 	case 0xcb:
 	case 0xcf:
-	case 0xea:		/* jmp absolute -- ip is correct */
+	case 0xea:	/* jmp absolute -- ip is correct */
 		/* ip is already adjusted, no more changes required */
 		p->ainsn.boostable = 1;
 		goto no_change;
-	case 0xe8:		/* call relative - Fix return addr */
-		*tos = orig_eip + (*tos - copy_eip);
+	case 0xe8:	/* call relative - Fix return addr */
+		*tos = orig_ip + (*tos - copy_ip);
 		break;
-	case 0x9a:		/* call absolute -- same as call absolute, indirect */
-		*tos = orig_eip + (*tos - copy_eip);
+	case 0x9a:	/* call absolute -- same as call absolute, indirect */
+		*tos = orig_ip + (*tos - copy_ip);
 		goto no_change;
 	case 0xff:
-		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
+		if ((insn[1] & 0x30) == 0x10) {
 			/*
 			 * call absolute, indirect
 			 * Fix return addr; ip is correct.
 			 * But this is not boostable
 			 */
-			*tos = orig_eip + (*tos - copy_eip);
+			*tos = orig_ip + (*tos - copy_ip);
 			goto no_change;
-		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
-			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
-			/* ip is correct. And this is boostable */
+		} else if (((insn[1] & 0x31) == 0x20) ||
+			   ((insn[1] & 0x31) == 0x21)) {
+			/*
+			 * jmp near and far, absolute indirect
+			 * ip is correct. And this is boostable
+			 */
 			p->ainsn.boostable = 1;
 			goto no_change;
 		}
@@ -538,21 +615,21 @@ static void __kprobes resume_execution(struct kprobe *p,
 	}
 
 	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_eip) &&
-		    (regs->ip - copy_eip) + 5 < MAX_INSN_SIZE) {
+		if ((regs->ip > copy_ip) &&
+		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
 			/*
 			 * These instructions can be executed directly if it
 			 * jumps back to correct address.
 			 */
 			set_jmp_op((void *)regs->ip,
-				   (void *)orig_eip + (regs->ip - copy_eip));
+				   (void *)orig_ip + (regs->ip - copy_ip));
 			p->ainsn.boostable = 1;
 		} else {
 			p->ainsn.boostable = -1;
 		}
 	}
 
-	regs->ip = orig_eip + (regs->ip - copy_eip);
+	regs->ip += orig_ip - copy_ip;
 
 no_change:
 	restore_btf();
@@ -578,10 +655,10 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	}
 
 	resume_execution(cur, regs, kcb);
-	regs->flags |= kcb->kprobe_saved_eflags;
+	regs->flags |= kcb->kprobe_saved_flags;
 	trace_hardirqs_fixup_flags(regs->flags);
 
-	/*Restore back the original saved kprobes variables and continue. */
+	/* Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
 		restore_previous_kprobe(kcb);
 		goto out;
@@ -617,7 +694,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * normal page fault.
 		 */
 		regs->ip = (unsigned long)cur->addr;
-		regs->flags |= kcb->kprobe_old_eflags;
+		regs->flags |= kcb->kprobe_old_flags;
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
@@ -628,7 +705,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 	case KPROBE_HIT_SSDONE:
 		/*
 		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accouting
+		 * we can also use npre/npostfault count for accounting
 		 * these specific fault cases.
 		 */
 		kprobes_inc_nmissed_count(cur);
@@ -651,7 +728,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 			return 1;
 
 		/*
-		 * fixup_exception() could not handle it,
+		 * fixup routine could not handle it,
 		 * Let do_page_fault() fix it.
 		 */
 		break;
@@ -662,7 +739,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 }
 
 /*
- * Wrapper routine to for handling exceptions.
+ * Wrapper routine for handling exceptions.
  */
 int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 				       unsigned long val, void *data)
@@ -703,11 +780,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	kcb->jprobe_saved_regs = *regs;
-	kcb->jprobe_saved_esp = &regs->sp;
-	addr = (unsigned long)(kcb->jprobe_saved_esp);
+	kcb->jprobe_saved_sp = stack_addr(regs);
+	addr = (unsigned long)(kcb->jprobe_saved_sp);
 
 	/*
-	 * TBD: As Linus pointed out, gcc assumes that the callee
+	 * As Linus pointed out, gcc assumes that the callee
 	 * owns the argument space and could overwrite it, e.g.
 	 * tailcall optimization. So, to be absolutely safe
 	 * we also save and restore enough stack bytes to cover
@@ -730,21 +807,20 @@ void __kprobes jprobe_return(void)
 		      "       .globl jprobe_return_end	\n"
 		      "       jprobe_return_end:	\n"
 		      "       nop			\n"::"b"
-		      (kcb->jprobe_saved_esp):"memory");
+		      (kcb->jprobe_saved_sp):"memory");
 }
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->ip - 1);
-	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
 	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if (&regs->sp != kcb->jprobe_saved_esp) {
+		if (stack_addr(regs) != kcb->jprobe_saved_sp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
 			printk("current sp %p does not match saved sp %p\n",
-			       &regs->sp, kcb->jprobe_saved_esp);
+			       stack_addr(regs), kcb->jprobe_saved_sp);
 			printk("Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
 			printk("Current registers\n");
@@ -752,20 +828,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 			BUG();
 		}
 		*regs = kcb->jprobe_saved_regs;
-		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
-		       MIN_STACK_SIZE(stack_addr));
+		memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
+		       kcb->jprobes_stack,
+		       MIN_STACK_SIZE(kcb->jprobe_saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
 	return 0;
 }
 
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
-{
-	return 0;
-}
-
 int __init arch_init_kprobes(void)
 {
 	return 0;
 }
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	return 0;
+}
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index bc93b1dd9a01..2d7763749b1b 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -40,16 +40,97 @@
 #include <linux/module.h>
 #include <linux/kdebug.h>
 
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 
 void jprobe_return_end(void);
-static void __kprobes arch_copy_kprobe(struct kprobe *p);
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+#define stack_addr(regs) ((unsigned long *)regs->sp)
+
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
+	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
+	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
+	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
+	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
+	 << (row % 32))
+	/*
+	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
+	 * Groups, and some special opcodes can not boost.
+	 */
+static const u32 twobyte_is_boostable[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      ----------------------------------------------          */
+	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
+	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
+	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
+	W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+	W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
+	W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
+	W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
+static const u32 onebyte_has_modrm[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      -----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
+	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+	W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
+	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+	W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
+	W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
+	W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
+	W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
+	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
+	W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
+static const u32 twobyte_has_modrm[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+	/*      -----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
+	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
+	W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
+	W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
+	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
+	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
+	W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
+	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
+	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
+	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
+	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
+	/*      -----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
+};
+#undef W
+
 struct kretprobe_blackpoint kretprobe_blacklist[] = {
 	{"__switch_to", }, /* This function switches only current task, but
 			      doesn't switch kernel stack.*/
@@ -70,44 +151,11 @@ static __always_inline void set_jmp_op(void *from, void *to)
 }
 
 /*
- * returns non-zero if opcode is boostable
+ * returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time
  */
 static __always_inline int can_boost(kprobe_opcode_t *opcodes)
 {
-#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
-	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
-	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
-	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
-	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
-	 << (row % 64))
-	/*
-	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
-	 * Groups, and some special opcodes can not boost.
-	 */
-	static const unsigned long twobyte_is_boostable[256 / 64] = {
-		/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
-		/*      ----------------------------------------------        */
-		W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0)|/* 00 */
-		W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 10 */
-		W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 20 */
-		W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),/* 30 */
-		W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 40 */
-		W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 50 */
-		W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1)|/* 60 */
-		W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1),/* 70 */
-		W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 80 */
-		W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 90 */
-		W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* a0 */
-		W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1),/* b0 */
-		W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)|/* c0 */
-		W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* d0 */
-		W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* e0 */
-		W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
-		/*      -----------------------------------------------       */
-		/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
-	};
-#undef W
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
@@ -120,7 +168,8 @@ retry:
 	if (opcode == 0x0f) {
 		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 			return 0;
-		return test_bit(*opcodes, twobyte_is_boostable);
+		return test_bit(*opcodes,
+				(unsigned long *)twobyte_is_boostable);
 	}
 
 	switch (opcode & 0xf0) {
@@ -169,80 +218,25 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 		return 1;
 	}
 
-	if (*insn  >= 0x40 && *insn <= 0x4f && *++insn == 0xcf)
-		return 1;
-	return 0;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
-	/* insn: must be on special executable page on x86_64. */
-	p->ainsn.insn = get_insn_slot();
-	if (!p->ainsn.insn) {
-		return -ENOMEM;
-	}
-	arch_copy_kprobe(p);
+	/*
+	 * on 64 bit x86, 0x40-0x4f are prefixes so we need to look
+	 * at the next byte instead.. but of course not recurse infinitely
+	 */
+	if (*insn  >= 0x40 && *insn <= 0x4f)
+		return is_IF_modifier(++insn);
 	return 0;
 }
 
 /*
- * Determine if the instruction uses the %rip-relative addressing mode.
+ * Adjust the displacement if the instruction uses the %rip-relative
+ * addressing mode.
  * If it does, Return the address of the 32-bit displacement word.
  * If not, return null.
  */
-static s32 __kprobes *is_riprel(u8 *insn)
+static void __kprobes fix_riprel(struct kprobe *p)
 {
-#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
-	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
-	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
-	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
-	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
-	 << (row % 64))
-	static const u64 onebyte_has_modrm[256 / 64] = {
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-		/*      -------------------------------         */
-		W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
-		W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
-		W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
-		W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
-		W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
-		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
-		W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
-		W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
-		W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
-		W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
-		W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
-		W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
-		W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
-		W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
-		W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
-		W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1)  /* f0 */
-		/*      -------------------------------         */
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-	};
-	static const u64 twobyte_has_modrm[256 / 64] = {
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-		/*      -------------------------------         */
-		W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
-		W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
-		W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
-		W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
-		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
-		W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
-		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
-		W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
-		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
-		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
-		W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
-		W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
-		W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
-		W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
-		W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
-		W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* ff */
-		/*      -------------------------------         */
-		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
-	};
-#undef	W
+	u8 *insn = p->ainsn.insn;
+	s64 disp;
 	int need_modrm;
 
 	/* Skip legacy instruction prefixes.  */
@@ -271,54 +265,60 @@ static s32 __kprobes *is_riprel(u8 *insn)
 
 	if (*insn == 0x0f) {	/* Two-byte opcode.  */
 		++insn;
-		need_modrm = test_bit(*insn, twobyte_has_modrm);
-	} else {		/* One-byte opcode.  */
-		need_modrm = test_bit(*insn, onebyte_has_modrm);
-	}
+		need_modrm = test_bit(*insn,
+				      (unsigned long *)twobyte_has_modrm);
+	} else			/* One-byte opcode.  */
+		need_modrm = test_bit(*insn,
+				      (unsigned long *)onebyte_has_modrm);
 
 	if (need_modrm) {
 		u8 modrm = *++insn;
 		if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
 			/* Displacement follows ModRM byte.  */
-			return (s32 *) ++insn;
+			++insn;
+			/*
+			 * The copied instruction uses the %rip-relative
+			 * addressing mode.  Adjust the displacement for the
+			 * difference between the original location of this
+			 * instruction and the location of the copy that will
+			 * actually be run.  The tricky bit here is making sure
+			 * that the sign extension happens correctly in this
+			 * calculation, since we need a signed 32-bit result to
+			 * be sign-extended to 64 bits when it's added to the
+			 * %rip value and yield the same 64-bit result that the
+			 * sign-extension of the original signed 32-bit
+			 * displacement would have given.
+			 */
+			disp = (u8 *) p->addr + *((s32 *) insn) -
+			       (u8 *) p->ainsn.insn;
+			BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
+			*(s32 *)insn = (s32) disp;
 		}
 	}
-
-	/* No %rip-relative addressing mode here.  */
-	return NULL;
 }
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
-	s32 *ripdisp;
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
-	ripdisp = is_riprel(p->ainsn.insn);
-	if (ripdisp) {
-		/*
-		 * The copied instruction uses the %rip-relative
-		 * addressing mode.  Adjust the displacement for the
-		 * difference between the original location of this
-		 * instruction and the location of the copy that will
-		 * actually be run.  The tricky bit here is making sure
-		 * that the sign extension happens correctly in this
-		 * calculation, since we need a signed 32-bit result to
-		 * be sign-extended to 64 bits when it's added to the
-		 * %rip value and yield the same 64-bit result that the
-		 * sign-extension of the original signed 32-bit
-		 * displacement would have given.
-		 */
-		s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
-		BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
-		*ripdisp = disp;
-	}
-	if (can_boost(p->addr)) {
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	fix_riprel(p);
+	if (can_boost(p->addr))
 		p->ainsn.boostable = 0;
-	} else {
+	else
 		p->ainsn.boostable = -1;
-	}
+
 	p->opcode = *p->addr;
 }
 
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	/* insn: must be on special executable page on x86. */
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
+		return -ENOMEM;
+	arch_copy_kprobe(p);
+	return 0;
+}
+
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
@@ -340,26 +340,26 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	kcb->prev_kprobe.kp = kprobe_running();
 	kcb->prev_kprobe.status = kcb->kprobe_status;
-	kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
-	kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
+	kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
+	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 }
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
 	kcb->kprobe_status = kcb->prev_kprobe.status;
-	kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
-	kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
+	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
+	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 }
 
 static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 				struct kprobe_ctlblk *kcb)
 {
 	__get_cpu_var(current_kprobe) = p;
-	kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
+	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
 		= (regs->flags & (TF_MASK | IF_MASK));
 	if (is_IF_modifier(p->ainsn.insn))
-		kcb->kprobe_saved_rflags &= ~IF_MASK;
+		kcb->kprobe_saved_flags &= ~IF_MASK;
 }
 
 static __always_inline void clear_btf(void)
@@ -390,20 +390,27 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	unsigned long *sara = (unsigned long *)regs->sp;
+	unsigned long *sara = stack_addr(regs);
 
 	ri->ret_addr = (kprobe_opcode_t *) *sara;
+
 	/* Replace the return addr with trampoline addr */
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
 
-int __kprobes kprobe_handler(struct pt_regs *regs)
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate and they
+ * remain disabled thorough out this function.
+ */
+static int __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p;
 	int ret = 0;
-	kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
+	kprobe_opcode_t *addr;
 	struct kprobe_ctlblk *kcb;
 
+	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
+
 	/*
 	 * We don't want to be preempted for the entire
 	 * duration of kprobe processing
@@ -418,7 +425,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
 				regs->flags &= ~TF_MASK;
-				regs->flags |= kcb->kprobe_saved_rflags;
+				regs->flags |= kcb->kprobe_saved_flags;
 				goto no_kprobe;
 			} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
 				/* TODO: Provide re-entrancy from
@@ -429,22 +436,20 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 				arch_disarm_kprobe(p);
 				regs->ip = (unsigned long)p->addr;
 				reset_current_kprobe();
-				ret = 1;
-			} else {
-				/* We have reentered the kprobe_handler(), since
-				 * another probe was hit while within the
-				 * handler. We here save the original kprobe
-				 * variables and just single step on instruction
-				 * of the new probe without calling any user
-				 * handlers.
-				 */
-				save_previous_kprobe(kcb);
-				set_current_kprobe(p, regs, kcb);
-				kprobes_inc_nmissed_count(p);
-				prepare_singlestep(p, regs);
-				kcb->kprobe_status = KPROBE_REENTER;
 				return 1;
 			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
 		} else {
 			if (*addr != BREAKPOINT_INSTRUCTION) {
 			/* The breakpoint instruction was removed by
@@ -578,23 +583,23 @@ fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
 	INIT_HLIST_HEAD(&empty_rp);
 	spin_lock_irqsave(&kretprobe_lock, flags);
 	head = kretprobe_inst_table_head(current);
-	/* fixup rt_regs */
+	/* fixup registers */
 	regs->cs = __KERNEL_CS;
 	regs->ip = trampoline_address;
-	regs->orig_ax = 0xffffffffffffffff;
+	regs->orig_ax = ~0UL;
 
 	/*
 	 * It is possible to have multiple instances associated with a given
-	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * task either because multiple functions in the call path have
+	 * return probes installed on them, and/or more then one
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
-	 *     - instances are always inserted at the head of the list
+	 *     - instances are always pushed into the head of the list
 	 *     - when multiple return probes are registered for the same
-	 *       function, the first instance's ret_addr will point to the
-	 *       real return address, and all the rest will point to
-	 *       kretprobe_trampoline
+	 *	 function, the (chronologically) first instance's ret_addr
+	 *	 will be the real return address, and all the rest will
+	 *	 point to kretprobe_trampoline.
 	 */
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 		if (ri->task != current)
@@ -661,9 +666,9 @@ fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
 static void __kprobes resume_execution(struct kprobe *p,
 		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
-	unsigned long *tos = (unsigned long *)regs->sp;
-	unsigned long copy_rip = (unsigned long)p->ainsn.insn;
-	unsigned long orig_rip = (unsigned long)p->addr;
+	unsigned long *tos = stack_addr(regs);
+	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+	unsigned long orig_ip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 
 	/*skip the REX prefix*/
@@ -674,7 +679,7 @@ static void __kprobes resume_execution(struct kprobe *p,
 	switch (*insn) {
 	case 0x9c:	/* pushfl */
 		*tos &= ~(TF_MASK | IF_MASK);
-		*tos |= kcb->kprobe_old_rflags;
+		*tos |= kcb->kprobe_old_flags;
 		break;
 	case 0xc2:	/* iret/ret/lret */
 	case 0xc3:
@@ -686,18 +691,23 @@ static void __kprobes resume_execution(struct kprobe *p,
 		p->ainsn.boostable = 1;
 		goto no_change;
 	case 0xe8:	/* call relative - Fix return addr */
-		*tos = orig_rip + (*tos - copy_rip);
+		*tos = orig_ip + (*tos - copy_ip);
 		break;
 	case 0xff:
 		if ((insn[1] & 0x30) == 0x10) {
-			/* call absolute, indirect */
-			/* Fix return addr; ip is correct. */
-			/* not boostable */
-			*tos = orig_rip + (*tos - copy_rip);
+			/*
+			 * call absolute, indirect
+			 * Fix return addr; ip is correct.
+			 * But this is not boostable
+			 */
+			*tos = orig_ip + (*tos - copy_ip);
 			goto no_change;
-		} else if (((insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
-			   ((insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
-			/* ip is correct. And this is boostable */
+		} else if (((insn[1] & 0x31) == 0x20) ||
+			   ((insn[1] & 0x31) == 0x21)) {
+			/*
+			 * jmp near and far, absolute indirect
+			 * ip is correct. And this is boostable
+			 */
 			p->ainsn.boostable = 1;
 			goto no_change;
 		}
@@ -706,21 +716,21 @@ static void __kprobes resume_execution(struct kprobe *p,
 	}
 
 	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_rip) &&
-		    (regs->ip - copy_rip) + 5 < MAX_INSN_SIZE) {
+		if ((regs->ip > copy_ip) &&
+		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
 			/*
 			 * These instructions can be executed directly if it
 			 * jumps back to correct address.
 			 */
 			set_jmp_op((void *)regs->ip,
-				   (void *)orig_rip + (regs->ip - copy_rip));
+				   (void *)orig_ip + (regs->ip - copy_ip));
 			p->ainsn.boostable = 1;
 		} else {
 			p->ainsn.boostable = -1;
 		}
 	}
 
-	regs->ip = orig_rip + (regs->ip - copy_rip);
+	regs->ip += orig_ip - copy_ip;
 
 no_change:
 	restore_btf();
@@ -728,7 +738,11 @@ no_change:
 	return;
 }
 
-int __kprobes post_kprobe_handler(struct pt_regs *regs)
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled thoroughout this function.
+ */
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -742,10 +756,10 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	}
 
 	resume_execution(cur, regs, kcb);
-	regs->flags |= kcb->kprobe_saved_rflags;
+	regs->flags |= kcb->kprobe_saved_flags;
 	trace_hardirqs_fixup_flags(regs->flags);
 
-	/* Restore the original saved kprobes variables and continue. */
+	/* Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
 		restore_previous_kprobe(kcb);
 		goto out;
@@ -782,7 +796,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * normal page fault.
 		 */
 		regs->ip = (unsigned long)cur->addr;
-		regs->flags |= kcb->kprobe_old_rflags;
+		regs->flags |= kcb->kprobe_old_flags;
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
@@ -793,7 +807,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 	case KPROBE_HIT_SSDONE:
 		/*
 		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accouting
+		 * we can also use npre/npostfault count for accounting
 		 * these specific fault cases.
 		 */
 		kprobes_inc_nmissed_count(cur);
@@ -819,7 +833,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		}
 
 		/*
-		 * fixup() could not handle it,
+		 * fixup routine could not handle it,
 		 * Let do_page_fault() fix it.
 		 */
 		break;
@@ -838,7 +852,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int ret = NOTIFY_DONE;
 
-	if (args->regs && user_mode(args->regs))
+	if (args->regs && user_mode_vm(args->regs))
 		return ret;
 
 	switch (val) {
@@ -871,8 +885,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	kcb->jprobe_saved_regs = *regs;
-	kcb->jprobe_saved_rsp = (long *) regs->sp;
-	addr = (unsigned long)(kcb->jprobe_saved_rsp);
+	kcb->jprobe_saved_sp = stack_addr(regs);
+	addr = (unsigned long)(kcb->jprobe_saved_sp);
+
 	/*
 	 * As Linus pointed out, gcc assumes that the callee
 	 * owns the argument space and could overwrite it, e.g.
@@ -897,21 +912,20 @@ void __kprobes jprobe_return(void)
 		      "       .globl jprobe_return_end	\n"
 		      "       jprobe_return_end:	\n"
 		      "       nop			\n"::"b"
-		      (kcb->jprobe_saved_rsp):"memory");
+		      (kcb->jprobe_saved_sp):"memory");
 }
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->ip - 1);
-	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
 	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if ((unsigned long *)regs->sp != kcb->jprobe_saved_rsp) {
+		if (stack_addr(regs) != kcb->jprobe_saved_sp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
 			printk("current sp %p does not match saved sp %p\n",
-			       (long *)regs->sp, kcb->jprobe_saved_rsp);
+			       stack_addr(regs), kcb->jprobe_saved_sp);
 			printk("Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
 			printk("Current registers\n");
@@ -919,8 +933,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 			BUG();
 		}
 		*regs = kcb->jprobe_saved_regs;
-		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
-		       MIN_STACK_SIZE(stack_addr));
+		memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
+		       kcb->jprobes_stack,
+		       MIN_STACK_SIZE(kcb->jprobe_saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 8aed912b04ec..db8d748814e4 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -25,7 +25,6 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
-#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 88a7abda29ce..162050d4e5a3 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -25,7 +25,6 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
-#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/pgalloc.h>
diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h
index 2f38315bc39f..94dff09b2ebe 100644
--- a/include/asm-x86/kprobes_32.h
+++ b/include/asm-x86/kprobes_32.h
@@ -2,7 +2,6 @@
 #define _ASM_KPROBES_H
 /*
  *  Kernel Probes (KProbes)
- *  include/asm-i386/kprobes.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,14 +22,17 @@
  * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
  *		Probes initial implementation ( includes suggestions from
  *		Rusty Russell).
+ * 2004-Oct	Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
+ *		kenistoj@us.ibm.com adopted from i386.
  */
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/percpu.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
 
-struct kprobe;
 struct pt_regs;
+struct kprobe;
 
 typedef u8 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xcc
@@ -38,9 +40,11 @@ typedef u8 kprobe_opcode_t;
 #define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
-	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE \
+	 - (unsigned long)(ADDR))) \
 	? (MAX_STACK_SIZE) \
-	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
+	   - (unsigned long)(ADDR)))
 
 #define ARCH_SUPPORTS_KRETPROBES
 #define flush_insn_slot(p)	do { } while (0)
@@ -55,8 +59,12 @@ struct arch_specific_insn {
 	/* copy of the original instruction */
 	kprobe_opcode_t *insn;
 	/*
-	 * If this flag is not 0, this kprobe can be boost when its
-	 * post_handler and break_handler is not set.
+	 * boostable = -1: This instruction type is not boostable.
+	 * boostable = 0: This instruction type is boostable.
+	 * boostable = 1: This instruction has been boosted: we have
+	 * added a relative jump after the instruction copy in insn,
+	 * so no single-step and fixup are needed (unless there's
+	 * a post_handler or break_handler).
 	 */
 	int boostable;
 };
@@ -64,16 +72,16 @@ struct arch_specific_insn {
 struct prev_kprobe {
 	struct kprobe *kp;
 	unsigned long status;
-	unsigned long old_eflags;
-	unsigned long saved_eflags;
+	unsigned long old_flags;
+	unsigned long saved_flags;
 };
 
 /* per-cpu kprobe control block */
 struct kprobe_ctlblk {
 	unsigned long kprobe_status;
-	unsigned long kprobe_old_eflags;
-	unsigned long kprobe_saved_eflags;
-	unsigned long *jprobe_saved_esp;
+	unsigned long kprobe_old_flags;
+	unsigned long kprobe_saved_flags;
+	unsigned long *jprobe_saved_sp;
 	struct pt_regs jprobe_saved_regs;
 	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
 	struct prev_kprobe prev_kprobe;
@@ -88,7 +96,7 @@ static inline void restore_interrupts(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
-extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 #endif				/* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
index e7e921dac7c2..94dff09b2ebe 100644
--- a/include/asm-x86/kprobes_64.h
+++ b/include/asm-x86/kprobes_64.h
@@ -2,7 +2,6 @@
 #define _ASM_KPROBES_H
 /*
  *  Kernel Probes (KProbes)
- *  include/asm-x86_64/kprobes.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -20,6 +19,9 @@
  *
  * Copyright (C) IBM Corporation, 2002, 2004
  *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
  * 2004-Oct	Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
  *		kenistoj@us.ibm.com adopted from i386.
  */
@@ -35,19 +37,22 @@ struct kprobe;
 typedef u8 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xcc
 #define RELATIVEJUMP_INSTRUCTION 0xe9
-#define MAX_INSN_SIZE 15
+#define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
-	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE \
+	 - (unsigned long)(ADDR))) \
 	? (MAX_STACK_SIZE) \
-	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
+	   - (unsigned long)(ADDR)))
 
 #define ARCH_SUPPORTS_KRETPROBES
+#define flush_insn_slot(p)	do { } while (0)
+
 extern const int kretprobe_blacklist_size;
 
+void arch_remove_kprobe(struct kprobe *p);
 void kretprobe_trampoline(void);
-extern void arch_remove_kprobe(struct kprobe *p);
-#define flush_insn_slot(p)	do { } while (0)
 
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
@@ -67,16 +72,16 @@ struct arch_specific_insn {
 struct prev_kprobe {
 	struct kprobe *kp;
 	unsigned long status;
-	unsigned long old_rflags;
-	unsigned long saved_rflags;
+	unsigned long old_flags;
+	unsigned long saved_flags;
 };
 
 /* per-cpu kprobe control block */
 struct kprobe_ctlblk {
 	unsigned long kprobe_status;
-	unsigned long kprobe_old_rflags;
-	unsigned long kprobe_saved_rflags;
-	unsigned long *jprobe_saved_rsp;
+	unsigned long kprobe_old_flags;
+	unsigned long kprobe_saved_flags;
+	unsigned long *jprobe_saved_sp;
 	struct pt_regs jprobe_saved_regs;
 	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
 	struct prev_kprobe prev_kprobe;
@@ -91,10 +96,7 @@ static inline void restore_interrupts(struct pt_regs *regs)
 		local_irq_enable();
 }
 
-extern int post_kprobe_handler(struct pt_regs *regs);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-extern int kprobe_handler(struct pt_regs *regs);
-
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
 #endif				/* _ASM_KPROBES_H */

From d6be29b871e285d33be0e3025929e2d6bcabb0c0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 323/890] x86: kprobes code for x86 unification

This patch unifies kprobes code.

- Unify kprobes_*.h to kprobes.h
- Unify kprobes_*.c to kprobes.c
  (Differences are separated by ifdefs)
 - Most differences are related to REX prefix and rip relatives.
 - Two inline assembly code are different.
 - One difference in kprobe_handlre()
 - One fixup exception code is different, but it will be unified
   if mm/extable_*.c are unified.
- Merge history logs into arch/x86/kernel/kprobes.c.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32                 |   2 +-
 arch/x86/kernel/Makefile_64                 |   2 +-
 arch/x86/kernel/{kprobes_64.c => kprobes.c} | 164 +++-
 arch/x86/kernel/kprobes_32.c                | 848 --------------------
 include/asm-x86/kprobes.h                   | 103 ++-
 include/asm-x86/kprobes_32.h                | 102 ---
 include/asm-x86/kprobes_64.h                | 102 ---
 7 files changed, 228 insertions(+), 1095 deletions(-)
 rename arch/x86/kernel/{kprobes_64.c => kprobes.c} (87%)
 delete mode 100644 arch/x86/kernel/kprobes_32.c
 delete mode 100644 include/asm-x86/kprobes_32.h
 delete mode 100644 include/asm-x86/kprobes_64.h

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a854e23eac0b..c7a959da363a 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -35,7 +35,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec_32.o relocate_kernel_32.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_32.o
 obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
-obj-$(CONFIG_KPROBES)		+= kprobes_32.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module_32.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_32.o efi_stub_32.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index e5093dd8cf01..fbb370071239 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -34,7 +34,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
 obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
-obj-$(CONFIG_KPROBES)		+= kprobes_64.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
 obj-$(CONFIG_K8_NB)		+= k8.o
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes.c
similarity index 87%
rename from arch/x86/kernel/kprobes_64.c
rename to arch/x86/kernel/kprobes.c
index 2d7763749b1b..9aadd4d4a229 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes.c
@@ -22,14 +22,22 @@
  *		Rusty Russell).
  * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
  *		interface to access function arguments.
- * 2004-Oct	Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
- *		<prasanna@in.ibm.com> adapted for x86_64
+ * 2004-Oct	Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ *		<prasanna@in.ibm.com> adapted for x86_64 from i386.
  * 2005-Mar	Roland McGrath <roland@redhat.com>
  *		Fixed to handle %rip-relative addressing mode correctly.
- * 2005-May     Rusty Lynch <rusty.lynch@intel.com>
- *              Added function return probes functionality
+ * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
+ *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ *		<prasanna@in.ibm.com> added function-return probes.
+ * 2005-May	Rusty Lynch <rusty.lynch@intel.com>
+ * 		Added function return probes functionality
+ * 2006-Feb	Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
+ * 		kprobe-booster and kretprobe-booster for i386.
  * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
  * 		and kretprobe-booster for x86-64
+ * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
+ * 		<arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ * 		unified x86 kprobes code.
  */
 
 #include <linux/kprobes.h>
@@ -51,7 +59,19 @@ void jprobe_return_end(void);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+#ifdef CONFIG_X86_64
 #define stack_addr(regs) ((unsigned long *)regs->sp)
+#else
+/*
+ * "&regs->sp" looks wrong, but it's correct for x86_32.  x86_32 CPUs
+ * don't save the ss and esp registers if the CPU is already in kernel
+ * mode when it traps.  So for kprobes, regs->sp and regs->ss are not
+ * the [nonexistent] saved stack pointer and ss register, but rather
+ * the top 8 bytes of the pre-int3 stack.  So &regs->sp happens to
+ * point to the top of the pre-int3 stack.
+ */
+#define stack_addr(regs) ((unsigned long *)&regs->sp)
+#endif
 
 #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
@@ -151,8 +171,8 @@ static __always_inline void set_jmp_op(void *from, void *to)
 }
 
 /*
- * returns non-zero if opcode is boostable.
- * RIP relative instructions are adjusted at copying time
+ * Returns non-zero if opcode is boostable.
+ * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
 static __always_inline int can_boost(kprobe_opcode_t *opcodes)
 {
@@ -173,8 +193,10 @@ retry:
 	}
 
 	switch (opcode & 0xf0) {
+#ifdef CONFIG_X86_64
 	case 0x40:
 		goto retry; /* REX prefix is boostable */
+#endif
 	case 0x60:
 		if (0x63 < opcode && opcode < 0x67)
 			goto retry; /* prefixes */
@@ -206,7 +228,7 @@ retry:
 }
 
 /*
- * returns non-zero if opcode modifies the interrupt flag.
+ * Returns non-zero if opcode modifies the interrupt flag.
  */
 static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 {
@@ -217,16 +239,18 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 	case 0x9d:		/* popf/popfd */
 		return 1;
 	}
-
+#ifdef CONFIG_X86_64
 	/*
 	 * on 64 bit x86, 0x40-0x4f are prefixes so we need to look
 	 * at the next byte instead.. but of course not recurse infinitely
 	 */
 	if (*insn  >= 0x40 && *insn <= 0x4f)
 		return is_IF_modifier(++insn);
+#endif
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 /*
  * Adjust the displacement if the instruction uses the %rip-relative
  * addressing mode.
@@ -263,17 +287,20 @@ static void __kprobes fix_riprel(struct kprobe *p)
 	if ((*insn & 0xf0) == 0x40)
 		++insn;
 
-	if (*insn == 0x0f) {	/* Two-byte opcode.  */
+	if (*insn == 0x0f) {
+		/* Two-byte opcode.  */
 		++insn;
 		need_modrm = test_bit(*insn,
 				      (unsigned long *)twobyte_has_modrm);
-	} else			/* One-byte opcode.  */
+	} else
+		/* One-byte opcode.  */
 		need_modrm = test_bit(*insn,
 				      (unsigned long *)onebyte_has_modrm);
 
 	if (need_modrm) {
 		u8 modrm = *++insn;
-		if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
+		if ((modrm & 0xc7) == 0x05) {
+			/* %rip+disp32 addressing mode */
 			/* Displacement follows ModRM byte.  */
 			++insn;
 			/*
@@ -296,11 +323,14 @@ static void __kprobes fix_riprel(struct kprobe *p)
 		}
 	}
 }
+#endif
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+#ifdef CONFIG_X86_64
 	fix_riprel(p);
+#endif
 	if (can_boost(p->addr))
 		p->ainsn.boostable = 0;
 	else
@@ -365,13 +395,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 static __always_inline void clear_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+		wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0);
 }
 
 static __always_inline void restore_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
+		wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0);
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -427,6 +457,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 				regs->flags &= ~TF_MASK;
 				regs->flags |= kcb->kprobe_saved_flags;
 				goto no_kprobe;
+#ifdef CONFIG_X86_64
 			} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
 				/* TODO: Provide re-entrancy from
 				 * post_kprobes_handler() and avoid exception
@@ -437,6 +468,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 				regs->ip = (unsigned long)p->addr;
 				reset_current_kprobe();
 				return 1;
+#endif
 			}
 			/* We have reentered the kprobe_handler(), since
 			 * another probe was hit while within the handler.
@@ -461,9 +493,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 				goto no_kprobe;
 			}
 			p = __get_cpu_var(current_kprobe);
-			if (p->break_handler && p->break_handler(p, regs)) {
+			if (p->break_handler && p->break_handler(p, regs))
 				goto ss_probe;
-			}
 		}
 		goto no_kprobe;
 	}
@@ -519,8 +550,10 @@ no_kprobe:
  */
  void __kprobes kretprobe_trampoline_holder(void)
  {
- 	asm volatile (  ".global kretprobe_trampoline\n"
+	asm volatile (
+			".global kretprobe_trampoline\n"
 			"kretprobe_trampoline: \n"
+#ifdef CONFIG_X86_64
 			/* We don't bother saving the ss register */
 			"	pushq %rsp\n"
 			"	pushfq\n"
@@ -566,25 +599,64 @@ no_kprobe:
 			/* Skip orig_ax, ip, cs */
 			"	addq $24, %rsp\n"
 			"	popfq\n"
+#else
+			"	pushf\n"
+			/*
+			 * Skip cs, ip, orig_ax.
+			 * trampoline_handler() will plug in these values
+			 */
+			"	subl $12, %esp\n"
+			"	pushl %fs\n"
+			"	pushl %ds\n"
+			"	pushl %es\n"
+			"	pushl %eax\n"
+			"	pushl %ebp\n"
+			"	pushl %edi\n"
+			"	pushl %esi\n"
+			"	pushl %edx\n"
+			"	pushl %ecx\n"
+			"	pushl %ebx\n"
+			"	movl %esp, %eax\n"
+			"	call trampoline_handler\n"
+			/* Move flags to cs */
+			"	movl 52(%esp), %edx\n"
+			"	movl %edx, 48(%esp)\n"
+			/* Replace saved flags with true return address. */
+			"	movl %eax, 52(%esp)\n"
+			"	popl %ebx\n"
+			"	popl %ecx\n"
+			"	popl %edx\n"
+			"	popl %esi\n"
+			"	popl %edi\n"
+			"	popl %ebp\n"
+			"	popl %eax\n"
+			/* Skip ip, orig_ax, es, ds, fs */
+			"	addl $20, %esp\n"
+			"	popf\n"
+#endif
 			"	ret\n");
  }
 
 /*
  * Called from kretprobe_trampoline
  */
-fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
+void * __kprobes trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
 	struct hlist_node *node, *tmp;
 	unsigned long flags, orig_ret_address = 0;
-	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	spin_lock_irqsave(&kretprobe_lock, flags);
 	head = kretprobe_inst_table_head(current);
 	/* fixup registers */
+#ifdef CONFIG_X86_64
 	regs->cs = __KERNEL_CS;
+#else
+	regs->cs = __KERNEL_CS | get_kernel_rpl();
+#endif
 	regs->ip = trampoline_address;
 	regs->orig_ax = ~0UL;
 
@@ -671,9 +743,11 @@ static void __kprobes resume_execution(struct kprobe *p,
 	unsigned long orig_ip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 
+#ifdef CONFIG_X86_64
 	/*skip the REX prefix*/
 	if (*insn >= 0x40 && *insn <= 0x4f)
 		insn++;
+#endif
 
 	regs->flags &= ~TF_MASK;
 	switch (*insn) {
@@ -693,6 +767,11 @@ static void __kprobes resume_execution(struct kprobe *p,
 	case 0xe8:	/* call relative - Fix return addr */
 		*tos = orig_ip + (*tos - copy_ip);
 		break;
+#ifndef CONFIG_X86_64
+	case 0x9a:	/* call absolute -- same as call absolute, indirect */
+		*tos = orig_ip + (*tos - copy_ip);
+		goto no_change;
+#endif
 	case 0xff:
 		if ((insn[1] & 0x30) == 0x10) {
 			/*
@@ -783,9 +862,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	const struct exception_table_entry *fixup;
 
-	switch(kcb->kprobe_status) {
+	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SS:
 	case KPROBE_REENTER:
 		/*
@@ -826,12 +904,19 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		fixup = search_exception_tables(regs->ip);
-		if (fixup) {
-			regs->ip = fixup->fixup;
-			return 1;
+#ifdef CONFIG_X86_64
+		{
+			const struct exception_table_entry *fixup;
+			fixup = search_exception_tables(regs->ip);
+			if (fixup) {
+				regs->ip = fixup->fixup;
+				return 1;
+			}
 		}
-
+#else
+		if (fixup_exception(regs))
+			return 1;
+#endif
 		/*
 		 * fixup routine could not handle it,
 		 * Let do_page_fault() fix it.
@@ -896,7 +981,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * the argument area.
 	 */
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-			MIN_STACK_SIZE(addr));
+	       MIN_STACK_SIZE(addr));
 	regs->flags &= ~IF_MASK;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
@@ -907,12 +992,17 @@ void __kprobes jprobe_return(void)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	asm volatile ("       xchg   %%rbx,%%rsp     \n"
-		      "       int3			\n"
-		      "       .globl jprobe_return_end	\n"
-		      "       jprobe_return_end:	\n"
-		      "       nop			\n"::"b"
-		      (kcb->jprobe_saved_sp):"memory");
+	asm volatile (
+#ifdef CONFIG_X86_64
+			"       xchg   %%rbx,%%rsp	\n"
+#else
+			"       xchgl   %%ebx,%%esp	\n"
+#endif
+			"       int3			\n"
+			"       .globl jprobe_return_end\n"
+			"       jprobe_return_end:	\n"
+			"       nop			\n"::"b"
+			(kcb->jprobe_saved_sp):"memory");
 }
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
@@ -921,14 +1011,16 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	u8 *addr = (u8 *) (regs->ip - 1);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
-	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
+	if ((addr > (u8 *) jprobe_return) &&
+	    (addr < (u8 *) jprobe_return_end)) {
 		if (stack_addr(regs) != kcb->jprobe_saved_sp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
-			printk("current sp %p does not match saved sp %p\n",
+			printk(KERN_ERR
+			       "current sp %p does not match saved sp %p\n",
 			       stack_addr(regs), kcb->jprobe_saved_sp);
-			printk("Saved registers for jprobe %p\n", jp);
+			printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
-			printk("Current registers\n");
+			printk(KERN_ERR "Current registers\n");
 			show_registers(regs);
 			BUG();
 		}
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
deleted file mode 100644
index 8e06431d8b03..000000000000
--- a/arch/x86/kernel/kprobes_32.c
+++ /dev/null
@@ -1,848 +0,0 @@
-/*
- *  Kernel Probes (KProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- *		Probes initial implementation ( includes contributions from
- *		Rusty Russell).
- * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- *		interface to access function arguments.
- * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
- *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
- *		<prasanna@in.ibm.com> added function-return probes.
- */
-
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/preempt.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-
-#include <asm/cacheflush.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/alternative.h>
-
-void jprobe_return_end(void);
-
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
-DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-
-/*
- * "&regs->sp" looks wrong, but it's correct for x86_32.  x86_32 CPUs
- * don't save the ss and esp registers if the CPU is already in kernel
- * mode when it traps.  So for kprobes, regs->sp and regs->ss are not
- * the [nonexistent] saved stack pointer and ss register, but rather
- * the top 8 bytes of the pre-int3 stack.  So &regs->sp happens to
- * point to the top of the pre-int3 stack.
- */
-#define stack_addr(regs) ((unsigned long *)&regs->sp)
-
-#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
-	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
-	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
-	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
-	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
-	 << (row % 32))
-	/*
-	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
-	 * Groups, and some special opcodes can not boost.
-	 */
-static const u32 twobyte_is_boostable[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      ----------------------------------------------          */
-	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
-	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
-	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
-	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
-	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
-	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
-	W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
-	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-	W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
-	W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-	W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
-	W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
-	W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-static const u32 onebyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
-	W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
-	W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
-	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
-	W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
-	W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
-	W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
-	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
-	W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
-	W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
-	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
-	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
-	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
-	W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
-	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
-	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
-	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
-	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-#undef W
-
-struct kretprobe_blackpoint kretprobe_blacklist[] = {
-	{"__switch_to", }, /* This function switches only current task, but
-			      doesn't switch kernel stack.*/
-	{NULL, NULL}	/* Terminator */
-};
-const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-
-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static __always_inline void set_jmp_op(void *from, void *to)
-{
-	struct __arch_jmp_op {
-		char op;
-		s32 raddr;
-	} __attribute__((packed)) * jop;
-	jop = (struct __arch_jmp_op *)from;
-	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
-	jop->op = RELATIVEJUMP_INSTRUCTION;
-}
-
-/*
- * returns non-zero if opcode is boostable.
- */
-static __always_inline int can_boost(kprobe_opcode_t *opcodes)
-{
-	kprobe_opcode_t opcode;
-	kprobe_opcode_t *orig_opcodes = opcodes;
-
-retry:
-	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-		return 0;
-	opcode = *(opcodes++);
-
-	/* 2nd-byte opcode */
-	if (opcode == 0x0f) {
-		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
-			return 0;
-		return test_bit(*opcodes,
-				(unsigned long *)twobyte_is_boostable);
-	}
-
-	switch (opcode & 0xf0) {
-	case 0x60:
-		if (0x63 < opcode && opcode < 0x67)
-			goto retry; /* prefixes */
-		/* can't boost Address-size override and bound */
-		return (opcode != 0x62 && opcode != 0x67);
-	case 0x70:
-		return 0; /* can't boost conditional jump */
-	case 0xc0:
-		/* can't boost software-interruptions */
-		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
-	case 0xd0:
-		/* can boost AA* and XLAT */
-		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
-	case 0xe0:
-		/* can boost in/out and absolute jmps */
-		return ((opcode & 0x04) || opcode == 0xea);
-	case 0xf0:
-		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
-			goto retry; /* lock/rep(ne) prefix */
-		/* clear and set flags are boostable */
-		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
-	default:
-		/* segment override prefixes are boostable */
-		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
-			goto retry; /* prefixes */
-		/* CS override prefix and call are not boostable */
-		return (opcode != 0x2e && opcode != 0x9a);
-	}
-}
-
-/*
- * returns non-zero if opcode modifies the interrupt flag.
- */
-static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
-{
-	switch (*insn) {
-	case 0xfa:		/* cli */
-	case 0xfb:		/* sti */
-	case 0xcf:		/* iret/iretd */
-	case 0x9d:		/* popf/popfd */
-		return 1;
-	}
-	return 0;
-}
-
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-	if (can_boost(p->addr))
-		p->ainsn.boostable = 0;
-	else
-		p->ainsn.boostable = -1;
-
-	p->opcode = *p->addr;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
-	/* insn: must be on special executable page on x86. */
-	p->ainsn.insn = get_insn_slot();
-	if (!p->ainsn.insn)
-		return -ENOMEM;
-	arch_copy_kprobe(p);
-	return 0;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
-	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
-	text_poke(p->addr, &p->opcode, 1);
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
-	mutex_unlock(&kprobe_mutex);
-}
-
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
-	kcb->prev_kprobe.kp = kprobe_running();
-	kcb->prev_kprobe.status = kcb->kprobe_status;
-	kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
-	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
-}
-
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
-	kcb->kprobe_status = kcb->prev_kprobe.status;
-	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
-	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
-}
-
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-				struct kprobe_ctlblk *kcb)
-{
-	__get_cpu_var(current_kprobe) = p;
-	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
-		= (regs->flags & (TF_MASK | IF_MASK));
-	if (is_IF_modifier(p->ainsn.insn))
-		kcb->kprobe_saved_flags &= ~IF_MASK;
-}
-
-static __always_inline void clear_btf(void)
-{
-	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0);
-}
-
-static __always_inline void restore_btf(void)
-{
-	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0);
-}
-
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	clear_btf();
-	regs->flags |= TF_MASK;
-	regs->flags &= ~IF_MASK;
-	/*single step inline if the instruction is an int3*/
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->ip = (unsigned long)p->addr;
-	else
-		regs->ip = (unsigned long)p->ainsn.insn;
-}
-
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
-				      struct pt_regs *regs)
-{
-	unsigned long *sara = stack_addr(regs);
-
-	ri->ret_addr = (kprobe_opcode_t *) *sara;
-
-	/* Replace the return addr with trampoline addr */
-	*sara = (unsigned long) &kretprobe_trampoline;
-}
-
-/*
- * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
- */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
-{
-	struct kprobe *p;
-	int ret = 0;
-	kprobe_opcode_t *addr;
-	struct kprobe_ctlblk *kcb;
-
-	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
-
-	/*
-	 * We don't want to be preempted for the entire
-	 * duration of kprobe processing
-	 */
-	preempt_disable();
-	kcb = get_kprobe_ctlblk();
-
-	/* Check we're not actually recursing */
-	if (kprobe_running()) {
-		p = get_kprobe(addr);
-		if (p) {
-			if (kcb->kprobe_status == KPROBE_HIT_SS &&
-				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-				regs->flags &= ~TF_MASK;
-				regs->flags |= kcb->kprobe_saved_flags;
-				goto no_kprobe;
-			}
-			/* We have reentered the kprobe_handler(), since
-			 * another probe was hit while within the handler.
-			 * We here save the original kprobes variables and
-			 * just single step on the instruction of the new probe
-			 * without calling any user handlers.
-			 */
-			save_previous_kprobe(kcb);
-			set_current_kprobe(p, regs, kcb);
-			kprobes_inc_nmissed_count(p);
-			prepare_singlestep(p, regs);
-			kcb->kprobe_status = KPROBE_REENTER;
-			return 1;
-		} else {
-			if (*addr != BREAKPOINT_INSTRUCTION) {
-			/* The breakpoint instruction was removed by
-			 * another cpu right after we hit, no further
-			 * handling of this interrupt is appropriate
-			 */
-				regs->ip = (unsigned long)addr;
-				ret = 1;
-				goto no_kprobe;
-			}
-			p = __get_cpu_var(current_kprobe);
-			if (p->break_handler && p->break_handler(p, regs)) {
-				goto ss_probe;
-			}
-		}
-		goto no_kprobe;
-	}
-
-	p = get_kprobe(addr);
-	if (!p) {
-		if (*addr != BREAKPOINT_INSTRUCTION) {
-			/*
-			 * The breakpoint instruction was removed right
-			 * after we hit it.  Another cpu has removed
-			 * either a probepoint or a debugger breakpoint
-			 * at this address.  In either case, no further
-			 * handling of this interrupt is appropriate.
-			 * Back up over the (now missing) int3 and run
-			 * the original instruction.
-			 */
-			regs->ip = (unsigned long)addr;
-			ret = 1;
-		}
-		/* Not one of ours: let kernel handle it */
-		goto no_kprobe;
-	}
-
-	set_current_kprobe(p, regs, kcb);
-	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
-	if (p->pre_handler && p->pre_handler(p, regs))
-		/* handler has already set things up, so skip ss setup */
-		return 1;
-
-ss_probe:
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
-	if (p->ainsn.boostable == 1 && !p->post_handler) {
-		/* Boost up -- we can execute copied instructions directly */
-		reset_current_kprobe();
-		regs->ip = (unsigned long)p->ainsn.insn;
-		preempt_enable_no_resched();
-		return 1;
-	}
-#endif
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_HIT_SS;
-	return 1;
-
-no_kprobe:
-	preempt_enable_no_resched();
-	return ret;
-}
-
-/*
- * When a retprobed function returns, this code saves registers and
- * calls trampoline_handler() runs, which calls the kretprobe's handler.
- */
- void __kprobes kretprobe_trampoline_holder(void)
- {
-	asm volatile ( ".global kretprobe_trampoline\n"
-			"kretprobe_trampoline: \n"
-			"	pushf\n"
-			/*
-			 * Skip cs, ip, orig_ax.
-			 * trampoline_handler() will plug in these values
-			 */
-			"	subl $12, %esp\n"
-			"	pushl %fs\n"
-			"	pushl %ds\n"
-			"	pushl %es\n"
-			"	pushl %eax\n"
-			"	pushl %ebp\n"
-			"	pushl %edi\n"
-			"	pushl %esi\n"
-			"	pushl %edx\n"
-			"	pushl %ecx\n"
-			"	pushl %ebx\n"
-			"	movl %esp, %eax\n"
-			"	call trampoline_handler\n"
-			/* Move flags to cs */
-			"	movl 52(%esp), %edx\n"
-			"	movl %edx, 48(%esp)\n"
-			/* Replace saved flags with true return address. */
-			"	movl %eax, 52(%esp)\n"
-			"	popl %ebx\n"
-			"	popl %ecx\n"
-			"	popl %edx\n"
-			"	popl %esi\n"
-			"	popl %edi\n"
-			"	popl %ebp\n"
-			"	popl %eax\n"
-			/* Skip ip, orig_ax, es, ds, fs */
-			"	addl $20, %esp\n"
-			"	popf\n"
-			"	ret\n");
- }
-
-/*
- * Called from kretprobe_trampoline
- */
-void * __kprobes trampoline_handler(struct pt_regs *regs)
-{
-	struct kretprobe_instance *ri = NULL;
-	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
-	unsigned long flags, orig_ret_address = 0;
-	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
-	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
-	/* fixup registers */
-	regs->cs = __KERNEL_CS | get_kernel_rpl();
-	regs->ip = trampoline_address;
-	regs->orig_ax = ~0UL;
-
-	/*
-	 * It is possible to have multiple instances associated with a given
-	 * task either because multiple functions in the call path have
-	 * return probes installed on them, and/or more then one
-	 * return probe was registered for a target function.
-	 *
-	 * We can handle this because:
-	 *     - instances are always pushed into the head of the list
-	 *     - when multiple return probes are registered for the same
-	 *	 function, the (chronologically) first instance's ret_addr
-	 *	 will be the real return address, and all the rest will
-	 *	 point to kretprobe_trampoline.
-	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-		if (ri->task != current)
-			/* another task is sharing our hash bucket */
-			continue;
-
-		if (ri->rp && ri->rp->handler) {
-			__get_cpu_var(current_kprobe) = &ri->rp->kp;
-			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
-			ri->rp->handler(ri, regs);
-			__get_cpu_var(current_kprobe) = NULL;
-		}
-
-		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri, &empty_rp);
-
-		if (orig_ret_address != trampoline_address)
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			break;
-	}
-
-	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
-		hlist_del(&ri->hlist);
-		kfree(ri);
-	}
-	return (void *)orig_ret_address;
-}
-
-/*
- * Called after single-stepping.  p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction.  To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction.  The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt.  We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new ip is relative to the copied instruction.  We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed flags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- *
- * If this is the first time we've single-stepped the instruction at
- * this probepoint, and the instruction is boostable, boost it: add a
- * jump instruction after the copied instruction, that jumps to the next
- * instruction after the probepoint.
- */
-static void __kprobes resume_execution(struct kprobe *p,
-		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
-{
-	unsigned long *tos = stack_addr(regs);
-	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
-	unsigned long orig_ip = (unsigned long)p->addr;
-	kprobe_opcode_t *insn = p->ainsn.insn;
-
-	regs->flags &= ~TF_MASK;
-	switch (*insn) {
-	case 0x9c:	/* pushfl */
-		*tos &= ~(TF_MASK | IF_MASK);
-		*tos |= kcb->kprobe_old_flags;
-		break;
-	case 0xc2:	/* iret/ret/lret */
-	case 0xc3:
-	case 0xca:
-	case 0xcb:
-	case 0xcf:
-	case 0xea:	/* jmp absolute -- ip is correct */
-		/* ip is already adjusted, no more changes required */
-		p->ainsn.boostable = 1;
-		goto no_change;
-	case 0xe8:	/* call relative - Fix return addr */
-		*tos = orig_ip + (*tos - copy_ip);
-		break;
-	case 0x9a:	/* call absolute -- same as call absolute, indirect */
-		*tos = orig_ip + (*tos - copy_ip);
-		goto no_change;
-	case 0xff:
-		if ((insn[1] & 0x30) == 0x10) {
-			/*
-			 * call absolute, indirect
-			 * Fix return addr; ip is correct.
-			 * But this is not boostable
-			 */
-			*tos = orig_ip + (*tos - copy_ip);
-			goto no_change;
-		} else if (((insn[1] & 0x31) == 0x20) ||
-			   ((insn[1] & 0x31) == 0x21)) {
-			/*
-			 * jmp near and far, absolute indirect
-			 * ip is correct. And this is boostable
-			 */
-			p->ainsn.boostable = 1;
-			goto no_change;
-		}
-	default:
-		break;
-	}
-
-	if (p->ainsn.boostable == 0) {
-		if ((regs->ip > copy_ip) &&
-		    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
-			/*
-			 * These instructions can be executed directly if it
-			 * jumps back to correct address.
-			 */
-			set_jmp_op((void *)regs->ip,
-				   (void *)orig_ip + (regs->ip - copy_ip));
-			p->ainsn.boostable = 1;
-		} else {
-			p->ainsn.boostable = -1;
-		}
-	}
-
-	regs->ip += orig_ip - copy_ip;
-
-no_change:
-	restore_btf();
-
-	return;
-}
-
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
- */
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
-{
-	struct kprobe *cur = kprobe_running();
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	if (!cur)
-		return 0;
-
-	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		cur->post_handler(cur, regs, 0);
-	}
-
-	resume_execution(cur, regs, kcb);
-	regs->flags |= kcb->kprobe_saved_flags;
-	trace_hardirqs_fixup_flags(regs->flags);
-
-	/* Restore back the original saved kprobes variables and continue. */
-	if (kcb->kprobe_status == KPROBE_REENTER) {
-		restore_previous_kprobe(kcb);
-		goto out;
-	}
-	reset_current_kprobe();
-out:
-	preempt_enable_no_resched();
-
-	/*
-	 * if somebody else is singlestepping across a probe point, flags
-	 * will have TF set, in which case, continue the remaining processing
-	 * of do_debug, as if this is not a probe hit.
-	 */
-	if (regs->flags & TF_MASK)
-		return 0;
-
-	return 1;
-}
-
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
-	struct kprobe *cur = kprobe_running();
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	switch(kcb->kprobe_status) {
-	case KPROBE_HIT_SS:
-	case KPROBE_REENTER:
-		/*
-		 * We are here because the instruction being single
-		 * stepped caused a page fault. We reset the current
-		 * kprobe and the ip points back to the probe address
-		 * and allow the page fault handler to continue as a
-		 * normal page fault.
-		 */
-		regs->ip = (unsigned long)cur->addr;
-		regs->flags |= kcb->kprobe_old_flags;
-		if (kcb->kprobe_status == KPROBE_REENTER)
-			restore_previous_kprobe(kcb);
-		else
-			reset_current_kprobe();
-		preempt_enable_no_resched();
-		break;
-	case KPROBE_HIT_ACTIVE:
-	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
-		/*
-		 * In case the user-specified fault handler returned
-		 * zero, try to fix up.
-		 */
-		if (fixup_exception(regs))
-			return 1;
-
-		/*
-		 * fixup routine could not handle it,
-		 * Let do_page_fault() fix it.
-		 */
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
-/*
- * Wrapper routine for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-				       unsigned long val, void *data)
-{
-	struct die_args *args = (struct die_args *)data;
-	int ret = NOTIFY_DONE;
-
-	if (args->regs && user_mode_vm(args->regs))
-		return ret;
-
-	switch (val) {
-	case DIE_INT3:
-		if (kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_GPF:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
-		break;
-	default:
-		break;
-	}
-	return ret;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
-	struct jprobe *jp = container_of(p, struct jprobe, kp);
-	unsigned long addr;
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	kcb->jprobe_saved_regs = *regs;
-	kcb->jprobe_saved_sp = stack_addr(regs);
-	addr = (unsigned long)(kcb->jprobe_saved_sp);
-
-	/*
-	 * As Linus pointed out, gcc assumes that the callee
-	 * owns the argument space and could overwrite it, e.g.
-	 * tailcall optimization. So, to be absolutely safe
-	 * we also save and restore enough stack bytes to cover
-	 * the argument area.
-	 */
-	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-			MIN_STACK_SIZE(addr));
-	regs->flags &= ~IF_MASK;
-	trace_hardirqs_off();
-	regs->ip = (unsigned long)(jp->entry);
-	return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	asm volatile ("       xchgl   %%ebx,%%esp     \n"
-		      "       int3			\n"
-		      "       .globl jprobe_return_end	\n"
-		      "       jprobe_return_end:	\n"
-		      "       nop			\n"::"b"
-		      (kcb->jprobe_saved_sp):"memory");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	u8 *addr = (u8 *) (regs->ip - 1);
-	struct jprobe *jp = container_of(p, struct jprobe, kp);
-
-	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if (stack_addr(regs) != kcb->jprobe_saved_sp) {
-			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
-			printk("current sp %p does not match saved sp %p\n",
-			       stack_addr(regs), kcb->jprobe_saved_sp);
-			printk("Saved registers for jprobe %p\n", jp);
-			show_registers(saved_regs);
-			printk("Current registers\n");
-			show_registers(regs);
-			BUG();
-		}
-		*regs = kcb->jprobe_saved_regs;
-		memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
-		       kcb->jprobes_stack,
-		       MIN_STACK_SIZE(kcb->jprobe_saved_sp));
-		preempt_enable_no_resched();
-		return 1;
-	}
-	return 0;
-}
-
-int __init arch_init_kprobes(void)
-{
-	return 0;
-}
-
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
-{
-	return 0;
-}
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index b7bbd25ba2a6..6e6371a856e7 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -1,5 +1,98 @@
-#ifdef CONFIG_X86_32
-# include "kprobes_32.h"
-#else
-# include "kprobes_64.h"
-#endif
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * See arch/x86/kernel/kprobes.c for x86 kprobes history.
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define  __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u8 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0xcc
+#define RELATIVEJUMP_INSTRUCTION 0xe9
+#define MAX_INSN_SIZE 16
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE \
+	 - (unsigned long)(ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
+	   - (unsigned long)(ADDR)))
+
+#define ARCH_SUPPORTS_KRETPROBES
+#define flush_insn_slot(p)	do { } while (0)
+
+extern const int kretprobe_blacklist_size;
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t *insn;
+	/*
+	 * boostable = -1: This instruction type is not boostable.
+	 * boostable = 0: This instruction type is boostable.
+	 * boostable = 1: This instruction has been boosted: we have
+	 * added a relative jump after the instruction copy in insn,
+	 * so no single-step and fixup are needed (unless there's
+	 * a post_handler or break_handler).
+	 */
+	int boostable;
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+	unsigned long old_flags;
+	unsigned long saved_flags;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_old_flags;
+	unsigned long kprobe_saved_flags;
+	unsigned long *jprobe_saved_sp;
+	struct pt_regs jprobe_saved_regs;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+	struct prev_kprobe prev_kprobe;
+};
+
+/* trap3/1 are intr gates for kprobes.  So, restore the status of IF,
+ * if necessary, before executing the original int3/1 (trap) handler.
+ */
+static inline void restore_interrupts(struct pt_regs *regs)
+{
+	if (regs->flags & IF_MASK)
+		local_irq_enable();
+}
+
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+#endif				/* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h
deleted file mode 100644
index 94dff09b2ebe..000000000000
--- a/include/asm-x86/kprobes_32.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
-/*
- *  Kernel Probes (KProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- *		Probes initial implementation ( includes suggestions from
- *		Rusty Russell).
- * 2004-Oct	Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
- *		kenistoj@us.ibm.com adopted from i386.
- */
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/percpu.h>
-
-#define  __ARCH_WANT_KPROBES_INSN_SLOT
-
-struct pt_regs;
-struct kprobe;
-
-typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION	0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
-#define MAX_INSN_SIZE 16
-#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
-	(((unsigned long)current_thread_info()) + THREAD_SIZE \
-	 - (unsigned long)(ADDR))) \
-	? (MAX_STACK_SIZE) \
-	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
-	   - (unsigned long)(ADDR)))
-
-#define ARCH_SUPPORTS_KRETPROBES
-#define flush_insn_slot(p)	do { } while (0)
-
-extern const int kretprobe_blacklist_size;
-
-void arch_remove_kprobe(struct kprobe *p);
-void kretprobe_trampoline(void);
-
-/* Architecture specific copy of original instruction*/
-struct arch_specific_insn {
-	/* copy of the original instruction */
-	kprobe_opcode_t *insn;
-	/*
-	 * boostable = -1: This instruction type is not boostable.
-	 * boostable = 0: This instruction type is boostable.
-	 * boostable = 1: This instruction has been boosted: we have
-	 * added a relative jump after the instruction copy in insn,
-	 * so no single-step and fixup are needed (unless there's
-	 * a post_handler or break_handler).
-	 */
-	int boostable;
-};
-
-struct prev_kprobe {
-	struct kprobe *kp;
-	unsigned long status;
-	unsigned long old_flags;
-	unsigned long saved_flags;
-};
-
-/* per-cpu kprobe control block */
-struct kprobe_ctlblk {
-	unsigned long kprobe_status;
-	unsigned long kprobe_old_flags;
-	unsigned long kprobe_saved_flags;
-	unsigned long *jprobe_saved_sp;
-	struct pt_regs jprobe_saved_regs;
-	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
-	struct prev_kprobe prev_kprobe;
-};
-
-/* trap3/1 are intr gates for kprobes.  So, restore the status of IF,
- * if necessary, before executing the original int3/1 (trap) handler.
- */
-static inline void restore_interrupts(struct pt_regs *regs)
-{
-	if (regs->flags & IF_MASK)
-		local_irq_enable();
-}
-
-extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-extern int kprobe_exceptions_notify(struct notifier_block *self,
-				    unsigned long val, void *data);
-#endif				/* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
deleted file mode 100644
index 94dff09b2ebe..000000000000
--- a/include/asm-x86/kprobes_64.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
-/*
- *  Kernel Probes (KProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- *		Probes initial implementation ( includes suggestions from
- *		Rusty Russell).
- * 2004-Oct	Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
- *		kenistoj@us.ibm.com adopted from i386.
- */
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/percpu.h>
-
-#define  __ARCH_WANT_KPROBES_INSN_SLOT
-
-struct pt_regs;
-struct kprobe;
-
-typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION	0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
-#define MAX_INSN_SIZE 16
-#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
-	(((unsigned long)current_thread_info()) + THREAD_SIZE \
-	 - (unsigned long)(ADDR))) \
-	? (MAX_STACK_SIZE) \
-	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
-	   - (unsigned long)(ADDR)))
-
-#define ARCH_SUPPORTS_KRETPROBES
-#define flush_insn_slot(p)	do { } while (0)
-
-extern const int kretprobe_blacklist_size;
-
-void arch_remove_kprobe(struct kprobe *p);
-void kretprobe_trampoline(void);
-
-/* Architecture specific copy of original instruction*/
-struct arch_specific_insn {
-	/* copy of the original instruction */
-	kprobe_opcode_t *insn;
-	/*
-	 * boostable = -1: This instruction type is not boostable.
-	 * boostable = 0: This instruction type is boostable.
-	 * boostable = 1: This instruction has been boosted: we have
-	 * added a relative jump after the instruction copy in insn,
-	 * so no single-step and fixup are needed (unless there's
-	 * a post_handler or break_handler).
-	 */
-	int boostable;
-};
-
-struct prev_kprobe {
-	struct kprobe *kp;
-	unsigned long status;
-	unsigned long old_flags;
-	unsigned long saved_flags;
-};
-
-/* per-cpu kprobe control block */
-struct kprobe_ctlblk {
-	unsigned long kprobe_status;
-	unsigned long kprobe_old_flags;
-	unsigned long kprobe_saved_flags;
-	unsigned long *jprobe_saved_sp;
-	struct pt_regs jprobe_saved_regs;
-	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
-	struct prev_kprobe prev_kprobe;
-};
-
-/* trap3/1 are intr gates for kprobes.  So, restore the status of IF,
- * if necessary, before executing the original int3/1 (trap) handler.
- */
-static inline void restore_interrupts(struct pt_regs *regs)
-{
-	if (regs->flags & IF_MASK)
-		local_irq_enable();
-}
-
-extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-extern int kprobe_exceptions_notify(struct notifier_block *self,
-				    unsigned long val, void *data);
-#endif				/* _ASM_KPROBES_H */

From bdb4f156064e5f627213af82292eb8b5cf2dc5aa Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 324/890] i386: hard_{en,dis}able_TSC can be static

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c   | 4 ++--
 include/asm-x86/processor_32.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 48e92e3758c2..40cc29695eba 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -588,7 +588,7 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
 }
 
 #ifdef CONFIG_SECCOMP
-void hard_disable_TSC(void)
+static void hard_disable_TSC(void)
 {
 	write_cr4(read_cr4() | X86_CR4_TSD);
 }
@@ -603,7 +603,7 @@ void disable_TSC(void)
 		hard_disable_TSC();
 	preempt_enable();
 }
-void hard_enable_TSC(void)
+static void hard_enable_TSC(void)
 {
 	write_cr4(read_cr4() & ~X86_CR4_TSD);
 }
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index e5056ab9dd9f..a5560a3b57ef 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -206,9 +206,7 @@ extern int bootloader_type;
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
-extern void hard_disable_TSC(void);
 extern void disable_TSC(void);
-extern void hard_enable_TSC(void);
 
 /*
  * Size of io_bitmap.

From d88879b2d0225da3ba460bbdb8361bb049653671 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:21 +0100
Subject: [PATCH 325/890] x86-64: make pda's cpunumber and nodenumber unsigned

This generally allows better code to be generated, since the zero-
extension during 32-bit operations comes for free (needed when the
result is used as array index or similar), whereas sign extension must
be done explicitly and frequently requires a one byte larger
instruction due to the necessary rex64 prefix.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pda.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 3d9dd653628a..b620d0c39a93 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -15,14 +15,14 @@ struct x8664_pda {
 	unsigned long kernelstack;  /* 16 top of kernel stack for current */
 	unsigned long oldrsp; 	    /* 24 user rsp for system call */
         int irqcount;		    /* 32 Irq nesting counter. Starts with -1 */
-	int cpunumber;		    /* 36 Logical CPU number */
+	unsigned int cpunumber;	    /* 36 Logical CPU number */
 #ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
 					/* gcc-ABI: this canary MUST be at
 					   offset 40!!! */
 #endif
 	char *irqstackptr;
-	int nodenumber;		    /* number of current node */
+	unsigned int nodenumber;	/* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	short mmu_state;

From 22f5991c85dec1281cce5c8df9ee92b43b1738c8 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:23 +0100
Subject: [PATCH 326/890] x86-64: honor notify_die() returning NOTIFY_STOP

This requires making die() return a value, making its callers honor
this (and be prepared that it may return), and making oops_end() have
two additional parameters.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c |  8 ++++----
 arch/x86/kernel/traps_64.c          | 23 +++++++++++++++--------
 arch/x86/mm/fault_64.c              | 12 ++++++------
 include/asm-x86/kdebug.h            |  4 ++--
 4 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 8cd47fe2ef2c..bc6e35153d83 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -192,10 +192,10 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 
 	atomic_inc(&mce_entry);
 
-	if (regs)
-		notify_die(DIE_NMI, "machine check", regs, error_code, 18,
-			   SIGKILL);
-	if (!banks)
+	if ((regs
+	     && notify_die(DIE_NMI, "machine check", regs, error_code,
+			   18, SIGKILL) == NOTIFY_STOP)
+	    || !banks)
 		goto out2;
 
 	memset(&m, 0, sizeof(struct mce));
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 1a12a81fdb1b..cf90ceb48f5f 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -488,7 +488,7 @@ unsigned __kprobes long oops_begin(void)
 	return flags;
 }
 
-void __kprobes oops_end(unsigned long flags)
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 { 
 	die_owner = -1;
 	bust_spinlocks(0);
@@ -497,12 +497,17 @@ void __kprobes oops_end(unsigned long flags)
 		/* Nest count reaches zero, release the lock. */
 		__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
+	if (!regs) {
+		oops_exit();
+		return;
+	}
 	if (panic_on_oops)
 		panic("Fatal exception");
 	oops_exit();
+	do_exit(signr);
 }
 
-void __kprobes __die(const char * str, struct pt_regs * regs, long err)
+int __kprobes __die(const char * str, struct pt_regs * regs, long err)
 {
 	static int die_counter;
 	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
@@ -516,7 +521,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-	notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
+	if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+		return 1;
 	show_registers(regs);
 	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
@@ -525,6 +531,7 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	printk(" RSP <%016lx>\n", regs->sp);
 	if (kexec_should_crash(current))
 		crash_kexec(regs);
+	return 0;
 }
 
 void die(const char * str, struct pt_regs * regs, long err)
@@ -534,9 +541,9 @@ void die(const char * str, struct pt_regs * regs, long err)
 	if (!user_mode(regs))
 		report_bug(regs->ip, regs);
 
-	__die(str, regs, err);
-	oops_end(flags);
-	do_exit(SIGSEGV); 
+	if (__die(str, regs, err))
+		regs = NULL;
+	oops_end(flags, regs, SIGSEGV);
 }
 
 void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
@@ -553,10 +560,10 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
 		crash_kexec(regs);
 	if (do_panic || panic_on_oops)
 		panic("Non maskable interrupt");
-	oops_end(flags);
+	oops_end(flags, NULL, SIGBUS);
 	nmi_exit();
 	local_irq_enable();
-	do_exit(SIGSEGV);
+	do_exit(SIGBUS);
 }
 
 static void __kprobes do_trap(int trapnr, int signr, char *str,
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 162050d4e5a3..121c7bda6297 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -227,9 +227,9 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
-	__die("Bad pagetable", regs, error_code);
-	oops_end(flags);
-	do_exit(SIGKILL);
+	if (__die("Bad pagetable", regs, error_code))
+		regs = NULL;
+	oops_end(flags, regs, SIGKILL);
 }
 
 /*
@@ -541,11 +541,11 @@ no_context:
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
-	__die("Oops", regs, error_code);
+	if (__die("Oops", regs, error_code))
+		regs = NULL;
 	/* Executive summary in case the body of the oops scrolled away */
 	printk(KERN_EMERG "CR2: %016lx\n", address);
-	oops_end(flags);
-	do_exit(SIGKILL);
+	oops_end(flags, regs, SIGKILL);
 
 /*
  * We ran out of memory, or some other thing happened to us that made
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 49e5c91d490c..a5e5e3b7eb23 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -25,7 +25,7 @@ enum die_val {
 extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
 extern void printk_address(unsigned long address);
 extern void die(const char *,struct pt_regs *,long);
-extern void __die(const char *,struct pt_regs *,long);
+extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void __show_registers(struct pt_regs *, int all);
 extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long *);
@@ -33,6 +33,6 @@ extern void __show_regs(struct pt_regs *regs);
 extern void show_regs(struct pt_regs *regs);
 extern void dump_pagetable(unsigned long);
 extern unsigned long oops_begin(void);
-extern void oops_end(unsigned long);
+extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
 #endif

From 3e7622f9d7807a0a826d042cafc211cd1a29448c Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:23 +0100
Subject: [PATCH 327/890] x86: move to .rodata/.init.data

The array is never written, and on 64-bits it's not even being used
past initial boot.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S  | 2 +-
 arch/x86/kernel/i8259_64.c  | 2 +-
 include/asm-x86/hw_irq_32.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6a474e1028c7..be5c31d04884 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -583,7 +583,7 @@ END(syscall_badsys)
  * Build the entry stubs and pointer table with
  * some assembler magic.
  */
-.data
+.section .rodata,"a"
 ENTRY(interrupt)
 .text
 
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index be82b1217691..99c8406ae253 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -77,7 +77,7 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
 /* for the irq vectors */
-static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
+static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
 					  IRQLIST_16(0x2), IRQLIST_16(0x3),
 	IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
 	IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index b93e35a708ac..6d65fbb6358b 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -26,7 +26,7 @@
  * Interrupt entry/exit code at both C and assembly level
  */
 
-extern void (*interrupt[NR_IRQS])(void);
+extern void (*const interrupt[NR_IRQS])(void);
 
 #ifdef CONFIG_SMP
 void reschedule_interrupt(void);

From cae4595764cb3b08f6517e99bac1e3862854b1a1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:23 +0100
Subject: [PATCH 328/890] x86: make __{save,restore}_processor_state static

.. allowing to remove their declarations from a global include file
(the symbols don't exist for anything but x86).

Likewise for 64-bits' fix_processor_context(), just that that one was
properly declared in an arch-specific header.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/suspend_64.c | 8 +++++---
 arch/x86/power/cpu.c         | 4 ++--
 include/asm-x86/suspend_64.h | 2 --
 include/linux/suspend.h      | 3 ---
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 279c25775d19..09199511c256 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -17,6 +17,8 @@
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
+static void fix_processor_context(void);
+
 struct saved_context saved_context;
 
 /**
@@ -34,7 +36,7 @@ struct saved_context saved_context;
  *	needed by kernel A, so that it can operate correctly after the resume
  *	regardless of what kernel B does in the meantime.
  */
-void __save_processor_state(struct saved_context *ctxt)
+static void __save_processor_state(struct saved_context *ctxt)
 {
 	kernel_fpu_begin();
 
@@ -89,7 +91,7 @@ static void do_fpu_end(void)
  *		by __save_processor_state()
  *	@ctxt - structure to load the registers contents from
  */
-void __restore_processor_state(struct saved_context *ctxt)
+static void __restore_processor_state(struct saved_context *ctxt)
 {
 	/*
 	 * control registers
@@ -133,7 +135,7 @@ void restore_processor_state(void)
 	__restore_processor_state(&saved_context);
 }
 
-void fix_processor_context(void)
+static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5a98dc35addf..efcf620d1439 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -19,7 +19,7 @@ unsigned long saved_context_esp, saved_context_ebp;
 unsigned long saved_context_esi, saved_context_edi;
 unsigned long saved_context_eflags;
 
-void __save_processor_state(struct saved_context *ctxt)
+static void __save_processor_state(struct saved_context *ctxt)
 {
 	mtrr_save_fixed_ranges(NULL);
 	kernel_fpu_begin();
@@ -86,7 +86,7 @@ static void fix_processor_context(void)
 
 }
 
-void __restore_processor_state(struct saved_context *ctxt)
+static void __restore_processor_state(struct saved_context *ctxt)
 {
 	/*
 	 * control registers
diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index 4404668f9aa4..2eb92cb81a0d 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h
@@ -45,8 +45,6 @@ struct saved_context {
 #define loaddebug(thread,register) \
 	set_debugreg((thread)->debugreg##register, register)
 
-extern void fix_processor_context(void);
-
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
 extern char core_restore_code;
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4360e0816956..40280df2a3db 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -211,9 +211,6 @@ static inline int hibernate(void) { return -ENOSYS; }
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
 void restore_processor_state(void);
-struct saved_context;
-void __save_processor_state(struct saved_context *ctxt);
-void __restore_processor_state(struct saved_context *ctxt);
 
 /* kernel/power/main.c */
 extern struct blocking_notifier_head pm_chain_head;

From e94271017f0933b29362a3c9dea5a6b9d04d98e1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:24 +0100
Subject: [PATCH 329/890] x86: adjust enable_NMI_through_LVT0()

Its previous use in a call to on_each_cpu() was pointless, as at the
time that code gets executed only one CPU is online. Further, the
function can be __cpuinit, and for this to work without
CONFIG_HOTPLUG_CPU setup_nmi() must also get an attribute (this one
can even be __init; on 64-bits check_timer() also was lacking that
attribute).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c    | 2 +-
 arch/x86/kernel/apic_64.c    | 2 +-
 arch/x86/kernel/io_apic_32.c | 4 ++--
 arch/x86/kernel/io_apic_64.c | 6 +++---
 arch/x86/kernel/smpboot_32.c | 2 +-
 arch/x86/kernel/smpboot_64.c | 2 +-
 include/asm-x86/apic.h       | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 69a13d127da3..4c014fca2057 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -152,7 +152,7 @@ u32 safe_apic_wait_icr_idle(void)
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
-void enable_NMI_through_LVT0 (void * dummy)
+void __cpuinit enable_NMI_through_LVT0(void)
 {
 	unsigned int v = APIC_DM_NMI;
 
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index ab4ae50399fd..47b8ef51dde0 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -130,7 +130,7 @@ u32 safe_apic_wait_icr_idle(void)
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
-void enable_NMI_through_LVT0(void *dummy)
+void __cpuinit enable_NMI_through_LVT0(void)
 {
 	unsigned int v;
 
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 76f11c3e3906..0d204237489e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2078,7 +2078,7 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.eoi		= ack_apic,
 };
 
-static void setup_nmi (void)
+static void __init setup_nmi(void)
 {
 	/*
  	 * Dirty trick to enable the NMI watchdog ...
@@ -2091,7 +2091,7 @@ static void setup_nmi (void)
 	 */ 
 	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
-	on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+	enable_NMI_through_LVT0();
 
 	apic_printk(APIC_VERBOSE, " done.\n");
 }
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index fa70005be5e8..f914d84a21da 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1566,7 +1566,7 @@ static struct hw_interrupt_type lapic_irq_type __read_mostly = {
 	.end = end_lapic_irq,
 };
 
-static void setup_nmi (void)
+static void __init setup_nmi(void)
 {
 	/*
  	 * Dirty trick to enable the NMI watchdog ...
@@ -1579,7 +1579,7 @@ static void setup_nmi (void)
 	 */ 
 	printk(KERN_INFO "activating NMI Watchdog ...");
 
-	enable_NMI_through_LVT0(NULL);
+	enable_NMI_through_LVT0();
 
 	printk(" done.\n");
 }
@@ -1655,7 +1655,7 @@ static inline void unlock_ExtINT_logic(void)
  *
  * FIXME: really need to revamp this for modern platforms only.
  */
-static inline void check_timer(void)
+static inline void __init check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg + 0;
 	int apic1, pin1, apic2, pin2;
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 0f294d6e22cf..2034332ad080 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -404,7 +404,7 @@ static void __cpuinit start_secondary(void *unused)
 	setup_secondary_clock();
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
-		enable_NMI_through_LVT0(NULL);
+		enable_NMI_through_LVT0();
 		enable_8259A_irq(0);
 	}
 	/*
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index cb73c4da87fc..4c03ddccd681 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -337,7 +337,7 @@ void __cpuinit start_secondary(void)
 
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
-		enable_NMI_through_LVT0(NULL);
+		enable_NMI_through_LVT0();
 		enable_8259A_irq(0);
 	}
 
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 72bf09cf13ab..bcfc07fd3661 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -117,7 +117,7 @@ extern void init_apic_mappings(void);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
-extern void enable_NMI_through_LVT0(void *dummy);
+extern void enable_NMI_through_LVT0(void);
 
 /*
  * On 32bit this is mach-xxx local

From ded9aa0db815b4e1271001561e177755cb8b9468 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:31:24 +0100
Subject: [PATCH 330/890] x86: also define AT_VECTOR_SIZE_ARCH

The patch introducing this left out x86-64, despite it also having
extra entries.

[ mingo@elte.hu: re-merged this to after the unification patches. ]

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 8a37dad38bc0..6c7d1fda4995 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -9,8 +9,14 @@
 #include <linux/kernel.h>
 #include <linux/irqflags.h>
 
+/* entries in ARCH_DLINFO: */
+#ifdef CONFIG_IA32_EMULATION
+# define AT_VECTOR_SIZE_ARCH 2
+#else
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
 #ifdef CONFIG_X86_32
-#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
@@ -56,7 +62,7 @@ extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
 
 /* Save restore flags to clear handle leaking NT */
 #define switch_to(prev, next, last) \
-	asm volatile(SAVE_CONTEXT					  \
+	asm volatile(SAVE_CONTEXT						    \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
 	     "call __switch_to\n\t"					  \

From 41e191e85a122ad822deb7525a015410012e6c70 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:31:24 +0100
Subject: [PATCH 331/890] x86: replace outb_p() with udelay(2) in
 drivers/input/mouse/pc110pad.c

replace outb_p() with udelay(2). This is a real ISA device so it likely
needs this particular delay.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/input/mouse/pc110pad.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 8991ab0b4fe3..61cff8374e6c 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -39,6 +39,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -62,8 +63,10 @@ static irqreturn_t pc110pad_interrupt(int irq, void *ptr)
 	int value     = inb_p(pc110pad_io);
 	int handshake = inb_p(pc110pad_io + 2);
 
-	outb_p(handshake |  1, pc110pad_io + 2);
-	outb_p(handshake & ~1, pc110pad_io + 2);
+	outb(handshake |  1, pc110pad_io + 2);
+	udelay(2);
+	outb(handshake & ~1, pc110pad_io + 2);
+	udelay(2);
 	inb_p(0x64);
 
 	pc110pad_data[pc110pad_count++] = value;

From c6b48324325ffb637c3aafb2d795408febf40198 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 30 Jan 2008 13:31:25 +0100
Subject: [PATCH 332/890] x86, kexec: force x86 arches to boot kdump kernels on
 boot cpu

Recently a kdump bug was discovered in which a system would hang inside
calibrate_delay during the booting of the kdump kernel.  This was caused
by the fact that the jiffies counter was not being incremented during
timer calibration.  The root cause of this problem was found to be a
bios misconfiguration of the hypertransport bus.  On system affected by
this hang, the bios had assigned APIC ids which used extended apic bits
(more than the nominal 4 bit ids's), but failed to configure bit 17 of
the hypertransport transaction config register, which indicated that the
mask for the destination field of interrupt packets accross the ht bus
(see section 3.3.9 of
http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF).
If a crash occurs on a cpu with an APIC id that extends beyond 4 bits,
it will not recieve interrupts during the kdump kernel boot, and this
hang will be the result.  The fix is to add this patch, whcih add an
early pci quirk check, to forcibly enable this bit in the httcfg
register.  This enables all cpus on a system to receive interrupts, and
allows kdump kernel bootup to procede normally.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/early-quirks.c | 86 +++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 88bb83ec895f..b55258e49208 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -21,7 +21,30 @@
 #include <asm/gart.h>
 #endif
 
-static void __init via_bugs(void)
+static void __init fix_hypertransport_config(int num, int slot, int func)
+{
+	u32 htcfg;
+	/*
+	 * we found a hypertransport bus
+	 * make sure that we are broadcasting
+	 * interrupts to all cpus on the ht bus
+	 * if we're using extended apic ids
+	 */
+	htcfg = read_pci_config(num, slot, func, 0x68);
+	if (htcfg & (1 << 18)) {
+		printk(KERN_INFO "Detected use of extended apic ids on hypertransport bus\n");
+		if ((htcfg & (1 << 17)) == 0) {
+			printk(KERN_INFO "Enabling hypertransport extended apic interrupt broadcast\n");
+			printk(KERN_INFO "Note this is a bios bug, please contact your hw vendor\n");
+			htcfg |= (1 << 17);
+			write_pci_config(num, slot, func, 0x68, htcfg);
+		}
+	}
+
+
+}
+
+static void __init via_bugs(int  num, int slot, int func)
 {
 #ifdef CONFIG_GART_IOMMU
 	if ((end_pfn > MAX_DMA32_PFN ||  force_iommu) &&
@@ -44,7 +67,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
 #endif /* CONFIG_X86_IO_APIC */
 #endif /* CONFIG_ACPI */
 
-static void __init nvidia_bugs(void)
+static void __init nvidia_bugs(int num, int slot, int func)
 {
 #ifdef CONFIG_ACPI
 #ifdef CONFIG_X86_IO_APIC
@@ -72,7 +95,7 @@ static void __init nvidia_bugs(void)
 
 }
 
-static void __init ati_bugs(void)
+static void __init ati_bugs(int num, int slot, int func)
 {
 #ifdef CONFIG_X86_IO_APIC
 	if (timer_over_8254 == 1) {
@@ -83,15 +106,27 @@ static void __init ati_bugs(void)
 #endif
 }
 
+#define QFLAG_APPLY_ONCE 	0x1
+#define QFLAG_APPLIED		0x2
+#define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
 struct chipset {
-	u16 vendor;
-	void (*f)(void);
+	u32 vendor;
+	u32 device;
+	u32 class;
+	u32 class_mask;
+	u32 flags;
+	void (*f)(int num, int slot, int func);
 };
 
 static struct chipset early_qrk[] __initdata = {
-	{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
-	{ PCI_VENDOR_ID_VIA, via_bugs },
-	{ PCI_VENDOR_ID_ATI, ati_bugs },
+	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
+	{ PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
+	{ PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
+	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
 	{}
 };
 
@@ -106,27 +141,36 @@ void __init early_quirks(void)
 	for (num = 0; num < 32; num++) {
 		for (slot = 0; slot < 32; slot++) {
 			for (func = 0; func < 8; func++) {
-				u32 class;
-				u32 vendor;
+				u16 class;
+				u16 vendor;
+				u16 device;
 				u8 type;
 				int i;
-				class = read_pci_config(num,slot,func,
+
+				class = read_pci_config_16(num,slot,func,
 							PCI_CLASS_REVISION);
-				if (class == 0xffffffff)
+				if (class == 0xffff)
 					break;
 
-				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
-					continue;
-
-				vendor = read_pci_config(num, slot, func,
+				vendor = read_pci_config_16(num, slot, func,
 							 PCI_VENDOR_ID);
-				vendor &= 0xffff;
 
-				for (i = 0; early_qrk[i].f; i++)
-					if (early_qrk[i].vendor == vendor) {
-						early_qrk[i].f();
-						return;
+				device = read_pci_config_16(num, slot, func,
+							PCI_DEVICE_ID);
+
+				for(i=0;early_qrk[i].f != NULL;i++) {
+					if (((early_qrk[i].vendor == PCI_ANY_ID) ||
+					    (early_qrk[i].vendor == vendor)) &&
+					   ((early_qrk[i].device == PCI_ANY_ID) ||
+					    (early_qrk[i].device == device)) &&
+					   (!((early_qrk[i].class ^ class) &
+					     early_qrk[i].class_mask))) {
+						if ((early_qrk[i].flags & QFLAG_DONE) != QFLAG_DONE)
+							early_qrk[i].f(num, slot, func);
+						early_qrk[i].flags |= QFLAG_APPLIED;
+
 					}
+				}
 
 				type = read_pci_config_byte(num, slot, func,
 							    PCI_HEADER_TYPE);

From fe758fb1192790f8e465a7f59efe47ca73717d3e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:31:25 +0100
Subject: [PATCH 333/890] x86: clean up include/asm-x86/pda.h

clean up include/asm-x86/pda.h, as suggested by checkpatch.pl.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pda.h | 70 +++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index b620d0c39a93..c0305bff0f19 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -7,15 +7,15 @@
 #include <linux/cache.h>
 #include <asm/page.h>
 
-/* Per processor datastructure. %gs points to it while the kernel runs */ 
+/* Per processor datastructure. %gs points to it while the kernel runs */
 struct x8664_pda {
 	struct task_struct *pcurrent;	/* 0  Current process */
 	unsigned long data_offset;	/* 8 Per cpu data offset from linker
 					   address */
-	unsigned long kernelstack;  /* 16 top of kernel stack for current */
-	unsigned long oldrsp; 	    /* 24 user rsp for system call */
-        int irqcount;		    /* 32 Irq nesting counter. Starts with -1 */
-	unsigned int cpunumber;	    /* 36 Logical CPU number */
+	unsigned long kernelstack;	/* 16 top of kernel stack for current */
+	unsigned long oldrsp;		/* 24 user rsp for system call */
+	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
+	unsigned int cpunumber;		/* 36 Logical CPU number */
 #ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
 					/* gcc-ABI: this canary MUST be at
@@ -44,10 +44,10 @@ extern void pda_init(int);
 
 #define cpu_pda(i) (_cpu_pda[i])
 
-/* 
+/*
  * There is no fast way to get the base address of the PDA, all the accesses
  * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
- */ 
+ */
 extern void __bad_pda_field(void) __attribute__((noreturn));
 
 /*
@@ -58,70 +58,70 @@ extern struct x8664_pda _proxy_pda;
 
 #define pda_offset(field) offsetof(struct x8664_pda, field)
 
-#define pda_to_op(op,field,val) do {		\
+#define pda_to_op(op, field, val) do {		\
 	typedef typeof(_proxy_pda.field) T__;	\
 	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */ \
 	switch (sizeof(_proxy_pda.field)) {	\
 	case 2:					\
-		asm(op "w %1,%%gs:%c2" : 	\
+		asm(op "w %1,%%gs:%c2" :	\
 		    "+m" (_proxy_pda.field) :	\
 		    "ri" ((T__)val),		\
-		    "i"(pda_offset(field))); 	\
- 		break;				\
+		    "i"(pda_offset(field)));	\
+		break;				\
 	case 4:					\
-		asm(op "l %1,%%gs:%c2" : 	\
+		asm(op "l %1,%%gs:%c2" :	\
 		    "+m" (_proxy_pda.field) :	\
 		    "ri" ((T__)val),		\
-		    "i" (pda_offset(field))); 	\
+		    "i" (pda_offset(field)));	\
 		break;				\
 	case 8:					\
-		asm(op "q %1,%%gs:%c2": 	\
+		asm(op "q %1,%%gs:%c2":		\
 		    "+m" (_proxy_pda.field) :	\
 		    "ri" ((T__)val),		\
-		    "i"(pda_offset(field))); 	\
+		    "i"(pda_offset(field)));	\
 		break;				\
-       default: 				\
+	default:				\
 		__bad_pda_field();		\
-       }					\
-       } while (0)
+	}					\
+	} while (0)
 
 #define pda_from_op(op,field) ({		\
 	typeof(_proxy_pda.field) ret__;		\
 	switch (sizeof(_proxy_pda.field)) {	\
-       	case 2:					\
-		asm(op "w %%gs:%c1,%0" : 	\
+	case 2:					\
+		asm(op "w %%gs:%c1,%0" :	\
 		    "=r" (ret__) :		\
-		    "i" (pda_offset(field)), 	\
-		    "m" (_proxy_pda.field)); 	\
+		    "i" (pda_offset(field)),	\
+		    "m" (_proxy_pda.field));	\
 		 break;				\
 	case 4:					\
 		asm(op "l %%gs:%c1,%0":		\
 		    "=r" (ret__):		\
-		    "i" (pda_offset(field)), 	\
-		    "m" (_proxy_pda.field)); 	\
+		    "i" (pda_offset(field)),	\
+		    "m" (_proxy_pda.field));	\
 		 break;				\
-       case 8:					\
+	case 8:					\
 		asm(op "q %%gs:%c1,%0":		\
 		    "=r" (ret__) :		\
-		    "i" (pda_offset(field)), 	\
-		    "m" (_proxy_pda.field)); 	\
+		    "i" (pda_offset(field)),	\
+		    "m" (_proxy_pda.field));	\
 		 break;				\
-       default: 				\
+	default:				\
 		__bad_pda_field();		\
        }					\
        ret__; })
 
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
-#define or_pda(field,val) pda_to_op("or",field,val)
+#define read_pda(field)		pda_from_op("mov", field)
+#define write_pda(field, val)	pda_to_op("mov", field, val)
+#define add_pda(field, val)	pda_to_op("add", field, val)
+#define sub_pda(field, val)	pda_to_op("sub", field, val)
+#define or_pda(field, val)	pda_to_op("or", field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit,field) ({		\
+#define test_and_clear_bit_pda(bit, field) ({		\
 	int old__;						\
 	asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0"		\
-	    : "=r" (old__), "+m" (_proxy_pda.field) 		\
+	    : "=r" (old__), "+m" (_proxy_pda.field)		\
 	    : "dIr" (bit), "i" (pda_offset(field)) : "memory");	\
 	old__;							\
 })

From 4ad02718345783a3b84bae1895917666b779850d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 334/890] x86: clean up local_{32|64}.h

Common prefix from both files moved to local.h

Change __inline__ to inline

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/local_32.h | 34 ++++++++++------------------------
 include/asm-x86/local_64.h | 25 ++++++-------------------
 2 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/include/asm-x86/local_32.h b/include/asm-x86/local_32.h
index 6e85975b9ed2..33d9c7bf463c 100644
--- a/include/asm-x86/local_32.h
+++ b/include/asm-x86/local_32.h
@@ -1,35 +1,21 @@
 #ifndef _ARCH_I386_LOCAL_H
 #define _ARCH_I386_LOCAL_H
 
-#include <linux/percpu.h>
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-typedef struct
-{
-	atomic_long_t a;
-} local_t;
-
-#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
-
-#define local_read(l)	atomic_long_read(&(l)->a)
-#define local_set(l,i)	atomic_long_set(&(l)->a, (i))
-
-static __inline__ void local_inc(local_t *l)
+static inline void local_inc(local_t *l)
 {
 	__asm__ __volatile__(
 		"incl %0"
 		:"+m" (l->a.counter));
 }
 
-static __inline__ void local_dec(local_t *l)
+static inline void local_dec(local_t *l)
 {
 	__asm__ __volatile__(
 		"decl %0"
 		:"+m" (l->a.counter));
 }
 
-static __inline__ void local_add(long i, local_t *l)
+static inline void local_add(long i, local_t *l)
 {
 	__asm__ __volatile__(
 		"addl %1,%0"
@@ -37,7 +23,7 @@ static __inline__ void local_add(long i, local_t *l)
 		:"ir" (i));
 }
 
-static __inline__ void local_sub(long i, local_t *l)
+static inline void local_sub(long i, local_t *l)
 {
 	__asm__ __volatile__(
 		"subl %1,%0"
@@ -54,7 +40,7 @@ static __inline__ void local_sub(long i, local_t *l)
  * true if the result is zero, or false for all
  * other cases.
  */
-static __inline__ int local_sub_and_test(long i, local_t *l)
+static inline int local_sub_and_test(long i, local_t *l)
 {
 	unsigned char c;
 
@@ -73,7 +59,7 @@ static __inline__ int local_sub_and_test(long i, local_t *l)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static __inline__ int local_dec_and_test(local_t *l)
+static inline int local_dec_and_test(local_t *l)
 {
 	unsigned char c;
 
@@ -92,7 +78,7 @@ static __inline__ int local_dec_and_test(local_t *l)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static __inline__ int local_inc_and_test(local_t *l)
+static inline int local_inc_and_test(local_t *l)
 {
 	unsigned char c;
 
@@ -112,7 +98,7 @@ static __inline__ int local_inc_and_test(local_t *l)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static __inline__ int local_add_negative(long i, local_t *l)
+static inline int local_add_negative(long i, local_t *l)
 {
 	unsigned char c;
 
@@ -130,7 +116,7 @@ static __inline__ int local_add_negative(long i, local_t *l)
  *
  * Atomically adds @i to @l and returns @i + @l
  */
-static __inline__ long local_add_return(long i, local_t *l)
+static inline long local_add_return(long i, local_t *l)
 {
 	long __i;
 #ifdef CONFIG_M386
@@ -156,7 +142,7 @@ no_xadd: /* Legacy 386 processor */
 #endif
 }
 
-static __inline__ long local_sub_return(long i, local_t *l)
+static inline long local_sub_return(long i, local_t *l)
 {
 	return local_add_return(-i,l);
 }
diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h
index e87492bb0693..92330f8135f8 100644
--- a/include/asm-x86/local_64.h
+++ b/include/asm-x86/local_64.h
@@ -1,19 +1,6 @@
 #ifndef _ARCH_X8664_LOCAL_H
 #define _ARCH_X8664_LOCAL_H
 
-#include <linux/percpu.h>
-#include <asm/atomic.h>
-
-typedef struct
-{
-	atomic_long_t a;
-} local_t;
-
-#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
-
-#define local_read(l)	atomic_long_read(&(l)->a)
-#define local_set(l,i)	atomic_long_set(&(l)->a, (i))
-
 static inline void local_inc(local_t *l)
 {
 	__asm__ __volatile__(
@@ -55,7 +42,7 @@ static inline void local_sub(long i, local_t *l)
  * true if the result is zero, or false for all
  * other cases.
  */
-static __inline__ int local_sub_and_test(long i, local_t *l)
+static inline int local_sub_and_test(long i, local_t *l)
 {
 	unsigned char c;
 
@@ -74,7 +61,7 @@ static __inline__ int local_sub_and_test(long i, local_t *l)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static __inline__ int local_dec_and_test(local_t *l)
+static inline int local_dec_and_test(local_t *l)
 {
 	unsigned char c;
 
@@ -93,7 +80,7 @@ static __inline__ int local_dec_and_test(local_t *l)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static __inline__ int local_inc_and_test(local_t *l)
+static inline int local_inc_and_test(local_t *l)
 {
 	unsigned char c;
 
@@ -113,7 +100,7 @@ static __inline__ int local_inc_and_test(local_t *l)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static __inline__ int local_add_negative(long i, local_t *l)
+static inline int local_add_negative(long i, local_t *l)
 {
 	unsigned char c;
 
@@ -131,7 +118,7 @@ static __inline__ int local_add_negative(long i, local_t *l)
  *
  * Atomically adds @i to @l and returns @i + @l
  */
-static __inline__ long local_add_return(long i, local_t *l)
+static inline long local_add_return(long i, local_t *l)
 {
 	long __i = i;
 	__asm__ __volatile__(
@@ -141,7 +128,7 @@ static __inline__ long local_add_return(long i, local_t *l)
 	return i + __i;
 }
 
-static __inline__ long local_sub_return(long i, local_t *l)
+static inline long local_sub_return(long i, local_t *l)
 {
 	return local_add_return(-i,l);
 }

From 992b95920a311db3267659ea17160e4812a05830 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 335/890] x86: fix asm memory constraints in local_64.h

Use the shorter +m form rather than =m and m.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/local_64.h | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h
index 92330f8135f8..50e99eddd628 100644
--- a/include/asm-x86/local_64.h
+++ b/include/asm-x86/local_64.h
@@ -5,32 +5,30 @@ static inline void local_inc(local_t *l)
 {
 	__asm__ __volatile__(
 		"incq %0"
-		:"=m" (l->a.counter)
-		:"m" (l->a.counter));
+		:"+m" (l->a.counter));
 }
 
 static inline void local_dec(local_t *l)
 {
 	__asm__ __volatile__(
 		"decq %0"
-		:"=m" (l->a.counter)
-		:"m" (l->a.counter));
+		:"+m" (l->a.counter));
 }
 
 static inline void local_add(long i, local_t *l)
 {
 	__asm__ __volatile__(
 		"addq %1,%0"
-		:"=m" (l->a.counter)
-		:"ir" (i), "m" (l->a.counter));
+		:"+m" (l->a.counter)
+		:"ir" (i));
 }
 
 static inline void local_sub(long i, local_t *l)
 {
 	__asm__ __volatile__(
 		"subq %1,%0"
-		:"=m" (l->a.counter)
-		:"ir" (i), "m" (l->a.counter));
+		:"+m" (l->a.counter)
+		:"ir" (i));
 }
 
 /**
@@ -48,8 +46,8 @@ static inline int local_sub_and_test(long i, local_t *l)
 
 	__asm__ __volatile__(
 		"subq %2,%0; sete %1"
-		:"=m" (l->a.counter), "=qm" (c)
-		:"ir" (i), "m" (l->a.counter) : "memory");
+		:"+m" (l->a.counter), "=qm" (c)
+		:"ir" (i) : "memory");
 	return c;
 }
 
@@ -67,8 +65,8 @@ static inline int local_dec_and_test(local_t *l)
 
 	__asm__ __volatile__(
 		"decq %0; sete %1"
-		:"=m" (l->a.counter), "=qm" (c)
-		:"m" (l->a.counter) : "memory");
+		:"+m" (l->a.counter), "=qm" (c)
+		: : "memory");
 	return c != 0;
 }
 
@@ -86,8 +84,8 @@ static inline int local_inc_and_test(local_t *l)
 
 	__asm__ __volatile__(
 		"incq %0; sete %1"
-		:"=m" (l->a.counter), "=qm" (c)
-		:"m" (l->a.counter) : "memory");
+		:"+m" (l->a.counter), "=qm" (c)
+		: : "memory");
 	return c != 0;
 }
 
@@ -106,8 +104,8 @@ static inline int local_add_negative(long i, local_t *l)
 
 	__asm__ __volatile__(
 		"addq %2,%0; sets %1"
-		:"=m" (l->a.counter), "=qm" (c)
-		:"ir" (i), "m" (l->a.counter) : "memory");
+		:"+m" (l->a.counter), "=qm" (c)
+		:"ir" (i) : "memory");
 	return c;
 }
 

From 8ee5797a91bdb713b4031741b33bd035f9c43870 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 336/890] x86: introduce asm helpers in local_{32|64}.h

Handle the use of long on X86_32 and quad on X86_64

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/asm.h      | 12 ++++++++++++
 include/asm-x86/local_32.h | 18 +++++++++---------
 include/asm-x86/local_64.h | 18 +++++++++---------
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 8661ae75488a..1a6980a60fc6 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -8,6 +8,12 @@
 # define _ASM_ALIGN	" .balign 4 "
 # define _ASM_MOV_UL	" movl "
 
+# define _ASM_INC	" incl "
+# define _ASM_DEC	" decl "
+# define _ASM_ADD	" addl "
+# define _ASM_SUB	" subl "
+# define _ASM_XADD	" xaddl "
+
 #else
 /* 64 bits */
 
@@ -15,6 +21,12 @@
 # define _ASM_ALIGN	" .balign 8 "
 # define _ASM_MOV_UL	" movq "
 
+# define _ASM_INC	" incq "
+# define _ASM_DEC	" decq "
+# define _ASM_ADD	" addq "
+# define _ASM_SUB	" subq "
+# define _ASM_XADD	" xaddq "
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/include/asm-x86/local_32.h b/include/asm-x86/local_32.h
index 33d9c7bf463c..c219fe56b3df 100644
--- a/include/asm-x86/local_32.h
+++ b/include/asm-x86/local_32.h
@@ -4,21 +4,21 @@
 static inline void local_inc(local_t *l)
 {
 	__asm__ __volatile__(
-		"incl %0"
+		_ASM_INC "%0"
 		:"+m" (l->a.counter));
 }
 
 static inline void local_dec(local_t *l)
 {
 	__asm__ __volatile__(
-		"decl %0"
+		_ASM_DEC "%0"
 		:"+m" (l->a.counter));
 }
 
 static inline void local_add(long i, local_t *l)
 {
 	__asm__ __volatile__(
-		"addl %1,%0"
+		_ASM_ADD "%1,%0"
 		:"+m" (l->a.counter)
 		:"ir" (i));
 }
@@ -26,7 +26,7 @@ static inline void local_add(long i, local_t *l)
 static inline void local_sub(long i, local_t *l)
 {
 	__asm__ __volatile__(
-		"subl %1,%0"
+		_ASM_SUB "%1,%0"
 		:"+m" (l->a.counter)
 		:"ir" (i));
 }
@@ -45,7 +45,7 @@ static inline int local_sub_and_test(long i, local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"subl %2,%0; sete %1"
+		_ASM_SUB "%2,%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		:"ir" (i) : "memory");
 	return c;
@@ -64,7 +64,7 @@ static inline int local_dec_and_test(local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"decl %0; sete %1"
+		_ASM_DEC "%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		: : "memory");
 	return c != 0;
@@ -83,7 +83,7 @@ static inline int local_inc_and_test(local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"incl %0; sete %1"
+		_ASM_INC "%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		: : "memory");
 	return c != 0;
@@ -103,7 +103,7 @@ static inline int local_add_negative(long i, local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"addl %2,%0; sets %1"
+		_ASM_ADD "%2,%0; sets %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		:"ir" (i) : "memory");
 	return c;
@@ -127,7 +127,7 @@ static inline long local_add_return(long i, local_t *l)
 	/* Modern 486+ processor */
 	__i = i;
 	__asm__ __volatile__(
-		"xaddl %0, %1;"
+		_ASM_XADD "%0, %1;"
 		:"+r" (i), "+m" (l->a.counter)
 		: : "memory");
 	return i + __i;
diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h
index 50e99eddd628..d685cd7e014f 100644
--- a/include/asm-x86/local_64.h
+++ b/include/asm-x86/local_64.h
@@ -4,21 +4,21 @@
 static inline void local_inc(local_t *l)
 {
 	__asm__ __volatile__(
-		"incq %0"
+		_ASM_INC "%0"
 		:"+m" (l->a.counter));
 }
 
 static inline void local_dec(local_t *l)
 {
 	__asm__ __volatile__(
-		"decq %0"
+		_ASM_DEC "%0"
 		:"+m" (l->a.counter));
 }
 
 static inline void local_add(long i, local_t *l)
 {
 	__asm__ __volatile__(
-		"addq %1,%0"
+		_ASM_ADD "%1,%0"
 		:"+m" (l->a.counter)
 		:"ir" (i));
 }
@@ -26,7 +26,7 @@ static inline void local_add(long i, local_t *l)
 static inline void local_sub(long i, local_t *l)
 {
 	__asm__ __volatile__(
-		"subq %1,%0"
+		_ASM_SUB "%1,%0"
 		:"+m" (l->a.counter)
 		:"ir" (i));
 }
@@ -45,7 +45,7 @@ static inline int local_sub_and_test(long i, local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"subq %2,%0; sete %1"
+		_ASM_SUB "%2,%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		:"ir" (i) : "memory");
 	return c;
@@ -64,7 +64,7 @@ static inline int local_dec_and_test(local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"decq %0; sete %1"
+		_ASM_DEC "%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		: : "memory");
 	return c != 0;
@@ -83,7 +83,7 @@ static inline int local_inc_and_test(local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"incq %0; sete %1"
+		_ASM_INC "%0; sete %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		: : "memory");
 	return c != 0;
@@ -103,7 +103,7 @@ static inline int local_add_negative(long i, local_t *l)
 	unsigned char c;
 
 	__asm__ __volatile__(
-		"addq %2,%0; sets %1"
+		_ASM_ADD "%2,%0; sets %1"
 		:"+m" (l->a.counter), "=qm" (c)
 		:"ir" (i) : "memory");
 	return c;
@@ -120,7 +120,7 @@ static inline long local_add_return(long i, local_t *l)
 {
 	long __i = i;
 	__asm__ __volatile__(
-		"xaddq %0, %1;"
+		_ASM_XADD "%0, %1;"
 		:"+r" (i), "+m" (l->a.counter)
 		: : "memory");
 	return i + __i;

From 5638f99394d478bcba6384ac7c51e763d1a3a448 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 337/890] x86: unify local_{32|64}.h

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/local.h    | 244 ++++++++++++++++++++++++++++++++++++-
 include/asm-x86/local_32.h | 219 ---------------------------------
 include/asm-x86/local_64.h | 207 -------------------------------
 3 files changed, 240 insertions(+), 430 deletions(-)
 delete mode 100644 include/asm-x86/local_32.h
 delete mode 100644 include/asm-x86/local_64.h

diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index c7a1b1c66c96..f5677e208601 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h
@@ -1,5 +1,241 @@
-#ifdef CONFIG_X86_32
-# include "local_32.h"
-#else
-# include "local_64.h"
+#ifndef _ARCH_LOCAL_H
+#define _ARCH_LOCAL_H
+
+#include <linux/percpu.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/asm.h>
+
+typedef struct
+{
+	atomic_long_t a;
+} local_t;
+
+#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
+
+#define local_read(l)	atomic_long_read(&(l)->a)
+#define local_set(l,i)	atomic_long_set(&(l)->a, (i))
+
+static inline void local_inc(local_t *l)
+{
+	__asm__ __volatile__(
+		_ASM_INC "%0"
+		:"+m" (l->a.counter));
+}
+
+static inline void local_dec(local_t *l)
+{
+	__asm__ __volatile__(
+		_ASM_DEC "%0"
+		:"+m" (l->a.counter));
+}
+
+static inline void local_add(long i, local_t *l)
+{
+	__asm__ __volatile__(
+		_ASM_ADD "%1,%0"
+		:"+m" (l->a.counter)
+		:"ir" (i));
+}
+
+static inline void local_sub(long i, local_t *l)
+{
+	__asm__ __volatile__(
+		_ASM_SUB "%1,%0"
+		:"+m" (l->a.counter)
+		:"ir" (i));
+}
+
+/**
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer to type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int local_sub_and_test(long i, local_t *l)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		_ASM_SUB "%2,%0; sete %1"
+		:"+m" (l->a.counter), "=qm" (c)
+		:"ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * local_dec_and_test - decrement and test
+ * @l: pointer to type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int local_dec_and_test(local_t *l)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		_ASM_DEC "%0; sete %1"
+		:"+m" (l->a.counter), "=qm" (c)
+		: : "memory");
+	return c != 0;
+}
+
+/**
+ * local_inc_and_test - increment and test
+ * @l: pointer to type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int local_inc_and_test(local_t *l)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		_ASM_INC "%0; sete %1"
+		:"+m" (l->a.counter), "=qm" (c)
+		: : "memory");
+	return c != 0;
+}
+
+/**
+ * local_add_negative - add and test if negative
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int local_add_negative(long i, local_t *l)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		_ASM_ADD "%2,%0; sets %1"
+		:"+m" (l->a.counter), "=qm" (c)
+		:"ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * local_add_return - add and return
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static inline long local_add_return(long i, local_t *l)
+{
+	long __i;
+#ifdef CONFIG_M386
+	unsigned long flags;
+	if(unlikely(boot_cpu_data.x86 <= 3))
+		goto no_xadd;
 #endif
+	/* Modern 486+ processor */
+	__i = i;
+	__asm__ __volatile__(
+		_ASM_XADD "%0, %1;"
+		:"+r" (i), "+m" (l->a.counter)
+		: : "memory");
+	return i + __i;
+
+#ifdef CONFIG_M386
+no_xadd: /* Legacy 386 processor */
+	local_irq_save(flags);
+	__i = local_read(l);
+	local_set(l, i + __i);
+	local_irq_restore(flags);
+	return i + __i;
+#endif
+}
+
+static inline long local_sub_return(long i, local_t *l)
+{
+	return local_add_return(-i,l);
+}
+
+#define local_inc_return(l)  (local_add_return(1,l))
+#define local_dec_return(l)  (local_sub_return(1,l))
+
+#define local_cmpxchg(l, o, n) \
+	(cmpxchg_local(&((l)->a.counter), (o), (n)))
+/* Always has a lock prefix */
+#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u)				\
+({								\
+	long c, old;						\
+	c = local_read(l);					\
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = local_cmpxchg((l), c, c + (a));	\
+		if (likely(old == c))				\
+			break;					\
+		c = old;					\
+	}							\
+	c != (u);						\
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+/* On x86_32, these are no better than the atomic variants.
+ * On x86-64 these are better than the atomic variants on SMP kernels
+ * because they dont use a lock prefix.
+ */
+#define __local_inc(l)		local_inc(l)
+#define __local_dec(l)		local_dec(l)
+#define __local_add(i,l)	local_add((i),(l))
+#define __local_sub(i,l)	local_sub((i),(l))
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations.  Note they take
+ * a variable, not an address.
+ *
+ * X86_64: This could be done better if we moved the per cpu data directly
+ * after GS.
+ */
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+   still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(l)	 	\
+	({ local_t res__;		\
+	   preempt_disable(); 		\
+	   res__ = (l);			\
+	   preempt_enable();		\
+	   res__; })
+#define cpu_local_wrap(l)		\
+	({ preempt_disable();		\
+	   l;				\
+	   preempt_enable(); })		\
+
+#define cpu_local_read(l)    cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i)  cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l)     cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l)     cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l)  cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l)  cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
+
+#define __cpu_local_inc(l)	cpu_local_inc(l)
+#define __cpu_local_dec(l)	cpu_local_dec(l)
+#define __cpu_local_add(i, l)	cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
+
+#endif /* _ARCH_LOCAL_H */
diff --git a/include/asm-x86/local_32.h b/include/asm-x86/local_32.h
deleted file mode 100644
index c219fe56b3df..000000000000
--- a/include/asm-x86/local_32.h
+++ /dev/null
@@ -1,219 +0,0 @@
-#ifndef _ARCH_I386_LOCAL_H
-#define _ARCH_I386_LOCAL_H
-
-static inline void local_inc(local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_INC "%0"
-		:"+m" (l->a.counter));
-}
-
-static inline void local_dec(local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_DEC "%0"
-		:"+m" (l->a.counter));
-}
-
-static inline void local_add(long i, local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_ADD "%1,%0"
-		:"+m" (l->a.counter)
-		:"ir" (i));
-}
-
-static inline void local_sub(long i, local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_SUB "%1,%0"
-		:"+m" (l->a.counter)
-		:"ir" (i));
-}
-
-/**
- * local_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @l: pointer of type local_t
- *
- * Atomically subtracts @i from @l and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int local_sub_and_test(long i, local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_SUB "%2,%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		:"ir" (i) : "memory");
-	return c;
-}
-
-/**
- * local_dec_and_test - decrement and test
- * @l: pointer of type local_t
- *
- * Atomically decrements @l by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int local_dec_and_test(local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_DEC "%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		: : "memory");
-	return c != 0;
-}
-
-/**
- * local_inc_and_test - increment and test
- * @l: pointer of type local_t
- *
- * Atomically increments @l by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int local_inc_and_test(local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_INC "%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		: : "memory");
-	return c != 0;
-}
-
-/**
- * local_add_negative - add and test if negative
- * @l: pointer of type local_t
- * @i: integer value to add
- *
- * Atomically adds @i to @l and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int local_add_negative(long i, local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_ADD "%2,%0; sets %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		:"ir" (i) : "memory");
-	return c;
-}
-
-/**
- * local_add_return - add and return
- * @l: pointer of type local_t
- * @i: integer value to add
- *
- * Atomically adds @i to @l and returns @i + @l
- */
-static inline long local_add_return(long i, local_t *l)
-{
-	long __i;
-#ifdef CONFIG_M386
-	unsigned long flags;
-	if(unlikely(boot_cpu_data.x86 <= 3))
-		goto no_xadd;
-#endif
-	/* Modern 486+ processor */
-	__i = i;
-	__asm__ __volatile__(
-		_ASM_XADD "%0, %1;"
-		:"+r" (i), "+m" (l->a.counter)
-		: : "memory");
-	return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
-	local_irq_save(flags);
-	__i = local_read(l);
-	local_set(l, i + __i);
-	local_irq_restore(flags);
-	return i + __i;
-#endif
-}
-
-static inline long local_sub_return(long i, local_t *l)
-{
-	return local_add_return(-i,l);
-}
-
-#define local_inc_return(l)  (local_add_return(1,l))
-#define local_dec_return(l)  (local_sub_return(1,l))
-
-#define local_cmpxchg(l, o, n) \
-	(cmpxchg_local(&((l)->a.counter), (o), (n)))
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
-
-/**
- * local_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-#define local_add_unless(l, a, u)				\
-({								\
-	long c, old;						\
-	c = local_read(l);					\
-	for (;;) {						\
-		if (unlikely(c == (u)))				\
-			break;					\
-		old = local_cmpxchg((l), c, c + (a));	\
-		if (likely(old == c))				\
-			break;					\
-		c = old;					\
-	}							\
-	c != (u);						\
-})
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-/* On x86, these are no better than the atomic variants. */
-#define __local_inc(l)		local_inc(l)
-#define __local_dec(l)		local_dec(l)
-#define __local_add(i,l)	local_add((i),(l))
-#define __local_sub(i,l)	local_sub((i),(l))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations.  Note they take
- * a variable, not an address.
- */
-
-/* Need to disable preemption for the cpu local counters otherwise we could
-   still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(l)	 	\
-	({ local_t res__;		\
-	   preempt_disable(); 		\
-	   res__ = (l);			\
-	   preempt_enable();		\
-	   res__; })
-#define cpu_local_wrap(l)		\
-	({ preempt_disable();		\
-	   l;				\
-	   preempt_enable(); })		\
-
-#define cpu_local_read(l)    cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
-#define cpu_local_set(l, i)  cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
-#define cpu_local_inc(l)     cpu_local_wrap(local_inc(&__get_cpu_var(l)))
-#define cpu_local_dec(l)     cpu_local_wrap(local_dec(&__get_cpu_var(l)))
-#define cpu_local_add(i, l)  cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
-#define cpu_local_sub(i, l)  cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
-
-#define __cpu_local_inc(l)	cpu_local_inc(l)
-#define __cpu_local_dec(l)	cpu_local_dec(l)
-#define __cpu_local_add(i, l)	cpu_local_add((i), (l))
-#define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
-
-#endif /* _ARCH_I386_LOCAL_H */
diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h
deleted file mode 100644
index d685cd7e014f..000000000000
--- a/include/asm-x86/local_64.h
+++ /dev/null
@@ -1,207 +0,0 @@
-#ifndef _ARCH_X8664_LOCAL_H
-#define _ARCH_X8664_LOCAL_H
-
-static inline void local_inc(local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_INC "%0"
-		:"+m" (l->a.counter));
-}
-
-static inline void local_dec(local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_DEC "%0"
-		:"+m" (l->a.counter));
-}
-
-static inline void local_add(long i, local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_ADD "%1,%0"
-		:"+m" (l->a.counter)
-		:"ir" (i));
-}
-
-static inline void local_sub(long i, local_t *l)
-{
-	__asm__ __volatile__(
-		_ASM_SUB "%1,%0"
-		:"+m" (l->a.counter)
-		:"ir" (i));
-}
-
-/**
- * local_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @l: pointer to type local_t
- *
- * Atomically subtracts @i from @l and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int local_sub_and_test(long i, local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_SUB "%2,%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		:"ir" (i) : "memory");
-	return c;
-}
-
-/**
- * local_dec_and_test - decrement and test
- * @l: pointer to type local_t
- *
- * Atomically decrements @l by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int local_dec_and_test(local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_DEC "%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		: : "memory");
-	return c != 0;
-}
-
-/**
- * local_inc_and_test - increment and test
- * @l: pointer to type local_t
- *
- * Atomically increments @l by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int local_inc_and_test(local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_INC "%0; sete %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		: : "memory");
-	return c != 0;
-}
-
-/**
- * local_add_negative - add and test if negative
- * @i: integer value to add
- * @l: pointer to type local_t
- *
- * Atomically adds @i to @l and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int local_add_negative(long i, local_t *l)
-{
-	unsigned char c;
-
-	__asm__ __volatile__(
-		_ASM_ADD "%2,%0; sets %1"
-		:"+m" (l->a.counter), "=qm" (c)
-		:"ir" (i) : "memory");
-	return c;
-}
-
-/**
- * local_add_return - add and return
- * @i: integer value to add
- * @l: pointer to type local_t
- *
- * Atomically adds @i to @l and returns @i + @l
- */
-static inline long local_add_return(long i, local_t *l)
-{
-	long __i = i;
-	__asm__ __volatile__(
-		_ASM_XADD "%0, %1;"
-		:"+r" (i), "+m" (l->a.counter)
-		: : "memory");
-	return i + __i;
-}
-
-static inline long local_sub_return(long i, local_t *l)
-{
-	return local_add_return(-i,l);
-}
-
-#define local_inc_return(l)  (local_add_return(1,l))
-#define local_dec_return(l)  (local_sub_return(1,l))
-
-#define local_cmpxchg(l, o, n) \
-	(cmpxchg_local(&((l)->a.counter), (o), (n)))
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
-
-/**
- * atomic_up_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-#define local_add_unless(l, a, u)				\
-({								\
-	long c, old;						\
-	c = local_read(l);					\
-	for (;;) {						\
-		if (unlikely(c == (u)))				\
-			break;					\
-		old = local_cmpxchg((l), c, c + (a));	\
-		if (likely(old == c))				\
-			break;					\
-		c = old;					\
-	}							\
-	c != (u);						\
-})
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-/* On x86-64 these are better than the atomic variants on SMP kernels
-   because they dont use a lock prefix. */
-#define __local_inc(l)		local_inc(l)
-#define __local_dec(l)		local_dec(l)
-#define __local_add(i,l)	local_add((i),(l))
-#define __local_sub(i,l)	local_sub((i),(l))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations.  Note they take
- * a variable, not an address.
- *
- * This could be done better if we moved the per cpu data directly
- * after GS.
- */
-
-/* Need to disable preemption for the cpu local counters otherwise we could
-   still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(l)	 	\
-	({ local_t res__;		\
-	   preempt_disable(); 		\
-	   res__ = (l);			\
-	   preempt_enable();		\
-	   res__; })
-#define cpu_local_wrap(l)		\
-	({ preempt_disable();		\
-	   l;				\
-	   preempt_enable(); })		\
-
-#define cpu_local_read(l)    cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
-#define cpu_local_set(l, i)  cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
-#define cpu_local_inc(l)     cpu_local_wrap(local_inc(&__get_cpu_var(l)))
-#define cpu_local_dec(l)     cpu_local_wrap(local_dec(&__get_cpu_var(l)))
-#define cpu_local_add(i, l)  cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
-#define cpu_local_sub(i, l)  cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
-
-#define __cpu_local_inc(l)	cpu_local_inc(l)
-#define __cpu_local_dec(l)	cpu_local_dec(l)
-#define __cpu_local_add(i, l)	cpu_local_add((i), (l))
-#define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
-
-#endif /* _ARCH_X8664_LOCAL_H */

From 01c57fb6a3eefc34da17be1399453c0800bcff68 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 338/890] x86: local.h fix checkpatch warnings

Mostly space after comma, one space after if.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/local.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index f5677e208601..f852c62b3319 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h
@@ -7,15 +7,14 @@
 #include <asm/atomic.h>
 #include <asm/asm.h>
 
-typedef struct
-{
+typedef struct {
 	atomic_long_t a;
 } local_t;
 
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
-#define local_set(l,i)	atomic_long_set(&(l)->a, (i))
+#define local_set(l, i)	atomic_long_set(&(l)->a, (i))
 
 static inline void local_inc(local_t *l)
 {
@@ -137,7 +136,7 @@ static inline long local_add_return(long i, local_t *l)
 	long __i;
 #ifdef CONFIG_M386
 	unsigned long flags;
-	if(unlikely(boot_cpu_data.x86 <= 3))
+	if (unlikely(boot_cpu_data.x86 <= 3))
 		goto no_xadd;
 #endif
 	/* Modern 486+ processor */
@@ -160,11 +159,11 @@ no_xadd: /* Legacy 386 processor */
 
 static inline long local_sub_return(long i, local_t *l)
 {
-	return local_add_return(-i,l);
+	return local_add_return(-i, l);
 }
 
-#define local_inc_return(l)  (local_add_return(1,l))
-#define local_dec_return(l)  (local_sub_return(1,l))
+#define local_inc_return(l)  (local_add_return(1, l))
+#define local_dec_return(l)  (local_sub_return(1, l))
 
 #define local_cmpxchg(l, o, n) \
 	(cmpxchg_local(&((l)->a.counter), (o), (n)))
@@ -202,8 +201,8 @@ static inline long local_sub_return(long i, local_t *l)
  */
 #define __local_inc(l)		local_inc(l)
 #define __local_dec(l)		local_dec(l)
-#define __local_add(i,l)	local_add((i),(l))
-#define __local_sub(i,l)	local_sub((i),(l))
+#define __local_add(i, l)	local_add((i), (l))
+#define __local_sub(i, l)	local_sub((i), (l))
 
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations.  Note they take

From 3c233d1334ffc8de63a4b6a6a86c40961aed335e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 339/890] x86: unify kexec_{32|64}.h

One section collecting all constant defines.  Ifdef the asm
blocks for X86_32/64.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
---
 include/asm-x86/kexec.h    | 169 ++++++++++++++++++++++++++++++++++++-
 include/asm-x86/kexec_32.h |  99 ----------------------
 include/asm-x86/kexec_64.h |  94 ---------------------
 3 files changed, 167 insertions(+), 195 deletions(-)
 delete mode 100644 include/asm-x86/kexec_32.h
 delete mode 100644 include/asm-x86/kexec_64.h

diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 718ddbfb9516..c90d3c77afc2 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -1,5 +1,170 @@
+#ifndef _KEXEC_H
+#define _KEXEC_H
+
 #ifdef CONFIG_X86_32
-# include "kexec_32.h"
+# define PA_CONTROL_PAGE	0
+# define VA_CONTROL_PAGE	1
+# define PA_PGD			2
+# define VA_PGD			3
+# define PA_PTE_0		4
+# define VA_PTE_0		5
+# define PA_PTE_1		6
+# define VA_PTE_1		7
+# ifdef CONFIG_X86_PAE
+#  define PA_PMD_0		8
+#  define VA_PMD_0		9
+#  define PA_PMD_1		10
+#  define VA_PMD_1		11
+#  define PAGES_NR		12
+# else
+#  define PAGES_NR		8
+# endif
 #else
-# include "kexec_64.h"
+# define PA_CONTROL_PAGE	0
+# define VA_CONTROL_PAGE	1
+# define PA_PGD			2
+# define VA_PGD			3
+# define PA_PUD_0		4
+# define VA_PUD_0		5
+# define PA_PMD_0		6
+# define VA_PMD_0		7
+# define PA_PTE_0		8
+# define VA_PTE_0		9
+# define PA_PUD_1		10
+# define VA_PUD_1		11
+# define PA_PMD_1		12
+# define VA_PMD_1		13
+# define PA_PTE_1		14
+# define VA_PTE_1		15
+# define PA_TABLE_PAGE		16
+# define PAGES_NR		17
 #endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ *
+ * So far x86_64 is limited to 40 physical address bits.
+ */
+#ifdef CONFIG_X86_32
+/* Maximum physical address we can use pages from */
+# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+# define KEXEC_CONTROL_CODE_SIZE	4096
+
+/* The native architecture */
+# define KEXEC_ARCH KEXEC_ARCH_386
+
+/* We can also handle crash dumps from 64 bit kernel. */
+# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
+#else
+/* Maximum physical address we can use pages from */
+# define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
+/* Maximum address we can reach in physical address mode */
+# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+/* Maximum address we can use for the control pages */
+# define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
+
+/* Allocate one page for the pdp and the second for the code */
+# define KEXEC_CONTROL_CODE_SIZE  (4096UL + 4096UL)
+
+/* The native architecture */
+# define KEXEC_ARCH KEXEC_ARCH_X86_64
+#endif
+
+/*
+ * CPU does not save ss and sp on stack if execution is already
+ * running in kernel mode at the time of NMI occurrence. This code
+ * fixes it.
+ */
+static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
+				      struct pt_regs *oldregs)
+{
+#ifdef CONFIG_X86_32
+	newregs->sp = (unsigned long)&(oldregs->sp);
+	__asm__ __volatile__(
+			"xorl %%eax, %%eax\n\t"
+			"movw %%ss, %%ax\n\t"
+			:"=a"(newregs->ss));
+#endif
+}
+
+/*
+ * This function is responsible for capturing register states if coming
+ * via panic otherwise just fix up the ss and sp if coming via kernel
+ * mode exception.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+		crash_fixup_ss_esp(newregs, oldregs);
+	} else {
+#ifdef CONFIG_X86_32
+		__asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->bx));
+		__asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->cx));
+		__asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->dx));
+		__asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->si));
+		__asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->di));
+		__asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->bp));
+		__asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->ax));
+		__asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->sp));
+		__asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
+		__asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
+		__asm__ __volatile__("movl %%ds, %%eax;" :"=a"(newregs->ds));
+		__asm__ __volatile__("movl %%es, %%eax;" :"=a"(newregs->es));
+		__asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->flags));
+#else
+		__asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->bx));
+		__asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->cx));
+		__asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->dx));
+		__asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->si));
+		__asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->di));
+		__asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->bp));
+		__asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->ax));
+		__asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->sp));
+		__asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8));
+		__asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9));
+		__asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10));
+		__asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11));
+		__asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12));
+		__asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13));
+		__asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14));
+		__asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15));
+		__asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
+		__asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
+		__asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->flags));
+#endif
+		newregs->ip = (unsigned long)current_text_addr();
+	}
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage NORET_TYPE void
+relocate_kernel(unsigned long indirection_page,
+		unsigned long control_page,
+		unsigned long start_address,
+		unsigned int has_pae) ATTRIB_NORET;
+#else
+NORET_TYPE void
+relocate_kernel(unsigned long indirection_page,
+		unsigned long page_list,
+		unsigned long start_address) ATTRIB_NORET;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _KEXEC_H */
diff --git a/include/asm-x86/kexec_32.h b/include/asm-x86/kexec_32.h
deleted file mode 100644
index ff39d2f88022..000000000000
--- a/include/asm-x86/kexec_32.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef _I386_KEXEC_H
-#define _I386_KEXEC_H
-
-#define PA_CONTROL_PAGE  0
-#define VA_CONTROL_PAGE  1
-#define PA_PGD           2
-#define VA_PGD           3
-#define PA_PTE_0         4
-#define VA_PTE_0         5
-#define PA_PTE_1         6
-#define VA_PTE_1         7
-#ifdef CONFIG_X86_PAE
-#define PA_PMD_0         8
-#define VA_PMD_0         9
-#define PA_PMD_1         10
-#define VA_PMD_1         11
-#define PAGES_NR         12
-#else
-#define PAGES_NR         8
-#endif
-
-#ifndef __ASSEMBLY__
-
-#include <asm/ptrace.h>
-#include <asm/string.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
-/* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-
-#define KEXEC_CONTROL_CODE_SIZE	4096
-
-/* The native architecture */
-#define KEXEC_ARCH KEXEC_ARCH_386
-
-/* We can also handle crash dumps from 64 bit kernel. */
-#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
-
-/* CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
-					struct pt_regs *oldregs)
-{
-	memcpy(newregs, oldregs, sizeof(*newregs));
-	newregs->sp = (unsigned long)&(oldregs->sp);
-	__asm__ __volatile__(
-			"xorl %%eax, %%eax\n\t"
-			"movw %%ss, %%ax\n\t"
-			:"=a"(newregs->ss));
-}
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic otherwise just fix up the ss and sp if coming via kernel
- * mode exception.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
-                                       struct pt_regs *oldregs)
-{
-       if (oldregs)
-               crash_fixup_ss_esp(newregs, oldregs);
-       else {
-               __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->bx));
-               __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->cx));
-               __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->dx));
-               __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->si));
-               __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->di));
-               __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->bp));
-               __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->ax));
-               __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->sp));
-               __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
-               __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
-               __asm__ __volatile__("movl %%ds, %%eax;" :"=a"(newregs->ds));
-               __asm__ __volatile__("movl %%es, %%eax;" :"=a"(newregs->es));
-               __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->flags));
-
-               newregs->ip = (unsigned long)current_text_addr();
-       }
-}
-asmlinkage NORET_TYPE void
-relocate_kernel(unsigned long indirection_page,
-		unsigned long control_page,
-		unsigned long start_address,
-		unsigned int has_pae) ATTRIB_NORET;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _I386_KEXEC_H */
diff --git a/include/asm-x86/kexec_64.h b/include/asm-x86/kexec_64.h
deleted file mode 100644
index b5f989b15c0b..000000000000
--- a/include/asm-x86/kexec_64.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _X86_64_KEXEC_H
-#define _X86_64_KEXEC_H
-
-#define PA_CONTROL_PAGE  0
-#define VA_CONTROL_PAGE  1
-#define PA_PGD           2
-#define VA_PGD           3
-#define PA_PUD_0         4
-#define VA_PUD_0         5
-#define PA_PMD_0         6
-#define VA_PMD_0         7
-#define PA_PTE_0         8
-#define VA_PTE_0         9
-#define PA_PUD_1         10
-#define VA_PUD_1         11
-#define PA_PMD_1         12
-#define VA_PMD_1         13
-#define PA_PTE_1         14
-#define VA_PTE_1         15
-#define PA_TABLE_PAGE    16
-#define PAGES_NR         17
-
-#ifndef __ASSEMBLY__
-
-#include <linux/string.h>
-
-#include <asm/page.h>
-#include <asm/ptrace.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- *
- * So far x86_64 is limited to 40 physical address bits.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
-/* Maximum address we can use for the control pages */
-#define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
-
-/* Allocate one page for the pdp and the second for the code */
-#define KEXEC_CONTROL_CODE_SIZE  (4096UL + 4096UL)
-
-/* The native architecture */
-#define KEXEC_ARCH KEXEC_ARCH_X86_64
-
-/*
- * Saving the registers of the cpu on which panic occured in
- * crash_kexec to save a valid sp. The registers of other cpus
- * will be saved in machine_crash_shutdown while shooting down them.
- */
-
-static inline void crash_setup_regs(struct pt_regs *newregs,
-						struct pt_regs *oldregs)
-{
-	if (oldregs)
-		memcpy(newregs, oldregs, sizeof(*newregs));
-	else {
-		__asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->bx));
-		__asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->cx));
-		__asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->dx));
-		__asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->si));
-		__asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->di));
-		__asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->bp));
-		__asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->ax));
-		__asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->sp));
-		__asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8));
-		__asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9));
-		__asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10));
-		__asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11));
-		__asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12));
-		__asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13));
-		__asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14));
-		__asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15));
-		__asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
-		__asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
-		__asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->flags));
-
-		newregs->ip = (unsigned long)current_text_addr();
-	}
-}
-
-NORET_TYPE void
-relocate_kernel(unsigned long indirection_page,
-		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _X86_64_KEXEC_H */

From 3b095a04e71243bd0f1679c04f1e8d73a3c9c5a9 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 340/890] x86: cleanup i387_32.c according to checkpatch

clean up checkpatch warnings/errors on i387_32.c

The old and new i387_32.s (asm listings) were checked with diff to
be identical so it's safe to apply this patch.

Signed-off-by: Cyrill Gorcunov <gorunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i387_32.c | 291 +++++++++++++++++++-------------------
 1 file changed, 149 insertions(+), 142 deletions(-)

diff --git a/arch/x86/kernel/i387_32.c b/arch/x86/kernel/i387_32.c
index 7d2e12f6c78b..bebe03463461 100644
--- a/arch/x86/kernel/i387_32.c
+++ b/arch/x86/kernel/i387_32.c
@@ -29,11 +29,13 @@ void mxcsr_feature_mask_init(void)
 	unsigned long mask = 0;
 	clts();
 	if (cpu_has_fxsr) {
-		memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
-		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); 
+		memset(&current->thread.i387.fxsave, 0,
+		       sizeof(struct i387_fxsave_struct));
+		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
 		mask = current->thread.i387.fxsave.mxcsr_mask;
-		if (mask == 0) mask = 0x0000ffbf;
-	} 
+		if (mask == 0)
+			mask = 0x0000ffbf;
+	}
 	mxcsr_feature_mask &= mask;
 	stts();
 }
@@ -47,18 +49,21 @@ void mxcsr_feature_mask_init(void)
 void init_fpu(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
+		memset(&tsk->thread.i387.fxsave, 0,
+		       sizeof(struct i387_fxsave_struct));
 		tsk->thread.i387.fxsave.cwd = 0x37f;
 		if (cpu_has_xmm)
 			tsk->thread.i387.fxsave.mxcsr = 0x1f80;
 	} else {
-		memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct));
+		memset(&tsk->thread.i387.fsave, 0,
+		       sizeof(struct i387_fsave_struct));
 		tsk->thread.i387.fsave.cwd = 0xffff037fu;
 		tsk->thread.i387.fsave.swd = 0xffff0000u;
 		tsk->thread.i387.fsave.twd = 0xffffffffu;
 		tsk->thread.i387.fsave.fos = 0xffff0000u;
 	}
-	/* only the device not available exception or ptrace can call init_fpu */
+	/* only the device not available exception
+	 * or ptrace can call init_fpu */
 	set_stopped_child_used_math(tsk);
 }
 
@@ -83,21 +88,22 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
  * FPU tag word conversions.
  */
 
-static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
 {
 	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
- 
+
 	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
-        tmp = ~twd;
-        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-        /* and move the valid bits to the lower byte. */
-        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-        return tmp;
+	tmp = ~twd;
+	tmp = (tmp | (tmp >> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+	/* and move the valid bits to the lower byte. */
+	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+
+	return tmp;
 }
 
-static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
+static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 {
 	struct _fpxreg *st = NULL;
 	unsigned long tos = (fxsave->swd >> 11) & 7;
@@ -108,26 +114,26 @@ static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave
 
 #define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
 
-	for ( i = 0 ; i < 8 ; i++ ) {
-		if ( twd & 0x1 ) {
-			st = FPREG_ADDR( fxsave, (i - tos) & 7 );
+	for (i = 0; i < 8; i++) {
+		if (twd & 0x1) {
+			st = FPREG_ADDR(fxsave, (i - tos) & 7);
 
-			switch ( st->exponent & 0x7fff ) {
+			switch (st->exponent & 0x7fff) {
 			case 0x7fff:
 				tag = 2;		/* Special */
 				break;
 			case 0x0000:
-				if ( !st->significand[0] &&
-				     !st->significand[1] &&
-				     !st->significand[2] &&
-				     !st->significand[3] ) {
+				if (!st->significand[0] &&
+				    !st->significand[1] &&
+				    !st->significand[2] &&
+				    !st->significand[3]) {
 					tag = 1;	/* Zero */
 				} else {
 					tag = 2;	/* Special */
 				}
 				break;
 			default:
-				if ( st->significand[3] & 0x8000 ) {
+				if (st->significand[3] & 0x8000) {
 					tag = 0;	/* Valid */
 				} else {
 					tag = 2;	/* Special */
@@ -147,18 +153,18 @@ static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave
  * FPU state interaction.
  */
 
-unsigned short get_fpu_cwd( struct task_struct *tsk )
+unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		return tsk->thread.i387.fxsave.cwd;
 	} else {
 		return (unsigned short)tsk->thread.i387.fsave.cwd;
 	}
 }
 
-unsigned short get_fpu_swd( struct task_struct *tsk )
+unsigned short get_fpu_swd(struct task_struct *tsk)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		return tsk->thread.i387.fxsave.swd;
 	} else {
 		return (unsigned short)tsk->thread.i387.fsave.swd;
@@ -166,9 +172,9 @@ unsigned short get_fpu_swd( struct task_struct *tsk )
 }
 
 #if 0
-unsigned short get_fpu_twd( struct task_struct *tsk )
+unsigned short get_fpu_twd(struct task_struct *tsk)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		return tsk->thread.i387.fxsave.twd;
 	} else {
 		return (unsigned short)tsk->thread.i387.fsave.twd;
@@ -176,9 +182,9 @@ unsigned short get_fpu_twd( struct task_struct *tsk )
 }
 #endif  /*  0  */
 
-unsigned short get_fpu_mxcsr( struct task_struct *tsk )
+unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
-	if ( cpu_has_xmm ) {
+	if (cpu_has_xmm) {
 		return tsk->thread.i387.fxsave.mxcsr;
 	} else {
 		return 0x1f80;
@@ -187,27 +193,27 @@ unsigned short get_fpu_mxcsr( struct task_struct *tsk )
 
 #if 0
 
-void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
+void set_fpu_cwd(struct task_struct *tsk, unsigned short cwd)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		tsk->thread.i387.fxsave.cwd = cwd;
 	} else {
 		tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
 	}
 }
 
-void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
+void set_fpu_swd(struct task_struct *tsk, unsigned short swd)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		tsk->thread.i387.fxsave.swd = swd;
 	} else {
 		tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
 	}
 }
 
-void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
+void set_fpu_twd(struct task_struct *tsk, unsigned short twd)
 {
-	if ( cpu_has_fxsr ) {
+	if (cpu_has_fxsr) {
 		tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
 	} else {
 		tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
@@ -220,8 +226,8 @@ void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
  * FXSR floating point environment conversions.
  */
 
-static int convert_fxsr_to_user( struct _fpstate __user *buf,
-					struct i387_fxsave_struct *fxsave )
+static int convert_fxsr_to_user(struct _fpstate __user *buf,
+				struct i387_fxsave_struct *fxsave)
 {
 	unsigned long env[7];
 	struct _fpreg __user *to;
@@ -236,32 +242,32 @@ static int convert_fxsr_to_user( struct _fpstate __user *buf,
 	env[5] = fxsave->foo;
 	env[6] = fxsave->fos;
 
-	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
+	if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
 		return 1;
 
 	to = &buf->_st[0];
 	from = (struct _fpxreg *) &fxsave->st_space[0];
-	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+	for (i = 0; i < 8; i++, to++, from++) {
 		unsigned long __user *t = (unsigned long __user *)to;
 		unsigned long *f = (unsigned long *)from;
 
 		if (__put_user(*f, t) ||
-				__put_user(*(f + 1), t + 1) ||
-				__put_user(from->exponent, &to->exponent))
+		    __put_user(*(f + 1), t + 1) ||
+		    __put_user(from->exponent, &to->exponent))
 			return 1;
 	}
 	return 0;
 }
 
-static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
-					  struct _fpstate __user *buf )
+static int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
+				  struct _fpstate __user *buf)
 {
 	unsigned long env[7];
 	struct _fpxreg *to;
 	struct _fpreg __user *from;
 	int i;
 
-	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
+	if (__copy_from_user(env, buf, 7 * sizeof(long)))
 		return 1;
 
 	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
@@ -275,13 +281,13 @@ static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
 
 	to = (struct _fpxreg *) &fxsave->st_space[0];
 	from = &buf->_st[0];
-	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+	for (i = 0; i < 8; i++, to++, from++) {
 		unsigned long *t = (unsigned long *)to;
 		unsigned long __user *f = (unsigned long __user *)from;
 
 		if (__get_user(*t, f) ||
-				__get_user(*(t + 1), f + 1) ||
-				__get_user(to->exponent, &from->exponent))
+		    __get_user(*(t + 1), f + 1) ||
+		    __get_user(to->exponent, &from->exponent))
 			return 1;
 	}
 	return 0;
@@ -291,42 +297,42 @@ static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
  * Signal frame handlers.
  */
 
-static inline int save_i387_fsave( struct _fpstate __user *buf )
+static inline int save_i387_fsave(struct _fpstate __user *buf)
 {
 	struct task_struct *tsk = current;
 
-	unlazy_fpu( tsk );
+	unlazy_fpu(tsk);
 	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-	if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
-			     sizeof(struct i387_fsave_struct) ) )
+	if (__copy_to_user(buf, &tsk->thread.i387.fsave,
+			   sizeof(struct i387_fsave_struct)))
 		return -1;
 	return 1;
 }
 
-static int save_i387_fxsave( struct _fpstate __user *buf )
+static int save_i387_fxsave(struct _fpstate __user *buf)
 {
 	struct task_struct *tsk = current;
 	int err = 0;
 
-	unlazy_fpu( tsk );
+	unlazy_fpu(tsk);
 
-	if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+	if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave))
 		return -1;
 
-	err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
-	err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
-	if ( err )
+	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
+	if (err)
 		return -1;
 
-	if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
-			     sizeof(struct i387_fxsave_struct) ) )
+	if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+			   sizeof(struct i387_fxsave_struct)))
 		return -1;
 	return 1;
 }
 
-int save_i387( struct _fpstate __user *buf )
+int save_i387(struct _fpstate __user *buf)
 {
-	if ( !used_math() )
+	if (!used_math())
 		return 0;
 
 	/* This will cause a "finit" to be triggered by the next
@@ -334,49 +340,49 @@ int save_i387( struct _fpstate __user *buf )
 	 */
 	clear_used_math();
 
-	if ( HAVE_HWFP ) {
-		if ( cpu_has_fxsr ) {
-			return save_i387_fxsave( buf );
+	if (HAVE_HWFP) {
+		if (cpu_has_fxsr) {
+			return save_i387_fxsave(buf);
 		} else {
-			return save_i387_fsave( buf );
+			return save_i387_fsave(buf);
 		}
 	} else {
-		return save_i387_soft( &current->thread.i387.soft, buf );
+		return save_i387_soft(&current->thread.i387.soft, buf);
 	}
 }
 
-static inline int restore_i387_fsave( struct _fpstate __user *buf )
+static inline int restore_i387_fsave(struct _fpstate __user *buf)
 {
 	struct task_struct *tsk = current;
-	clear_fpu( tsk );
-	return __copy_from_user( &tsk->thread.i387.fsave, buf,
-				 sizeof(struct i387_fsave_struct) );
+	clear_fpu(tsk);
+	return __copy_from_user(&tsk->thread.i387.fsave, buf,
+				sizeof(struct i387_fsave_struct));
 }
 
-static int restore_i387_fxsave( struct _fpstate __user *buf )
+static int restore_i387_fxsave(struct _fpstate __user *buf)
 {
 	int err;
 	struct task_struct *tsk = current;
-	clear_fpu( tsk );
-	err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
-				sizeof(struct i387_fxsave_struct) );
+	clear_fpu(tsk);
+	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+			       sizeof(struct i387_fxsave_struct));
 	/* mxcsr reserved bits must be masked to zero for security reasons */
 	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-	return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+	return err ? 1 : convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
 }
 
-int restore_i387( struct _fpstate __user *buf )
+int restore_i387(struct _fpstate __user *buf)
 {
 	int err;
 
-	if ( HAVE_HWFP ) {
-		if ( cpu_has_fxsr ) {
-			err = restore_i387_fxsave( buf );
+	if (HAVE_HWFP) {
+		if (cpu_has_fxsr) {
+			err = restore_i387_fxsave(buf);
 		} else {
-			err = restore_i387_fsave( buf );
+			err = restore_i387_fsave(buf);
 		}
 	} else {
-		err = restore_i387_soft( &current->thread.i387.soft, buf );
+		err = restore_i387_soft(&current->thread.i387.soft, buf);
 	}
 	set_used_math();
 	return err;
@@ -386,67 +392,67 @@ int restore_i387( struct _fpstate __user *buf )
  * ptrace request handlers.
  */
 
-static inline int get_fpregs_fsave( struct user_i387_struct __user *buf,
-				    struct task_struct *tsk )
+static inline int get_fpregs_fsave(struct user_i387_struct __user *buf,
+				   struct task_struct *tsk)
 {
-	return __copy_to_user( buf, &tsk->thread.i387.fsave,
-			       sizeof(struct user_i387_struct) );
+	return __copy_to_user(buf, &tsk->thread.i387.fsave,
+			      sizeof(struct user_i387_struct));
 }
 
-static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf,
-				     struct task_struct *tsk )
+static inline int get_fpregs_fxsave(struct user_i387_struct __user *buf,
+				    struct task_struct *tsk)
 {
-	return convert_fxsr_to_user( (struct _fpstate __user *)buf,
-				     &tsk->thread.i387.fxsave );
+	return convert_fxsr_to_user((struct _fpstate __user *)buf,
+				    &tsk->thread.i387.fxsave);
 }
 
-int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk )
+int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
 {
-	if ( HAVE_HWFP ) {
-		if ( cpu_has_fxsr ) {
-			return get_fpregs_fxsave( buf, tsk );
+	if (HAVE_HWFP) {
+		if (cpu_has_fxsr) {
+			return get_fpregs_fxsave(buf, tsk);
 		} else {
-			return get_fpregs_fsave( buf, tsk );
+			return get_fpregs_fsave(buf, tsk);
 		}
 	} else {
-		return save_i387_soft( &tsk->thread.i387.soft,
-				       (struct _fpstate __user *)buf );
+		return save_i387_soft(&tsk->thread.i387.soft,
+				      (struct _fpstate __user *)buf);
 	}
 }
 
-static inline int set_fpregs_fsave( struct task_struct *tsk,
-				    struct user_i387_struct __user *buf )
+static inline int set_fpregs_fsave(struct task_struct *tsk,
+				   struct user_i387_struct __user *buf)
 {
-	return __copy_from_user( &tsk->thread.i387.fsave, buf,
-				 sizeof(struct user_i387_struct) );
+	return __copy_from_user(&tsk->thread.i387.fsave, buf,
+				sizeof(struct user_i387_struct));
 }
 
-static inline int set_fpregs_fxsave( struct task_struct *tsk,
-				     struct user_i387_struct __user *buf )
+static inline int set_fpregs_fxsave(struct task_struct *tsk,
+				    struct user_i387_struct __user *buf)
 {
-	return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
-				       (struct _fpstate __user *)buf );
+	return convert_fxsr_from_user(&tsk->thread.i387.fxsave,
+				      (struct _fpstate __user *)buf);
 }
 
-int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf )
+int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
 {
-	if ( HAVE_HWFP ) {
-		if ( cpu_has_fxsr ) {
-			return set_fpregs_fxsave( tsk, buf );
+	if (HAVE_HWFP) {
+		if (cpu_has_fxsr) {
+			return set_fpregs_fxsave(tsk, buf);
 		} else {
-			return set_fpregs_fsave( tsk, buf );
+			return set_fpregs_fsave(tsk, buf);
 		}
 	} else {
-		return restore_i387_soft( &tsk->thread.i387.soft,
-					  (struct _fpstate __user *)buf );
+		return restore_i387_soft(&tsk->thread.i387.soft,
+					 (struct _fpstate __user *)buf);
 	}
 }
 
-int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
+int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *tsk)
 {
-	if ( cpu_has_fxsr ) {
-		if (__copy_to_user( buf, &tsk->thread.i387.fxsave,
-				    sizeof(struct user_fxsr_struct) ))
+	if (cpu_has_fxsr) {
+		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+				   sizeof(struct user_fxsr_struct)))
 			return -EFAULT;
 		return 0;
 	} else {
@@ -454,15 +460,16 @@ int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
 	}
 }
 
-int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
+int set_fpxregs(struct task_struct *tsk, struct user_fxsr_struct __user *buf)
 {
 	int ret = 0;
 
-	if ( cpu_has_fxsr ) {
-		if (__copy_from_user( &tsk->thread.i387.fxsave, buf,
-				  sizeof(struct user_fxsr_struct) ))
+	if (cpu_has_fxsr) {
+		if (__copy_from_user(&tsk->thread.i387.fxsave, buf,
+				     sizeof(struct user_fxsr_struct)))
 			ret = -EFAULT;
-		/* mxcsr reserved bits must be masked to zero for security reasons */
+		/* mxcsr reserved bits must be masked to zero
+		 * for security reasons */
 		tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
 	} else {
 		ret = -EIO;
@@ -474,41 +481,40 @@ int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
  * FPU state for core dumps.
  */
 
-static inline void copy_fpu_fsave( struct task_struct *tsk,
-				   struct user_i387_struct *fpu )
+static inline void copy_fpu_fsave(struct task_struct *tsk,
+				  struct user_i387_struct *fpu)
 {
-	memcpy( fpu, &tsk->thread.i387.fsave,
-		sizeof(struct user_i387_struct) );
+	memcpy(fpu, &tsk->thread.i387.fsave,
+	       sizeof(struct user_i387_struct));
 }
 
-static inline void copy_fpu_fxsave( struct task_struct *tsk,
-				   struct user_i387_struct *fpu )
+static inline void copy_fpu_fxsave(struct task_struct *tsk,
+				   struct user_i387_struct *fpu)
 {
 	unsigned short *to;
 	unsigned short *from;
 	int i;
 
-	memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
+	memcpy(fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long));
 
 	to = (unsigned short *)&fpu->st_space[0];
 	from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
-	for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
-		memcpy( to, from, 5 * sizeof(unsigned short) );
-	}
+	for (i = 0; i < 8; i++, to += 5, from += 8)
+		memcpy(to, from, 5 * sizeof(unsigned short));
 }
 
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
 {
 	int fpvalid;
 	struct task_struct *tsk = current;
 
 	fpvalid = !!used_math();
-	if ( fpvalid ) {
-		unlazy_fpu( tsk );
-		if ( cpu_has_fxsr ) {
-			copy_fpu_fxsave( tsk, fpu );
+	if (fpvalid) {
+		unlazy_fpu(tsk);
+		if (cpu_has_fxsr) {
+			copy_fpu_fxsave(tsk, fpu);
 		} else {
-			copy_fpu_fsave( tsk, fpu );
+			copy_fpu_fsave(tsk, fpu);
 		}
 	}
 
@@ -531,7 +537,8 @@ int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
 	return fpvalid;
 }
 
-int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
+int dump_task_extended_fpu(struct task_struct *tsk,
+			   struct user_fxsr_struct *fpu)
 {
 	int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
 

From 7bcbc78dea92fdf0947fa48e248da3c993a5690f Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 341/890] x86: clean up arch/x86/kernel/early-quirks.c

clean up checkpatch errors. No code changed.

      text    data     bss     dec     hex filename
       705     120       0     825     339 early-quirks.o.before
       705     120       0     825     339 early-quirks.o.after

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/early-quirks.c | 91 ++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index b55258e49208..3f88e437e843 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -32,10 +32,13 @@ static void __init fix_hypertransport_config(int num, int slot, int func)
 	 */
 	htcfg = read_pci_config(num, slot, func, 0x68);
 	if (htcfg & (1 << 18)) {
-		printk(KERN_INFO "Detected use of extended apic ids on hypertransport bus\n");
+		printk(KERN_INFO "Detected use of extended apic ids "
+				 "on hypertransport bus\n");
 		if ((htcfg & (1 << 17)) == 0) {
-			printk(KERN_INFO "Enabling hypertransport extended apic interrupt broadcast\n");
-			printk(KERN_INFO "Note this is a bios bug, please contact your hw vendor\n");
+			printk(KERN_INFO "Enabling hypertransport extended "
+					 "apic interrupt broadcast\n");
+			printk(KERN_INFO "Note this is a bios bug, "
+					 "please contact your hw vendor\n");
 			htcfg |= (1 << 17);
 			write_pci_config(num, slot, func, 0x68, htcfg);
 		}
@@ -130,6 +133,43 @@ static struct chipset early_qrk[] __initdata = {
 	{}
 };
 
+static void check_dev_quirk(int num, int slot, int func)
+{
+	u16 class;
+	u16 vendor;
+	u16 device;
+	u8 type;
+	int i;
+
+	class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
+
+	if (class == 0xffff)
+		return;
+
+	vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
+
+	device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
+
+	for (i = 0; early_qrk[i].f != NULL; i++) {
+		if (((early_qrk[i].vendor == PCI_ANY_ID) ||
+			(early_qrk[i].vendor == vendor)) &&
+			((early_qrk[i].device == PCI_ANY_ID) ||
+			(early_qrk[i].device == device)) &&
+			(!((early_qrk[i].class ^ class) &
+			    early_qrk[i].class_mask))) {
+				if ((early_qrk[i].flags &
+				     QFLAG_DONE) != QFLAG_DONE)
+					early_qrk[i].f(num, slot, func);
+				early_qrk[i].flags |= QFLAG_APPLIED;
+			}
+	}
+
+	type = read_pci_config_byte(num, slot, func,
+				    PCI_HEADER_TYPE);
+	if (!(type & 0x80))
+		return;
+}
+
 void __init early_quirks(void)
 {
 	int num, slot, func;
@@ -138,45 +178,8 @@ void __init early_quirks(void)
 		return;
 
 	/* Poor man's PCI discovery */
-	for (num = 0; num < 32; num++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				u16 class;
-				u16 vendor;
-				u16 device;
-				u8 type;
-				int i;
-
-				class = read_pci_config_16(num,slot,func,
-							PCI_CLASS_REVISION);
-				if (class == 0xffff)
-					break;
-
-				vendor = read_pci_config_16(num, slot, func,
-							 PCI_VENDOR_ID);
-
-				device = read_pci_config_16(num, slot, func,
-							PCI_DEVICE_ID);
-
-				for(i=0;early_qrk[i].f != NULL;i++) {
-					if (((early_qrk[i].vendor == PCI_ANY_ID) ||
-					    (early_qrk[i].vendor == vendor)) &&
-					   ((early_qrk[i].device == PCI_ANY_ID) ||
-					    (early_qrk[i].device == device)) &&
-					   (!((early_qrk[i].class ^ class) &
-					     early_qrk[i].class_mask))) {
-						if ((early_qrk[i].flags & QFLAG_DONE) != QFLAG_DONE)
-							early_qrk[i].f(num, slot, func);
-						early_qrk[i].flags |= QFLAG_APPLIED;
-
-					}
-				}
-
-				type = read_pci_config_byte(num, slot, func,
-							    PCI_HEADER_TYPE);
-				if (!(type & 0x80))
-					break;
-			}
-		}
-	}
+	for (num = 0; num < 32; num++)
+		for (slot = 0; slot < 32; slot++)
+			for (func = 0; func < 8; func++)
+				check_dev_quirk(num, slot, func);
 }

From 73018a66e70fa631935192e5ed6ef8fa2d20b22d Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:26 +0100
Subject: [PATCH 342/890] x86: move tsc definitions to were they belong

This patch wipes out the definitions of tsc_disable from processor_32.h
and move it to tsc.h, were it belongs

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor_32.h | 5 -----
 include/asm-x86/tsc.h          | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index a5560a3b57ef..76cada930d22 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -22,9 +22,6 @@
 #include <asm/processor-flags.h>
 #include <asm/desc_defs.h>
 
-/* flag for disabling the tsc */
-extern int tsc_disable;
-
 static inline int desc_empty(const void *ptr)
 {
 	const u32 *desc = ptr;
@@ -206,8 +203,6 @@ extern int bootloader_type;
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
-extern void disable_TSC(void);
-
 /*
  * Size of io_bitmap.
  */
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 9b7d264897fa..401303724130 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -16,6 +16,10 @@ typedef unsigned long long cycles_t;
 
 extern unsigned int cpu_khz;
 extern unsigned int tsc_khz;
+/* flag for disabling the tsc */
+extern int tsc_disable;
+
+extern void disable_TSC(void);
 
 static inline cycles_t get_cycles(void)
 {

From 053de044411111da00272d1b4e174e7dd743f499 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 343/890] x86: get rid of _MASK flags

There's no need for the *_MASK flags (TF_MASK, IF_MASK, etc), found in
processor.h (both _32 and _64). They have a one-to-one mapping with the
EFLAGS value. This patch removes the definitions, and use the already
existent X86_EFLAGS_ version when applicable.

[ roland@redhat.com: KVM build fixes. ]

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32_signal.c    |  4 ++--
 arch/x86/kernel/kprobes.c      | 18 +++++++++---------
 arch/x86/kernel/signal_64.c    |  4 ++--
 arch/x86/kernel/traps_64.c     |  2 +-
 drivers/kvm/vmx.c              |  8 ++++----
 include/asm-x86/kprobes.h      |  2 +-
 include/asm-x86/processor.h    |  2 ++
 include/asm-x86/processor_32.h |  1 -
 include/asm-x86/processor_64.h | 11 -----------
 9 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index d03d43f32f4c..0e24e3fda3d7 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -501,7 +501,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
@@ -601,7 +601,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 9aadd4d4a229..8de82c8cedd6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -387,9 +387,9 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
 	__get_cpu_var(current_kprobe) = p;
 	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
-		= (regs->flags & (TF_MASK | IF_MASK));
+		= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
 	if (is_IF_modifier(p->ainsn.insn))
-		kcb->kprobe_saved_flags &= ~IF_MASK;
+		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 }
 
 static __always_inline void clear_btf(void)
@@ -407,8 +407,8 @@ static __always_inline void restore_btf(void)
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
 	clear_btf();
-	regs->flags |= TF_MASK;
-	regs->flags &= ~IF_MASK;
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
 	/*single step inline if the instruction is an int3*/
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
 		regs->ip = (unsigned long)p->addr;
@@ -454,7 +454,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		if (p) {
 			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-				regs->flags &= ~TF_MASK;
+				regs->flags &= ~X86_EFLAGS_TF;
 				regs->flags |= kcb->kprobe_saved_flags;
 				goto no_kprobe;
 #ifdef CONFIG_X86_64
@@ -749,10 +749,10 @@ static void __kprobes resume_execution(struct kprobe *p,
 		insn++;
 #endif
 
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	switch (*insn) {
 	case 0x9c:	/* pushfl */
-		*tos &= ~(TF_MASK | IF_MASK);
+		*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
 		*tos |= kcb->kprobe_old_flags;
 		break;
 	case 0xc2:	/* iret/ret/lret */
@@ -852,7 +852,7 @@ out:
 	 * will have TF set, in which case, continue the remaining processing
 	 * of do_debug, as if this is not a probe hit.
 	 */
-	if (regs->flags & TF_MASK)
+	if (regs->flags & X86_EFLAGS_TF)
 		return 0;
 
 	return 1;
@@ -982,7 +982,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 */
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 	       MIN_STACK_SIZE(addr));
-	regs->flags &= ~IF_MASK;
+	regs->flags &= ~X86_EFLAGS_IF;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
 	return 1;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 1c9bca56fb55..4eb751c60390 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -295,7 +295,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	   see include/asm-x86_64/uaccess.h for details. */
 	set_fs(USER_DS);
 
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 #ifdef DEBUG_SIG
@@ -463,7 +463,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	       
 	/* Pending single-step? */
 	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->flags |= TF_MASK;
+		regs->flags |= X86_EFLAGS_TF;
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
 
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index cf90ceb48f5f..7118fa0320ae 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -904,7 +904,7 @@ clear_dr7:
 
 clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	preempt_conditional_cli(regs);
 }
 
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index bb56ae3f89b6..5b397b6c9f93 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -524,7 +524,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
 	if (vcpu->rmode.active)
-		rflags |= IOPL_MASK | X86_EFLAGS_VM;
+		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
@@ -1050,7 +1050,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
+	flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
 	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
 	vmcs_writel(GUEST_RFLAGS, flags);
 
@@ -1107,9 +1107,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+	vcpu->rmode.save_iopl = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
 
-	flags |= IOPL_MASK | X86_EFLAGS_VM;
+	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 
 	vmcs_writel(GUEST_RFLAGS, flags);
 	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index 6e6371a856e7..143476a3cb52 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -88,7 +88,7 @@ struct kprobe_ctlblk {
  */
 static inline void restore_interrupts(struct pt_regs *regs)
 {
-	if (regs->flags & IF_MASK)
+	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index dea81b70895d..e8dd394c9f46 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_X86_PROCESSOR_H
 #define __ASM_X86_PROCESSOR_H
 
+#include <asm/processor-flags.h>
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 76cada930d22..b9dbe4668e75 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -19,7 +19,6 @@
 #include <asm/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
-#include <asm/processor-flags.h>
 #include <asm/desc_defs.h>
 
 static inline int desc_empty(const void *ptr)
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index a0a9e5515097..c49716a76644 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -18,19 +18,8 @@
 #include <asm/percpu.h>
 #include <linux/personality.h>
 #include <linux/cpumask.h>
-#include <asm/processor-flags.h>
 #include <asm/desc_defs.h>
 
-#define TF_MASK		0x00000100
-#define IF_MASK		0x00000200
-#define IOPL_MASK	0x00003000
-#define NT_MASK		0x00004000
-#define VM_MASK		0x00020000
-#define AC_MASK		0x00040000
-#define VIF_MASK	0x00080000	/* virtual interrupt flag */
-#define VIP_MASK	0x00100000	/* virtual interrupt pending */
-#define ID_MASK		0x00200000
-
 static inline int desc_empty(const void *ptr)
 {
 	const u32 *desc = ptr;

From 746ff60f2627626fca0d8ddbf6b1f04d505782f4 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 344/890] x86: move desc_empty to where they belong

This patch moves the (duplicated) desc_empty implementation to desc.h,
where the descriptor things belong.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/desc.h         | 6 ++++++
 include/asm-x86/processor_32.h | 6 ------
 include/asm-x86/processor_64.h | 6 ------
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index e9356a852020..e17581ad8824 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -71,6 +71,12 @@ static inline void pack_gate(gate_desc *gate, unsigned char type,
 
 #endif
 
+static inline int desc_empty(const void *ptr)
+{
+	const u32 *desc = ptr;
+	return !(desc[0] | desc[1]);
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index b9dbe4668e75..cadeffb54a72 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -21,12 +21,6 @@
 #include <linux/init.h>
 #include <asm/desc_defs.h>
 
-static inline int desc_empty(const void *ptr)
-{
-	const u32 *desc = ptr;
-	return !(desc[0] | desc[1]);
-}
-
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index c49716a76644..acecef80a510 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -20,12 +20,6 @@
 #include <linux/cpumask.h>
 #include <asm/desc_defs.h>
 
-static inline int desc_empty(const void *ptr)
-{
-	const u32 *desc = ptr;
-	return !(desc[0] | desc[1]);
-}
-
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").

From c72dcf83ff8585c95739abffa3be7c87ca63d66b Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 345/890] x86: move load_cr3 to a common place.

There are currently two definitions of load_cr3, that essentially do the
same thing. This patch moves them all to processor.h.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/mmu_context_64.h | 5 -----
 include/asm-x86/processor.h      | 7 +++++++
 include/asm-x86/processor_32.h   | 1 -
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 98bfe43dd806..7e2aa23fccbf 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -23,11 +23,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-static inline void load_cr3(pgd_t *pgd)
-{
-	asm volatile("movq %0,%%cr3" :: "r" (__pa(pgd)) : "memory");
-}
-
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
 			     struct task_struct *tsk)
 {
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index e8dd394c9f46..36ee9881b74f 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -3,6 +3,9 @@
 
 #include <asm/processor-flags.h>
 
+#include <asm/page.h>
+#include <asm/system.h>
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
@@ -15,6 +18,10 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 		: "0" (*eax), "2" (*ecx));
 }
 
+static inline void load_cr3(pgd_t *pgdir)
+{
+	write_cr3(__pa(pgdir));
+}
 
 #ifdef CONFIG_X86_32
 # include "processor_32.h"
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index cadeffb54a72..b586902acd85 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -120,7 +120,6 @@ extern void detect_ht(struct cpuinfo_x86 *c);
 static inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
-#define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
  * Save the cr4 feature set we're using (ie

From 1b46cbe0ccaad25786526601bc54426f2e2abb20 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 346/890] x86: unify paravirt pieces of processor.h

This patch unifies the paravirt pieces of processor.h
The functionality present in 32 bit, but not (yet) in 64-bit,
like load_sp0 is _not_ done here, and let to a different patch.

With this unification, we get paravirt for free in x86_64 processor.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 90 +++++++++++++++++++++++++++++++++-
 include/asm-x86/processor_32.h | 87 --------------------------------
 include/asm-x86/processor_64.h | 36 --------------
 3 files changed, 89 insertions(+), 124 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 36ee9881b74f..8d0af7bf090a 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -29,9 +29,97 @@ static inline void load_cr3(pgd_t *pgdir)
 # include "processor_64.h"
 #endif
 
+static inline unsigned long native_get_debugreg(int regno)
+{
+	unsigned long val = 0; 	/* Damn you, gcc! */
+
+	switch (regno) {
+	case 0:
+		asm("mov %%db0, %0" :"=r" (val)); break;
+	case 1:
+		asm("mov %%db1, %0" :"=r" (val)); break;
+	case 2:
+		asm("mov %%db2, %0" :"=r" (val)); break;
+	case 3:
+		asm("mov %%db3, %0" :"=r" (val)); break;
+	case 6:
+		asm("mov %%db6, %0" :"=r" (val)); break;
+	case 7:
+		asm("mov %%db7, %0" :"=r" (val)); break;
+	default:
+		BUG();
+	}
+	return val;
+}
+
+static inline void native_set_debugreg(int regno, unsigned long value)
+{
+	switch (regno) {
+	case 0:
+		asm("mov %0,%%db0"	: /* no output */ :"r" (value));
+		break;
+	case 1:
+		asm("mov %0,%%db1"	: /* no output */ :"r" (value));
+		break;
+	case 2:
+		asm("mov %0,%%db2"	: /* no output */ :"r" (value));
+		break;
+	case 3:
+		asm("mov %0,%%db3"	: /* no output */ :"r" (value));
+		break;
+	case 6:
+		asm("mov %0,%%db6"	: /* no output */ :"r" (value));
+		break;
+	case 7:
+		asm("mov %0,%%db7"	: /* no output */ :"r" (value));
+		break;
+	default:
+		BUG();
+	}
+}
+
+
 #ifndef CONFIG_PARAVIRT
 #define __cpuid native_cpuid
-#endif
+#define paravirt_enabled() 0
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register)				\
+	(var) = native_get_debugreg(register)
+#define set_debugreg(value, register)				\
+	native_set_debugreg(register, value)
+
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4(unsigned long mask)
+{
+	unsigned cr4;
+	mmu_cr4_features |= mask;
+	cr4 = read_cr4();
+	cr4 |= mask;
+	write_cr4(cr4);
+}
+
+static inline void clear_in_cr4(unsigned long mask)
+{
+	unsigned cr4;
+	mmu_cr4_features &= ~mask;
+	cr4 = read_cr4();
+	cr4 &= ~mask;
+	write_cr4(cr4);
+}
+
+
 
 /*
  * Generic CPUID function
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index b586902acd85..e7fc023cd810 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -120,33 +120,6 @@ extern void detect_ht(struct cpuinfo_x86 *c);
 static inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
-
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
-	unsigned cr4;
-	mmu_cr4_features |= mask;
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
-	unsigned cr4;
-	mmu_cr4_features &= ~mask;
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
-}
-
 /* Stop speculative execution */
 static inline void sync_core(void)
 {
@@ -482,57 +455,6 @@ static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct
 		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 	}
 }
-
-
-static inline unsigned long native_get_debugreg(int regno)
-{
-	unsigned long val = 0; 	/* Damn you, gcc! */
-
-	switch (regno) {
-	case 0:
-		asm("movl %%db0, %0" :"=r" (val)); break;
-	case 1:
-		asm("movl %%db1, %0" :"=r" (val)); break;
-	case 2:
-		asm("movl %%db2, %0" :"=r" (val)); break;
-	case 3:
-		asm("movl %%db3, %0" :"=r" (val)); break;
-	case 6:
-		asm("movl %%db6, %0" :"=r" (val)); break;
-	case 7:
-		asm("movl %%db7, %0" :"=r" (val)); break;
-	default:
-		BUG();
-	}
-	return val;
-}
-
-static inline void native_set_debugreg(int regno, unsigned long value)
-{
-	switch (regno) {
-	case 0:
-		asm("movl %0,%%db0"	: /* no output */ :"r" (value));
-		break;
-	case 1:
-		asm("movl %0,%%db1"	: /* no output */ :"r" (value));
-		break;
-	case 2:
-		asm("movl %0,%%db2"	: /* no output */ :"r" (value));
-		break;
-	case 3:
-		asm("movl %0,%%db3"	: /* no output */ :"r" (value));
-		break;
-	case 6:
-		asm("movl %0,%%db6"	: /* no output */ :"r" (value));
-		break;
-	case 7:
-		asm("movl %0,%%db7"	: /* no output */ :"r" (value));
-		break;
-	default:
-		BUG();
-	}
-}
-
 /*
  * Set IOPL bits in EFLAGS from given mask
  */
@@ -552,21 +474,12 @@ static inline void native_set_iopl_mask(unsigned mask)
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define paravirt_enabled() 0
 
 static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	native_load_sp0(tss, thread);
 }
 
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register)				\
-	(var) = native_get_debugreg(register)
-#define set_debugreg(value, register)				\
-	native_set_debugreg(register, value)
-
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index acecef80a510..6abe1ba30fe0 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -88,34 +88,6 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
-	mmu_cr4_features |= mask;
-	__asm__("movq %%cr4,%%rax\n\t"
-		"orq %0,%%rax\n\t"
-		"movq %%rax,%%cr4\n"
-		: : "irg" (mask)
-		:"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
-	mmu_cr4_features &= ~mask;
-	__asm__("movq %%cr4,%%rax\n\t"
-		"andq %0,%%rax\n\t"
-		"movq %%rax,%%cr4\n"
-		: : "irg" (~mask)
-		:"ax");
-}
-
 
 /*
  * User space process size. 47bits minus one guard page.
@@ -254,14 +226,6 @@ struct thread_struct {
 	set_fs(USER_DS);							 \
 } while(0) 
 
-#define get_debugreg(var, register)				\
-		__asm__("movq %%db" #register ", %0"		\
-			:"=r" (var))
-#define set_debugreg(value, register)			\
-		__asm__("movq %0,%%db" #register		\
-			: /* no output */			\
-			:"r" (value))
-
 struct task_struct;
 struct mm_struct;
 

From 62d7d7ed11760a0fea40e4fc6f0553e721d00443 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 347/890] x86: move the definition of set_iopl_mask to common
 header

This patch moves the definition of set_iopl_mask to processor.h,
instead of letting it at processor_32.h.
For x86_64, nothing is done, as we don't really need such a function.
However, having it on both arches saves us from putting an ifdef in the
pv_cpu_ops struct.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 19 +++++++++++++++++++
 include/asm-x86/processor_32.h | 17 -----------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 8d0af7bf090a..8b7794766884 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -78,6 +78,24 @@ static inline void native_set_debugreg(int regno, unsigned long value)
 	}
 }
 
+/*
+ * Set IOPL bits in EFLAGS from given mask
+ */
+static inline void native_set_iopl_mask(unsigned mask)
+{
+#ifdef CONFIG_X86_32
+	unsigned int reg;
+	__asm__ __volatile__ ("pushfl;"
+			      "popl %0;"
+			      "andl %1, %0;"
+			      "orl %2, %0;"
+			      "pushl %0;"
+			      "popfl"
+				: "=&r" (reg)
+				: "i" (~X86_EFLAGS_IOPL), "r" (mask));
+#endif
+}
+
 
 #ifndef CONFIG_PARAVIRT
 #define __cpuid native_cpuid
@@ -91,6 +109,7 @@ static inline void native_set_debugreg(int regno, unsigned long value)
 #define set_debugreg(value, register)				\
 	native_set_debugreg(register, value)
 
+#define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
 /*
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index e7fc023cd810..516a38ad2a7a 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -455,21 +455,6 @@ static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct
 		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 	}
 }
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-	unsigned int reg;
-	__asm__ __volatile__ ("pushfl;"
-			      "popl %0;"
-			      "andl %1, %0;"
-			      "orl %2, %0;"
-			      "pushl %0;"
-			      "popfl"
-				: "=&r" (reg)
-				: "i" (~X86_EFLAGS_IOPL), "r" (mask));
-}
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
@@ -479,8 +464,6 @@ static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread
 {
 	native_load_sp0(tss, thread);
 }
-
-#define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
 /* generic versions from gas */

From 683e0253dbd12554b2ee969b15e68105252bff57 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 348/890] x86: unify common parts of processor.h

This patch moves the pieces of processor_32.h and processor_64 that are
equal to processor.h. Only what's exactly the same is moved around, the rest
not being touched.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 120 +++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 111 ------------------------------
 include/asm-x86/processor_64.h | 116 -------------------------------
 3 files changed, 120 insertions(+), 227 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 8b7794766884..52e3637ef59e 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -3,6 +3,10 @@
 
 #include <asm/processor-flags.h>
 
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
 #include <asm/page.h>
 #include <asm/system.h>
 
@@ -29,6 +33,11 @@ static inline void load_cr3(pgd_t *pgdir)
 # include "processor_64.h"
 #endif
 
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern unsigned short num_cache_leaves;
+
 static inline unsigned long native_get_debugreg(int regno)
 {
 	unsigned long val = 0; 	/* Damn you, gcc! */
@@ -138,7 +147,53 @@ static inline void clear_in_cr4(unsigned long mask)
 	write_cr4(cr4);
 }
 
+struct microcode_header {
+	unsigned int hdrver;
+	unsigned int rev;
+	unsigned int date;
+	unsigned int sig;
+	unsigned int cksum;
+	unsigned int ldrver;
+	unsigned int pf;
+	unsigned int datasize;
+	unsigned int totalsize;
+	unsigned int reserved[3];
+};
 
+struct microcode {
+	struct microcode_header hdr;
+	unsigned int bits[0];
+};
+
+typedef struct microcode microcode_t;
+typedef struct microcode_header microcode_header_t;
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int cksum;
+};
+
+struct extended_sigtable {
+	unsigned int count;
+	unsigned int cksum;
+	unsigned int reserved[3];
+	struct extended_signature sigs[0];
+};
+
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+extern void prepare_to_copy(struct task_struct *tsk);
+
+unsigned long get_wchan(struct task_struct *p);
 
 /*
  * Generic CPUID function
@@ -196,4 +251,69 @@ static inline unsigned int cpuid_edx(unsigned int op)
 	return edx;
 }
 
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+	__asm__ __volatile__("rep;nop": : :"memory");
+}
+
+/* Stop speculative execution */
+static inline void sync_core(void)
+{
+	int tmp;
+	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+					  : "ebx", "ecx", "edx", "memory");
+}
+
+#define cpu_relax()   rep_nop()
+
+static inline void __monitor(const void *eax, unsigned long ecx,
+		unsigned long edx)
+{
+	/* "monitor %eax,%ecx,%edx;" */
+	asm volatile(
+		".byte 0x0f,0x01,0xc8;"
+		: :"a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax,%ecx;" */
+	asm volatile(
+		".byte 0x0f,0x01,0xc9;"
+		: :"a" (eax), "c" (ecx));
+}
+
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax,%ecx;" */
+	asm volatile(
+		"sti; .byte 0x0f,0x01,0xc9;"
+		: :"a" (eax), "c" (ecx));
+}
+
+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
+
+extern int force_mwait;
+
+extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
+extern unsigned long boot_option_idle_override;
+
+/* Boot loader type from the setup header */
+extern int bootloader_type;
+#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+#define spin_lock_prefetch(x)	prefetchw(x)
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+
+#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+
 #endif
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 516a38ad2a7a..9e119d3789b4 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -109,10 +109,6 @@ void __init cpu_detect(struct cpuinfo_x86 *c);
 
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
@@ -120,32 +116,6 @@ extern void detect_ht(struct cpuinfo_x86 *c);
 static inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
-/* Stop speculative execution */
-static inline void sync_core(void)
-{
-	int tmp;
-	asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
-}
-
-static inline void __monitor(const void *eax, unsigned long ecx,
-		unsigned long edx)
-{
-	/* "monitor %eax,%ecx,%edx;" */
-	asm volatile(
-		".byte 0x0f,0x01,0xc8;"
-		: :"a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax,%ecx;" */
-	asm volatile(
-		".byte 0x0f,0x01,0xc9;"
-		: :"a" (eax), "c" (ecx));
-}
-
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
 /* from system description table in BIOS.  Mostly for MCA use, but
 others may find it useful. */
 extern unsigned int machine_id;
@@ -153,20 +123,11 @@ extern unsigned int machine_submodel_id;
 extern unsigned int BIOS_revision;
 extern unsigned int mca_pentium_flag;
 
-/* Boot loader type from the setup header */
-extern int bootloader_type;
-
 /*
  * User space process size: 3GB (default).
  */
 #define TASK_SIZE	(PAGE_OFFSET)
 
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
-
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
 
 /*
  * Size of io_bitmap.
@@ -356,25 +317,9 @@ struct thread_struct {
 	regs->sp = new_esp;					\
 } while (0)
 
-/* Forward declaration, a strange C thing */
-struct task_struct;
-struct mm_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
-unsigned long get_wchan(struct task_struct *p);
-
 #define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
 #define KSTK_TOP(info)                                                 \
 ({                                                                     \
@@ -399,53 +344,8 @@ unsigned long get_wchan(struct task_struct *p);
        __regs__ - 1;                                                   \
 })
 
-#define KSTK_EIP(task) (task_pt_regs(task)->ip)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
-
-struct microcode_header {
-	unsigned int hdrver;
-	unsigned int rev;
-	unsigned int date;
-	unsigned int sig;
-	unsigned int cksum;
-	unsigned int ldrver;
-	unsigned int pf;
-	unsigned int datasize;
-	unsigned int totalsize;
-	unsigned int reserved[3];
-};
-
-struct microcode {
-	struct microcode_header hdr;
-	unsigned int bits[0];
-};
-
-typedef struct microcode microcode_t;
-typedef struct microcode_header microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int cksum;
-};
-
-struct extended_sigtable {
-	unsigned int count;
-	unsigned int cksum;
-	unsigned int reserved[3];
-	struct extended_signature sigs[0];
-};
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
-	__asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax()	rep_nop()
-
 static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	tss->x86_tss.sp0 = thread->sp0;
@@ -555,7 +455,6 @@ static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread
    because they are microcoded there and very slow.
    However we don't do prefetches for pre XP Athlons currently
    That should be fixed. */
-#define ARCH_HAS_PREFETCH
 static inline void prefetch(const void *x)
 {
 	alternative_input(ASM_NOP4,
@@ -565,8 +464,6 @@ static inline void prefetch(const void *x)
 }
 
 #define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 /* 3dnow! prefetch to get an exclusive cache line. Useful for 
    spinlocks to avoid one state transition in the cache coherency protocol. */
@@ -577,13 +474,7 @@ static inline void prefetchw(const void *x)
 			  X86_FEATURE_3DNOW,
 			  "r" (x));
 }
-#define spin_lock_prefetch(x)	prefetchw(x)
 
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
-
-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
-
-extern unsigned long boot_option_idle_override;
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
@@ -595,6 +486,4 @@ extern void switch_to_new_gdt(void);
 extern void cpu_init(void);
 extern void init_gdt(int cpu);
 
-extern int force_mwait;
-
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 6abe1ba30fe0..5f5c7fc63797 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -83,11 +83,6 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
 extern char ignore_irq13;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
-
 
 /*
  * User space process size. 47bits minus one guard page.
@@ -102,8 +97,6 @@ extern unsigned short num_cache_leaves;
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 
-#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE/3)
-
 /*
  * Size of io_bitmap.
  */
@@ -226,68 +219,16 @@ struct thread_struct {
 	set_fs(USER_DS);							 \
 } while(0) 
 
-struct task_struct;
-struct mm_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /*
  * Return saved PC of a blocked thread.
  * What is this good for? it will be always the scheduler or ret_from_fork.
  */
 #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
 
-extern unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
-#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ip)
 #define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
 
 
-struct microcode_header {
-	unsigned int hdrver;
-	unsigned int rev;
-	unsigned int date;
-	unsigned int sig;
-	unsigned int cksum;
-	unsigned int ldrver;
-	unsigned int pf;
-	unsigned int datasize;
-	unsigned int totalsize;
-	unsigned int reserved[3];
-};
-
-struct microcode {
-	struct microcode_header hdr;
-	unsigned int bits[0];
-};
-
-typedef struct microcode microcode_t;
-typedef struct microcode_header microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int cksum;
-};
-
-struct extended_sigtable {
-	unsigned int count;
-	unsigned int cksum;
-	unsigned int reserved[3];
-	struct extended_signature sigs[0];
-};
-
-
 #if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2)
 #define ASM_NOP1 P6_NOP1
 #define ASM_NOP2 P6_NOP2
@@ -331,20 +272,6 @@ struct extended_sigtable {
 
 #define ASM_NOP_MAX 8
 
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
-	__asm__ __volatile__("rep;nop": : :"memory");
-}
-
-/* Stop speculative execution */
-static inline void sync_core(void)
-{ 
-	int tmp;
-	asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
-} 
-
-#define ARCH_HAS_PREFETCHW 1
 static inline void prefetchw(void *x) 
 { 
 	alternative_input("prefetcht0 (%1)",
@@ -353,42 +280,6 @@ static inline void prefetchw(void *x)
 			  "r" (x));
 } 
 
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-
-#define spin_lock_prefetch(x)  prefetchw(x)
-
-#define cpu_relax()   rep_nop()
-
-static inline void __monitor(const void *eax, unsigned long ecx,
-		unsigned long edx)
-{
-	/* "monitor %eax,%ecx,%edx;" */
-	asm volatile(
-		".byte 0x0f,0x01,0xc8;"
-		: :"a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax,%ecx;" */
-	asm volatile(
-		".byte 0x0f,0x01,0xc9;"
-		: :"a" (eax), "c" (ecx));
-}
-
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax,%ecx;" */
-	asm volatile(
-		"sti; .byte 0x0f,0x01,0xc9;"
-		: :"a" (eax), "c" (ecx));
-}
-
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
-extern int force_mwait;
-
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 #define stack_current() \
 ({								\
@@ -397,12 +288,5 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
 	ti->task;					\
 })
 
-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
-
-extern unsigned long boot_option_idle_override;
-/* Boot loader type from the setup header */
-extern int bootloader_type;
-
-#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 
 #endif /* __ASM_X86_64_PROCESSOR_H */

From 0ccb8acc51693a2aef0f38024943808046d81251 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:27 +0100
Subject: [PATCH 349/890] x86: unify current_text_addr

current_text_addr() has a different implementation in x86_64 and
i386, but it is not fundamentally different. I stick to the i386
implementation, that seem to be a common base, and move it to processor.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 11 +++++++++++
 include/asm-x86/processor_32.h |  6 ------
 include/asm-x86/processor_64.h |  6 ------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 52e3637ef59e..3deb5ba55f55 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -10,6 +10,17 @@ struct mm_struct;
 #include <asm/page.h>
 #include <asm/system.h>
 
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+static inline void *current_text_addr(void)
+{
+	void *pc;
+	asm volatile("mov $1f,%0\n1:":"=r" (pc));
+	return pc;
+}
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 9e119d3789b4..6cd2149dcbad 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -21,12 +21,6 @@
 #include <linux/init.h>
 #include <asm/desc_defs.h>
 
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
-
 /*
  *  CPU type and hardware bug flags. Kept separately for each CPU.
  *  Members of this structure are referenced in head.S, so think twice
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 5f5c7fc63797..1984a4a38b74 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -20,12 +20,6 @@
 #include <linux/cpumask.h>
 #include <asm/desc_defs.h>
 
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
-
 /*
  *  CPU type and hardware bug flags. Kept separately for each CPU.
  */

From ca241c75037b32e0216a68e39ad2801d04fa1f87 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:31 +0100
Subject: [PATCH 350/890] x86: unify tss_struct

Although slighly different, the tss_struct is very similar in x86_64 and
i386. The really different part, which matchs the hardware vision of it, is
now called x86_hw_tss, and each of the architectures provides yours.
It's then used as a field in the outter tss_struct.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_64.c |  2 +-
 arch/x86/kernel/doublefault_32.c |  2 +-
 arch/x86/kernel/process_64.c     |  2 +-
 arch/x86/kernel/setup64.c        |  4 +-
 arch/x86/kernel/smpboot_64.c     |  2 +-
 include/asm-x86/lguest.h         |  2 +-
 include/asm-x86/processor.h      | 77 ++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h   | 62 -------------------------
 include/asm-x86/processor_64.h   | 35 +--------------
 9 files changed, 85 insertions(+), 103 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index a05428764314..2b32719a3fea 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -110,7 +110,7 @@ int main(void)
 	ENTRY(cr8);
 	BLANK();
 #undef ENTRY
-	DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
+	DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
 	BLANK();
 	DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
 	BLANK();
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index d16122a8e4eb..a47798b59f07 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -33,7 +33,7 @@ static void doublefault_fn(void)
 		printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
 
 		if (ptr_ok(tss)) {
-			struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
+			struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
 
 			printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
 			       t->ip, t->sp);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 836a71adfa11..af56104b73ff 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -639,7 +639,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
-	tss->sp0 = next->sp0;
+	tss->x86_tss.sp0 = next->sp0;
 
 	/* 
 	 * Switch DS and ES.
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 05cafcb94109..3b0ffa31f3c0 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -258,10 +258,10 @@ void __cpuinit cpu_init (void)
 				      v, cpu); 
 		}
 		estacks += PAGE_SIZE << order[v];
-		orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
+		orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
 	}
 
-	t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
 	/*
 	 * <= is required because the CPU will access up to
 	 * 8 bits beyond the end of the IO permission bitmap.
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 4c03ddccd681..2ea02a71b644 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -614,7 +614,7 @@ do_rest:
 	start_rip = setup_trampoline();
 
 	init_rsp = c_idle.idle->thread.sp;
-	per_cpu(init_tss,cpu).sp0 = init_rsp;
+	per_cpu(init_tss, cpu).x86_tss.sp0 = init_rsp;
 	initial_code = start_secondary;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index 3585a1628b59..1c8367a692f6 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -51,7 +51,7 @@ struct lguest_ro_state
 	/* Fields which are used when guest is running. */
 	struct desc_ptr guest_idt_desc;
 	struct desc_ptr guest_gdt_desc;
-	struct i386_hw_tss guest_tss;
+	struct x86_hw_tss guest_tss;
 	struct desc_struct guest_idt[IDT_ENTRIES];
 	struct desc_struct guest_gdt[GDT_ENTRIES];
 };
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 3deb5ba55f55..cede9ad3dc6e 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -8,6 +8,7 @@ struct task_struct;
 struct mm_struct;
 
 #include <asm/page.h>
+#include <asm/percpu.h>
 #include <asm/system.h>
 
 /*
@@ -38,6 +39,82 @@ static inline void load_cr3(pgd_t *pgdir)
 	write_cr3(__pa(pgdir));
 }
 
+#ifdef CONFIG_X86_32
+/* This is the TSS defined by the hardware. */
+struct x86_hw_tss {
+	unsigned short	back_link, __blh;
+	unsigned long	sp0;
+	unsigned short	ss0, __ss0h;
+	unsigned long	sp1;
+	unsigned short	ss1, __ss1h;	/* ss1 caches MSR_IA32_SYSENTER_CS */
+	unsigned long	sp2;
+	unsigned short	ss2, __ss2h;
+	unsigned long	__cr3;
+	unsigned long	ip;
+	unsigned long	flags;
+	unsigned long	ax, cx, dx, bx;
+	unsigned long	sp, bp, si, di;
+	unsigned short	es, __esh;
+	unsigned short	cs, __csh;
+	unsigned short	ss, __ssh;
+	unsigned short	ds, __dsh;
+	unsigned short	fs, __fsh;
+	unsigned short	gs, __gsh;
+	unsigned short	ldt, __ldth;
+	unsigned short	trace, io_bitmap_base;
+} __attribute__((packed));
+#else
+struct x86_hw_tss {
+	u32 reserved1;
+	u64 sp0;
+	u64 sp1;
+	u64 sp2;
+	u64 reserved2;
+	u64 ist[7];
+	u32 reserved3;
+	u32 reserved4;
+	u16 reserved5;
+	u16 io_bitmap_base;
+} __attribute__((packed)) ____cacheline_aligned;
+#endif
+
+/*
+ * Size of io_bitmap.
+ */
+#define IO_BITMAP_BITS  65536
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
+
+struct tss_struct {
+	struct x86_hw_tss x86_tss;
+
+	/*
+	 * The extra 1 is there because the CPU will access an
+	 * additional byte beyond the end of the IO permission
+	 * bitmap. The extra byte must be all 1 bits, and must
+	 * be within the limit.
+	 */
+	unsigned long	io_bitmap[IO_BITMAP_LONGS + 1];
+	/*
+	 * Cache the current maximum and the last task that used the bitmap:
+	 */
+	unsigned long io_bitmap_max;
+	struct thread_struct *io_bitmap_owner;
+	/*
+	 * pads the TSS to be cacheline-aligned (size is 0x100)
+	 */
+	unsigned long __cacheline_filler[35];
+	/*
+	 * .. and then another 0x100 bytes for emergency kernel stack
+	 */
+	unsigned long stack[64];
+} __attribute__((packed));
+
+DECLARE_PER_CPU(struct tss_struct, init_tss);
+
 #ifdef CONFIG_X86_32
 # include "processor_32.h"
 #else
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 6cd2149dcbad..57b345bc3c74 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -81,7 +81,6 @@ struct cpuinfo_x86 {
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
 extern struct tss_struct doublefault_tss;
-DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
@@ -123,16 +122,6 @@ extern unsigned int mca_pentium_flag;
 #define TASK_SIZE	(PAGE_OFFSET)
 
 
-/*
- * Size of io_bitmap.
- */
-#define IO_BITMAP_BITS  65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
-
 struct i387_fsave_struct {
 	long	cwd;
 	long	swd;
@@ -185,57 +174,6 @@ typedef struct {
 	unsigned long seg;
 } mm_segment_t;
 
-struct thread_struct;
-
-/* This is the TSS defined by the hardware. */
-struct i386_hw_tss {
-	unsigned short	back_link,__blh;
-	unsigned long	sp0;
-	unsigned short	ss0,__ss0h;
-	unsigned long	sp1;
-	unsigned short	ss1,__ss1h;	/* ss1 is used to cache MSR_IA32_SYSENTER_CS */
-	unsigned long	sp2;
-	unsigned short	ss2,__ss2h;
-	unsigned long	__cr3;
-	unsigned long	ip;
-	unsigned long	flags;
-	unsigned long	ax, cx, dx, bx;
-	unsigned long	sp, bp, si, di;
-	unsigned short	es, __esh;
-	unsigned short	cs, __csh;
-	unsigned short	ss, __ssh;
-	unsigned short	ds, __dsh;
-	unsigned short	fs, __fsh;
-	unsigned short	gs, __gsh;
-	unsigned short	ldt, __ldth;
-	unsigned short	trace, io_bitmap_base;
-} __attribute__((packed));
-
-struct tss_struct {
-	struct i386_hw_tss x86_tss;
-
-	/*
-	 * The extra 1 is there because the CPU will access an
-	 * additional byte beyond the end of the IO permission
-	 * bitmap. The extra byte must be all 1 bits, and must
-	 * be within the limit.
-	 */
-	unsigned long	io_bitmap[IO_BITMAP_LONGS + 1];
-	/*
-	 * Cache the current maximum and the last task that used the bitmap:
-	 */
-	unsigned long io_bitmap_max;
-	struct thread_struct *io_bitmap_owner;
-	/*
-	 * pads the TSS to be cacheline-aligned (size is 0x100)
-	 */
-	unsigned long __cacheline_filler[35];
-	/*
-	 * .. and then another 0x100 bytes for emergency kernel stack
-	 */
-	unsigned long stack[64];
-} __attribute__((packed));
-
 #define ARCH_MIN_TASKALIGN	16
 
 struct thread_struct {
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 1984a4a38b74..8d342c23ad14 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -91,14 +91,6 @@ extern void identify_cpu(struct cpuinfo_x86 *);
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 
-/*
- * Size of io_bitmap.
- */
-#define IO_BITMAP_BITS  65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
 
 struct i387_fxsave_struct {
 	u16	cwd;
@@ -118,32 +110,7 @@ union i387_union {
 	struct i387_fxsave_struct	fxsave;
 };
 
-struct tss_struct {
-	u32 reserved1;
-	u64 sp0;
-	u64 sp1;
-	u64 sp2;
-	u64 reserved2;
-	u64 ist[7];
-	u32 reserved3;
-	u32 reserved4;
-	u16 reserved5;
-	u16 io_bitmap_base;
-	/*
-	 * The extra 1 is there because the CPU will access an
-	 * additional byte beyond the end of the IO permission
-	 * bitmap. The extra byte must be all 1 bits, and must
-	 * be within the limit. Thus we have:
-	 *
-	 * 128 bytes, the bitmap itself, for ports 0..0x3ff
-	 * 8 bytes, for an extra "long" of ~0UL
-	 */
-	unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
-} __attribute__((packed)) ____cacheline_aligned;
-
-
 extern struct cpuinfo_x86 boot_cpu_data;
-DECLARE_PER_CPU(struct tss_struct,init_tss);
 /* Save the original ist values for checking stack pointers during debugging */
 struct orig_ist {
 	unsigned long ist[7];
@@ -195,7 +162,7 @@ struct thread_struct {
 }
 
 #define INIT_TSS  { \
-	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }
 
 #define INIT_MMAP \

From 7818a1e0294debee02d5135e17b89f28b8871887 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:31 +0100
Subject: [PATCH 351/890] x86: provide 64-bit with a load_sp0 function.

Paravirt guests need to inform the underlying hypervisor whenever the sp0
tss field changes. i386 already has such a function, and we use it for
x86_64 too. There's an unnecessary (for 64-bit) msr handling part in the original
version, and it is placed around an ifdef. Making no more sense in
processor_32.h, it is moved to the common header

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c   |  2 +-
 arch/x86/kernel/smpboot_64.c   |  2 +-
 include/asm-x86/processor.h    | 22 +++++++++++++++++++++-
 include/asm-x86/processor_32.h | 20 --------------------
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index af56104b73ff..e3a3610ade10 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -639,7 +639,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
-	tss->x86_tss.sp0 = next->sp0;
+	load_sp0(tss, next);
 
 	/* 
 	 * Switch DS and ES.
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 2ea02a71b644..5bd42ce144da 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -614,7 +614,7 @@ do_rest:
 	start_rip = setup_trampoline();
 
 	init_rsp = c_idle.idle->thread.sp;
-	per_cpu(init_tss, cpu).x86_tss.sp0 = init_rsp;
+	load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
 	initial_code = start_secondary;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index cede9ad3dc6e..b1ea52156362 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -193,8 +193,22 @@ static inline void native_set_iopl_mask(unsigned mask)
 #endif
 }
 
+static inline void native_load_sp0(struct tss_struct *tss,
+				   struct thread_struct *thread)
+{
+	tss->x86_tss.sp0 = thread->sp0;
+#ifdef CONFIG_X86_32
+	/* Only happens when SEP is enabled, no need to test "SEP"arately */
+	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+		tss->x86_tss.ss1 = thread->sysenter_cs;
+		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+	}
+#endif
+}
 
-#ifndef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
 #define __cpuid native_cpuid
 #define paravirt_enabled() 0
 
@@ -206,6 +220,12 @@ static inline void native_set_iopl_mask(unsigned mask)
 #define set_debugreg(value, register)				\
 	native_set_debugreg(register, value)
 
+static inline void load_sp0(struct tss_struct *tss,
+			    struct thread_struct *thread)
+{
+	native_load_sp0(tss, thread);
+}
+
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 57b345bc3c74..53037d1a6ae6 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -278,26 +278,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
-static inline void native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
-{
-	tss->x86_tss.sp0 = thread->sp0;
-	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
-	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
-		tss->x86_tss.ss1 = thread->sysenter_cs;
-		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-	}
-}
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-
-static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread)
-{
-	native_load_sp0(tss, thread);
-}
-#endif /* CONFIG_PARAVIRT */
-
 /* generic versions from gas */
 #define GENERIC_NOP1	".byte 0x90\n"
 #define GENERIC_NOP2    	".byte 0x89,0xf6\n"

From cb38d377337cadd73fe4c582f77c5273685aeb71 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:31 +0100
Subject: [PATCH 352/890] x86: unify thread struct.

The thread_struct is not fundamentally different between architectures, and
this patch puts it in the common header. What's really unique for each of
them is enclosed in ifdefs.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 44 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 37 ----------------------------
 include/asm-x86/processor_64.h | 32 -------------------------
 3 files changed, 44 insertions(+), 69 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index b1ea52156362..0e82ad5a22a4 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -126,6 +126,50 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
+struct thread_struct {
+/* cached TLS descriptors. */
+	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+	unsigned long	sp0;
+	unsigned long	sp;
+#ifdef CONFIG_X86_32
+	unsigned long	sysenter_cs;
+#else
+	unsigned long 	usersp;	/* Copy from PDA */
+	unsigned short	es, ds, fsindex, gsindex;
+#endif
+	unsigned long	ip;
+	unsigned long	fs;
+	unsigned long	gs;
+/* Hardware debugging registers */
+	unsigned long	debugreg0;
+	unsigned long	debugreg1;
+	unsigned long	debugreg2;
+	unsigned long	debugreg3;
+	unsigned long	debugreg6;
+	unsigned long	debugreg7;
+/* fault info */
+	unsigned long	cr2, trap_no, error_code;
+/* floating point info */
+	union i387_union	i387 __attribute__((aligned(16)));;
+#ifdef CONFIG_X86_32
+/* virtual 86 mode info */
+	struct vm86_struct __user *vm86_info;
+	unsigned long		screen_bitmap;
+	unsigned long		v86flags, v86mask, saved_sp0;
+	unsigned int		saved_fs, saved_gs;
+#endif
+/* IO permissions */
+	unsigned long	*io_bitmap_ptr;
+	unsigned long	iopl;
+/* max allowed port in the bitmap, in bytes: */
+	unsigned io_bitmap_max;
+/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
+	unsigned long	debugctlmsr;
+/* Debug Store - if not 0 points to a DS Save Area configuration;
+ *               goes into MSR_IA32_DS_AREA */
+	unsigned long	ds_area_msr;
+};
+
 static inline unsigned long native_get_debugreg(int regno)
 {
 	unsigned long val = 0; 	/* Damn you, gcc! */
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 53037d1a6ae6..8cb05cec3cb9 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -176,43 +176,6 @@ typedef struct {
 
 #define ARCH_MIN_TASKALIGN	16
 
-struct thread_struct {
-/* cached TLS descriptors. */
-	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
-	unsigned long	sp0;
-	unsigned long	sysenter_cs;
-	unsigned long	ip;
-	unsigned long	sp;
-	unsigned long	fs;
-	unsigned long	gs;
-/* Hardware debugging registers */
-	unsigned long	debugreg0;
-	unsigned long	debugreg1;
-	unsigned long	debugreg2;
-	unsigned long	debugreg3;
-	unsigned long	debugreg6;
-	unsigned long	debugreg7;
-/* fault info */
-	unsigned long	cr2, trap_no, error_code;
-/* floating point info */
-	union i387_union	i387;
-/* virtual 86 mode info */
-	struct vm86_struct __user * vm86_info;
-	unsigned long		screen_bitmap;
-	unsigned long		v86flags, v86mask, saved_sp0;
-	unsigned int		saved_fs, saved_gs;
-/* IO permissions */
-	unsigned long	*io_bitmap_ptr;
- 	unsigned long	iopl;
-/* max allowed port in the bitmap, in bytes: */
-	unsigned long	io_bitmap_max;
-/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
-	unsigned long	debugctlmsr;
-/* Debug Store - if not 0 points to a DS Save Area configuration;
- *               goes into MSR_IA32_DS_AREA */
-	unsigned long	ds_area_msr;
-};
-
 #define INIT_THREAD  {							\
 	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
 	.vm86_info = NULL,						\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 8d342c23ad14..4b4ec3d79910 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -125,38 +125,6 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #define ARCH_MIN_MMSTRUCT_ALIGN	0
 #endif
 
-struct thread_struct {
-	unsigned long	sp0;
-	unsigned long	sp;
-	unsigned long 	usersp;	/* Copy from PDA */
-	unsigned long	fs;
-	unsigned long	gs;
-	unsigned short	es, ds, fsindex, gsindex;	
-/* Hardware debugging registers */
-	unsigned long	debugreg0;  
-	unsigned long	debugreg1;  
-	unsigned long	debugreg2;  
-	unsigned long	debugreg3;  
-	unsigned long	debugreg6;  
-	unsigned long	debugreg7;  
-/* fault info */
-	unsigned long	cr2, trap_no, error_code;
-/* floating point info */
-	union i387_union	i387  __attribute__((aligned(16)));
-/* IO permissions. the bitmap could be moved into the GDT, that would make
-   switch faster for a limited number of ioperm using tasks. -AK */
-	int		ioperm;
-	unsigned long	*io_bitmap_ptr;
-	unsigned io_bitmap_max;
-/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
-	unsigned long	debugctlmsr;
-/* Debug Store - if not 0 points to a DS Save Area configuration;
- *               goes into MSR_IA32_DS_AREA */
-	unsigned long	ds_area_msr;
-/* cached TLS descriptors. */
-	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
-} __attribute__((aligned(16)));
-
 #define INIT_THREAD  { \
 	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }

From dbcb4660246c240a159b4037067fdedb563a63cb Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:31 +0100
Subject: [PATCH 353/890] x86: unify TASK_ALIGN definitions

This patch moves the TASK_ALIGN constraints to common header.
The base of it is the same for x86_64 and i386. The only difference
is the presence of vSMP in x86_64. As it's not a worry in i386, we can
safely use the same code for both.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 8 ++++++++
 include/asm-x86/processor_32.h | 1 -
 include/asm-x86/processor_64.h | 8 --------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 0e82ad5a22a4..8b56c23f043d 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -22,6 +22,14 @@ static inline void *current_text_addr(void)
 	return pc;
 }
 
+#ifdef CONFIG_X86_VSMP
+#define ARCH_MIN_TASKALIGN	(1 << INTERNODE_CACHE_SHIFT)
+#define ARCH_MIN_MMSTRUCT_ALIGN	(1 << INTERNODE_CACHE_SHIFT)
+#else
+#define ARCH_MIN_TASKALIGN	16
+#define ARCH_MIN_MMSTRUCT_ALIGN	0
+#endif
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 8cb05cec3cb9..8380243a19d3 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -174,7 +174,6 @@ typedef struct {
 	unsigned long seg;
 } mm_segment_t;
 
-#define ARCH_MIN_TASKALIGN	16
 
 #define INIT_THREAD  {							\
 	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 4b4ec3d79910..7fe5c1461c83 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -117,14 +117,6 @@ struct orig_ist {
 };
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
-#ifdef CONFIG_X86_VSMP
-#define ARCH_MIN_TASKALIGN	(1 << INTERNODE_CACHE_SHIFT)
-#define ARCH_MIN_MMSTRUCT_ALIGN	(1 << INTERNODE_CACHE_SHIFT)
-#else
-#define ARCH_MIN_TASKALIGN	16
-#define ARCH_MIN_MMSTRUCT_ALIGN	0
-#endif
-
 #define INIT_THREAD  { \
 	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }

From 26996dd22b3cbc9dbe18cf908d2f844a116b6ec1 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:31 +0100
Subject: [PATCH 354/890] x86: change bitwise operations to get a void
 parameter.

This patch changes the bitwise operations in bitops.h to get
a void pointers as a parameter. Before this patch, a lot of warnings
can be seen. They're gone after it.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/bitops.h | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index c6dd7e259b46..1a23ce1a5697 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -43,7 +43,7 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void set_bit(int nr, volatile unsigned long *addr)
+static inline void set_bit(int nr, volatile void *addr)
 {
 	asm volatile(LOCK_PREFIX "bts %1,%0"
 		     : ADDR
@@ -59,7 +59,7 @@ static inline void set_bit(int nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static inline void __set_bit(int nr, volatile void *addr)
 {
 	asm volatile("bts %1,%0"
 		     : ADDR
@@ -77,7 +77,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
  * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  * in order to ensure changes are visible on other processors.
  */
-static inline void clear_bit(int nr, volatile unsigned long *addr)
+static inline void clear_bit(int nr, volatile void *addr)
 {
 	asm volatile(LOCK_PREFIX "btr %1,%0"
 		     : ADDR
@@ -92,13 +92,13 @@ static inline void clear_bit(int nr, volatile unsigned long *addr)
  * clear_bit() is atomic and implies release semantics before the memory
  * operation. It can be used for an unlock.
  */
-static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void clear_bit_unlock(unsigned nr, volatile void *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static inline void __clear_bit(int nr, volatile void *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -115,7 +115,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
  * No memory barrier is required here, because x86 cannot reorder stores past
  * older loads. Same principle as spin_unlock.
  */
-static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void __clear_bit_unlock(unsigned nr, volatile void *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -133,7 +133,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static inline void __change_bit(int nr, volatile void *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -147,7 +147,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(int nr, volatile void *addr)
 {
 	asm volatile(LOCK_PREFIX "btc %1,%0"
 		     : ADDR : "Ir" (nr));
@@ -161,7 +161,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -180,7 +180,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
  *
  * This is the same as test_and_set_bit on x86.
  */
-static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit_lock(int nr, volatile void *addr)
 {
 	return test_and_set_bit(nr, addr);
 }
@@ -194,7 +194,7 @@ static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_set_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -213,7 +213,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -234,7 +234,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_clear_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -246,7 +246,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_change_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -266,7 +266,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(int nr, volatile void *addr)
 {
 	int oldbit;
 
@@ -278,19 +278,20 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
-static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
+static inline int constant_test_bit(int nr, const volatile void *addr)
 {
-	return ((1UL << (nr % BITS_PER_LONG)) & (addr[nr / BITS_PER_LONG])) != 0;
+	return ((1UL << (nr % BITS_PER_LONG)) &
+		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
 }
 
-static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+static inline int variable_test_bit(int nr, volatile const void *addr)
 {
 	int oldbit;
 
 	asm volatile("bt %2,%1\n\t"
 		     "sbb %0,%0"
 		     : "=r" (oldbit)
-		     : "m" (*addr), "Ir" (nr));
+		     : "m" (*(unsigned long *)addr), "Ir" (nr));
 
 	return oldbit;
 }

From 5300db887e251276eaab2801af297acf4b53b9eb Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:33 +0100
Subject: [PATCH 355/890] x86: unify x86_cpuinfo struct.

x86_cpuinfo is one more to the family of "not fundamentally different"
structs. It's unified in processor.h, with very specific fields enclosed
around ifdefs.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c   |  6 +--
 include/asm-x86/processor.h    | 82 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 67 ---------------------------
 include/asm-x86/processor_64.h | 57 -----------------------
 4 files changed, 85 insertions(+), 127 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 69507ae8a65b..ecd13c0e8542 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,7 +427,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	printk(KERN_DEBUG "CPU: After generic identify, caps:");
 	for (i = 0; i < NCAPINTS; i++)
-		printk(" %08lx", c->x86_capability[i]);
+		printk(" %08x", c->x86_capability[i]);
 	printk("\n");
 
 	if (this_cpu->c_identify) {
@@ -435,7 +435,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 		printk(KERN_DEBUG "CPU: After vendor identify, caps:");
 		for (i = 0; i < NCAPINTS; i++)
-			printk(" %08lx", c->x86_capability[i]);
+			printk(" %08x", c->x86_capability[i]);
 		printk("\n");
 	}
 
@@ -493,7 +493,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	printk(KERN_DEBUG "CPU: After all inits, caps:");
 	for (i = 0; i < NCAPINTS; i++)
-		printk(" %08lx", c->x86_capability[i]);
+		printk(" %08x", c->x86_capability[i]);
 	printk("\n");
 
 	/*
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 8b56c23f043d..2b896b0aa3fa 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -10,6 +10,9 @@ struct mm_struct;
 #include <asm/page.h>
 #include <asm/percpu.h>
 #include <asm/system.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/cache.h>
 
 /*
  * Default implementation of macro that returns current
@@ -30,6 +33,85 @@ static inline void *current_text_addr(void)
 #define ARCH_MIN_MMSTRUCT_ALIGN	0
 #endif
 
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+	__u8	x86;		/* CPU family */
+	__u8	x86_vendor;	/* CPU vendor */
+	__u8	x86_model;
+	__u8	x86_mask;
+#ifdef CONFIG_X86_32
+	char	wp_works_ok;	/* It doesn't on 386's */
+	char	hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
+	char	hard_math;
+	char	rfu;
+	char	fdiv_bug;
+	char	f00f_bug;
+	char	coma_bug;
+	char	pad0;
+#else
+	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
+	int     x86_tlbsize;
+	__u8    x86_virt_bits, x86_phys_bits;
+	/* cpuid returned core id bits */
+	__u8    x86_coreid_bits;
+	/* Max extended CPUID function supported */
+	__u32   extended_cpuid_level;
+#endif
+	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
+	__u32	x86_capability[NCAPINTS];
+	char	x86_vendor_id[16];
+	char	x86_model_id[64];
+	int 	x86_cache_size;  /* in KB - valid for CPUS which support this
+				    call  */
+	int 	x86_cache_alignment;	/* In bytes */
+	int	x86_power;
+	unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
+#endif
+	unsigned char x86_max_cores;	/* cpuid returned max cores value */
+	unsigned char apicid;
+	unsigned short x86_clflush_size;
+#ifdef CONFIG_SMP
+	unsigned char booted_cores;	/* number of cores as seen by OS */
+	__u8 phys_proc_id; 		/* Physical processor id. */
+	__u8 cpu_core_id;  		/* Core id */
+	__u8 cpu_index;			/* index into per_cpu list */
+#endif
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NSC 8
+#define X86_VENDOR_NUM 9
+#define X86_VENDOR_UNKNOWN 0xff
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
+#else
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
+#endif
+
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern unsigned short num_cache_leaves;
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 8380243a19d3..f0f7ca48444e 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -14,83 +14,16 @@
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include <asm/system.h>
-#include <linux/cache.h>
 #include <linux/threads.h>
-#include <asm/percpu.h>
-#include <linux/cpumask.h>
 #include <linux/init.h>
 #include <asm/desc_defs.h>
 
-/*
- *  CPU type and hardware bug flags. Kept separately for each CPU.
- *  Members of this structure are referenced in head.S, so think twice
- *  before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
-	__u8	x86;		/* CPU family */
-	__u8	x86_vendor;	/* CPU vendor */
-	__u8	x86_model;
-	__u8	x86_mask;
-	char	wp_works_ok;	/* It doesn't on 386's */
-	char	hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
-	char	hard_math;
-	char	rfu;
-       	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
-	unsigned long	x86_capability[NCAPINTS];
-	char	x86_vendor_id[16];
-	char	x86_model_id[64];
-	int 	x86_cache_size;  /* in KB - valid for CPUS which support this
-				    call  */
-	int 	x86_cache_alignment;	/* In bytes */
-	char	fdiv_bug;
-	char	f00f_bug;
-	char	coma_bug;
-	char	pad0;
-	int	x86_power;
-	unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
-	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
-#endif
-	unsigned char x86_max_cores;	/* cpuid returned max cores value */
-	unsigned char apicid;
-	unsigned short x86_clflush_size;
-#ifdef CONFIG_SMP
-	unsigned char booted_cores;	/* number of cores as seen by OS */
-	__u8 phys_proc_id; 		/* Physical processor id. */
-	__u8 cpu_core_id;  		/* Core id */
-	__u8 cpu_index;			/* index into per_cpu list */
-#endif
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_NSC 8
-#define X86_VENDOR_NUM 9
-#define X86_VENDOR_UNKNOWN 0xff
-
 /*
  * capabilities of CPUs
  */
-
-extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
 extern struct tss_struct doublefault_tss;
 
-#ifdef CONFIG_SMP
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
-#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
-#define current_cpu_data	cpu_data(smp_processor_id())
-#else
-#define cpu_data(cpu)		boot_cpu_data
-#define current_cpu_data	boot_cpu_data
-#endif
-
 /*
  * the following now lives in the per cpu area:
  * extern	int cpu_llc_id[NR_CPUS];
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 7fe5c1461c83..5045abe91e37 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -15,65 +15,9 @@
 #include <asm/current.h>
 #include <asm/system.h>
 #include <asm/mmsegment.h>
-#include <asm/percpu.h>
 #include <linux/personality.h>
-#include <linux/cpumask.h>
 #include <asm/desc_defs.h>
 
-/*
- *  CPU type and hardware bug flags. Kept separately for each CPU.
- */
-
-struct cpuinfo_x86 {
-	__u8	x86;		/* CPU family */
-	__u8	x86_vendor;	/* CPU vendor */
-	__u8	x86_model;
-	__u8	x86_mask;
-	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
-	__u32	x86_capability[NCAPINTS];
-	char	x86_vendor_id[16];
-	char	x86_model_id[64];
-	int 	x86_cache_size;  /* in KB */
-	int	x86_clflush_size;
-	int	x86_cache_alignment;
-	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
-        __u8    x86_virt_bits, x86_phys_bits;
-	__u8	x86_max_cores;	/* cpuid returned max cores value */
-	__u8	x86_coreid_bits; /* cpuid returned core id bits */
-        __u32   x86_power; 	
-	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
-	unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
-	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
-#endif
-	__u8	apicid;
-#ifdef CONFIG_SMP
-	__u8	booted_cores;	/* number of cores as seen by OS */
-	__u8	phys_proc_id;	/* Physical Processor id. */
-	__u8	cpu_core_id;	/* Core id. */
-	__u8	cpu_index;	/* index into per_cpu list */
-#endif
-} ____cacheline_aligned;
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_NUM 8
-#define X86_VENDOR_UNKNOWN 0xff
-
-#ifdef CONFIG_SMP
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
-#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
-#define current_cpu_data	cpu_data(smp_processor_id())
-#else
-#define cpu_data(cpu)		boot_cpu_data
-#define current_cpu_data	boot_cpu_data
-#endif
-
 extern char ignore_irq13;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
@@ -110,7 +54,6 @@ union i387_union {
 	struct i387_fxsave_struct	fxsave;
 };
 
-extern struct cpuinfo_x86 boot_cpu_data;
 /* Save the original ist values for checking stack pointers during debugging */
 struct orig_ist {
 	unsigned long ist[7];

From 2c1c468a382e64afccacdfe03fa4c0615cd4264f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:37 +0100
Subject: [PATCH 356/890] x86: remove legacy stuff from processor_64.h

This patch removes definitions and macros that are not used anymore
from processor_64.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor_64.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 5045abe91e37..bfc4ce6e2f00 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -18,8 +18,6 @@
 #include <linux/personality.h>
 #include <asm/desc_defs.h>
 
-extern char ignore_irq13;
-
 extern void identify_cpu(struct cpuinfo_x86 *);
 
 /*
@@ -68,9 +66,6 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
 }
 
-#define INIT_MMAP \
-{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
-
 #define start_thread(regs,new_rip,new_rsp) do { \
 	asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));	 \
 	load_gs_index(0);							\
@@ -144,13 +139,4 @@ static inline void prefetchw(void *x)
 			  "r" (x));
 } 
 
-
-#define stack_current() \
-({								\
-	struct thread_info *ti;					\
-	asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->task;					\
-})
-
-
 #endif /* __ASM_X86_64_PROCESSOR_H */

From fc87e9061a5635fe1b367dad95498846dd5eda31 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:38 +0100
Subject: [PATCH 357/890] x86: unify mm_segment_t definition

This patch moves the mm_segment_t structure definition to processor.h
This makes mmsegment.h file useless, and it is deleted.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/mmsegment.h      | 8 --------
 include/asm-x86/processor.h      | 5 +++++
 include/asm-x86/processor_32.h   | 5 -----
 include/asm-x86/processor_64.h   | 1 -
 include/asm-x86/thread_info_64.h | 2 +-
 5 files changed, 6 insertions(+), 15 deletions(-)
 delete mode 100644 include/asm-x86/mmsegment.h

diff --git a/include/asm-x86/mmsegment.h b/include/asm-x86/mmsegment.h
deleted file mode 100644
index d3f80c996330..000000000000
--- a/include/asm-x86/mmsegment.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_MMSEGMENT_H
-#define _ASM_MMSEGMENT_H 1
-
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-#endif
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 2b896b0aa3fa..87466b6d3b92 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -424,6 +424,11 @@ struct extended_sigtable {
 	struct extended_signature sigs[0];
 };
 
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+
 /*
  * create a kernel thread without removing it from tasklists
  */
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index f0f7ca48444e..fa921aa1a93f 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -103,11 +103,6 @@ union i387_union {
 	struct i387_soft_struct soft;
 };
 
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-
 #define INIT_THREAD  {							\
 	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
 	.vm86_info = NULL,						\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index bfc4ce6e2f00..1a258749b7ca 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -14,7 +14,6 @@
 #include <asm/msr.h>
 #include <asm/current.h>
 #include <asm/system.h>
-#include <asm/mmsegment.h>
 #include <linux/personality.h>
 #include <asm/desc_defs.h>
 
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index c2911a99cc32..9b531ea015a8 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -21,7 +21,7 @@
 #ifndef __ASSEMBLY__
 struct task_struct;
 struct exec_domain;
-#include <asm/mmsegment.h>
+#include <asm/processor.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */

From 1a53905adddf6cc6d795bd7e988c60a19773f72e Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:39 +0100
Subject: [PATCH 358/890] x86: move definitions to processor.h

This patch moves definitions that are present in only one of the files
(between processor_32.h and processor_64.h), to processor.h. They're mostly
structures and function definitions.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c   |  2 +-
 arch/x86/kernel/setup_64.c     |  2 +-
 include/asm-x86/processor.h    | 41 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 35 -----------------------------
 include/asm-x86/processor_64.h |  6 -----
 5 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ecd13c0e8542..e48832a6c2a9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -399,7 +399,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 90b8bb4748b9..02409100f456 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -757,7 +757,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		disable_apic_timer = 1;
 }
 
-static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	u32 eax, ebx, ecx, edx;
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 87466b6d3b92..c6b749a018a7 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -96,7 +96,12 @@ struct cpuinfo_x86 {
 #define X86_VENDOR_NUM 9
 #define X86_VENDOR_UNKNOWN 0xff
 
+/*
+ * capabilities of CPUs
+ */
 extern struct cpuinfo_x86 boot_cpu_data;
+extern struct cpuinfo_x86 new_cpu_data;
+extern struct tss_struct doublefault_tss;
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
@@ -107,11 +112,22 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
 #define current_cpu_data	boot_cpu_data
 #endif
 
+void cpu_detect(struct cpuinfo_x86 *c);
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void identify_boot_cpu(void);
+extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
+#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64)
+extern void detect_ht(struct cpuinfo_x86 *c);
+#else
+static inline void detect_ht(struct cpuinfo_x86 *c) {}
+#endif
+
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 					 unsigned int *ecx, unsigned int *edx)
 {
@@ -205,6 +221,11 @@ struct tss_struct {
 
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
+/* Save the original ist values for checking stack pointers during debugging */
+struct orig_ist {
+	unsigned long ist[7];
+};
+
 #ifdef CONFIG_X86_32
 # include "processor_32.h"
 #else
@@ -547,8 +568,28 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long boot_option_idle_override;
 
+extern void enable_sep_cpu(void);
+extern int sysenter_setup(void);
+
+/* Defined in head.S */
+extern struct desc_ptr early_gdt_descr;
+
+extern void cpu_set_gdt(int);
+extern void switch_to_new_gdt(void);
+extern void cpu_init(void);
+extern void init_gdt(int cpu);
+
+/* from system description table in BIOS.  Mostly for MCA use, but
+ * others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
 /* Boot loader type from the setup header */
 extern int bootloader_type;
+
+extern char ignore_fpu_irq;
 #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index fa921aa1a93f..84a4c5e47d57 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -18,36 +18,12 @@
 #include <linux/init.h>
 #include <asm/desc_defs.h>
 
-/*
- * capabilities of CPUs
- */
-extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
 
 /*
  * the following now lives in the per cpu area:
  * extern	int cpu_llc_id[NR_CPUS];
  */
 DECLARE_PER_CPU(u8, cpu_llc_id);
-extern char ignore_fpu_irq;
-
-void __init cpu_detect(struct cpuinfo_x86 *c);
-
-extern void identify_boot_cpu(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
-
-#ifdef CONFIG_X86_HT
-extern void detect_ht(struct cpuinfo_x86 *c);
-#else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
-#endif
-
-/* from system description table in BIOS.  Mostly for MCA use, but
-others may find it useful. */
-extern unsigned int machine_id;
-extern unsigned int machine_submodel_id;
-extern unsigned int BIOS_revision;
-extern unsigned int mca_pentium_flag;
 
 /*
  * User space process size: 3GB (default).
@@ -277,15 +253,4 @@ static inline void prefetchw(const void *x)
 			  "r" (x));
 }
 
-extern void enable_sep_cpu(void);
-extern int sysenter_setup(void);
-
-/* Defined in head.S */
-extern struct desc_ptr early_gdt_descr;
-
-extern void cpu_set_gdt(int);
-extern void switch_to_new_gdt(void);
-extern void cpu_init(void);
-extern void init_gdt(int cpu);
-
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 1a258749b7ca..45e382989b33 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -17,8 +17,6 @@
 #include <linux/personality.h>
 #include <asm/desc_defs.h>
 
-extern void identify_cpu(struct cpuinfo_x86 *);
-
 /*
  * User space process size. 47bits minus one guard page.
  */
@@ -51,10 +49,6 @@ union i387_union {
 	struct i387_fxsave_struct	fxsave;
 };
 
-/* Save the original ist values for checking stack pointers during debugging */
-struct orig_ist {
-	unsigned long ist[7];
-};
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
 #define INIT_THREAD  { \

From ae2e15eb3b6c2a011bee615470bf52d2beb99a4b Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:40 +0100
Subject: [PATCH 359/890] x86: unify prefetch operations

This patch moves the prefetch[w]? functions to processor.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 30 ++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 25 -------------------------
 include/asm-x86/processor_64.h |  8 --------
 3 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index c6b749a018a7..bfac9739f57e 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -596,6 +596,36 @@ extern char ignore_fpu_irq;
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
+#ifdef CONFIG_X86_32
+#define BASE_PREFETCH	ASM_NOP4
+#define ARCH_HAS_PREFETCH
+#else
+#define BASE_PREFETCH	"prefetcht0 (%1)"
+#endif
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+/* It's not worth to care about 3dnow! prefetches for the K6
+   because they are microcoded there and very slow.
+   However we don't do prefetches for pre XP Athlons currently
+   That should be fixed. */
+static inline void prefetch(const void *x)
+{
+	alternative_input(BASE_PREFETCH,
+			  "prefetchnta (%1)",
+			  X86_FEATURE_XMM,
+			  "r" (x));
+}
+
+/* 3dnow! prefetch to get an exclusive cache line. Useful for
+   spinlocks to avoid one state transition in the cache coherency protocol. */
+static inline void prefetchw(const void *x)
+{
+	alternative_input(BASE_PREFETCH,
+			  "prefetchw (%1)",
+			  X86_FEATURE_3DNOW,
+			  "r" (x));
+}
+
 #define spin_lock_prefetch(x)	prefetchw(x)
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 84a4c5e47d57..61a9cae2364b 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -228,29 +228,4 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #define ASM_NOP_MAX 8
 
-/* Prefetch instructions for Pentium III and AMD Athlon */
-/* It's not worth to care about 3dnow! prefetches for the K6
-   because they are microcoded there and very slow.
-   However we don't do prefetches for pre XP Athlons currently
-   That should be fixed. */
-static inline void prefetch(const void *x)
-{
-	alternative_input(ASM_NOP4,
-			  "prefetchnta (%1)",
-			  X86_FEATURE_XMM,
-			  "r" (x));
-}
-
-#define ARCH_HAS_PREFETCH
-
-/* 3dnow! prefetch to get an exclusive cache line. Useful for 
-   spinlocks to avoid one state transition in the cache coherency protocol. */
-static inline void prefetchw(const void *x)
-{
-	alternative_input(ASM_NOP4,
-			  "prefetchw (%1)",
-			  X86_FEATURE_3DNOW,
-			  "r" (x));
-}
-
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 45e382989b33..08b965124b97 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -124,12 +124,4 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
 #define ASM_NOP_MAX 8
 
-static inline void prefetchw(void *x) 
-{ 
-	alternative_input("prefetcht0 (%1)",
-			  "prefetchw (%1)",
-			  X86_FEATURE_3DNOW,
-			  "r" (x));
-} 
-
 #endif /* __ASM_X86_64_PROCESSOR_H */

From ea5e3593a493298c69b8c4a958628feace930478 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:40 +0100
Subject: [PATCH 360/890] x86: unify asm nops

There's only one difference between the NOPs used in asm code for i386 and x86_64:
i386 has a lot more variants. The code is moved to processor.h, and adjusted
accordingly.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 85 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 85 ----------------------------------
 include/asm-x86/processor_64.h | 44 ------------------
 3 files changed, 85 insertions(+), 129 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index bfac9739f57e..a8f1702ec3ba 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -592,6 +592,91 @@ extern int bootloader_type;
 extern char ignore_fpu_irq;
 #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
 
+/* generic versions from gas */
+#define GENERIC_NOP1	".byte 0x90\n"
+#define GENERIC_NOP2    	".byte 0x89,0xf6\n"
+#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7
+
+/* Opteron nops */
+#define K8_NOP1 GENERIC_NOP1
+#define K8_NOP2	".byte 0x66,0x90\n"
+#define K8_NOP3	".byte 0x66,0x66,0x90\n"
+#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
+#define K8_NOP5	K8_NOP3 K8_NOP2
+#define K8_NOP6	K8_NOP3 K8_NOP3
+#define K8_NOP7	K8_NOP4 K8_NOP3
+#define K8_NOP8	K8_NOP4 K8_NOP4
+
+/* K7 nops */
+/* uses eax dependencies (arbitary choice) */
+#define K7_NOP1  GENERIC_NOP1
+#define K7_NOP2	".byte 0x8b,0xc0\n"
+#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
+#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
+#define K7_NOP5	K7_NOP4 ASM_NOP1
+#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
+#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8        K7_NOP7 ASM_NOP1
+
+/* P6 nops */
+/* uses eax dependencies (Intel-recommended choice) */
+#define P6_NOP1	GENERIC_NOP1
+#define P6_NOP2	".byte 0x66,0x90\n"
+#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
+#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
+#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
+#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+
+#ifdef CONFIG_MK7
+#define ASM_NOP1 K7_NOP1
+#define ASM_NOP2 K7_NOP2
+#define ASM_NOP3 K7_NOP3
+#define ASM_NOP4 K7_NOP4
+#define ASM_NOP5 K7_NOP5
+#define ASM_NOP6 K7_NOP6
+#define ASM_NOP7 K7_NOP7
+#define ASM_NOP8 K7_NOP8
+#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
+      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
+      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) || \
+      defined(CONFIG_MPSC)
+#define ASM_NOP1 P6_NOP1
+#define ASM_NOP2 P6_NOP2
+#define ASM_NOP3 P6_NOP3
+#define ASM_NOP4 P6_NOP4
+#define ASM_NOP5 P6_NOP5
+#define ASM_NOP6 P6_NOP6
+#define ASM_NOP7 P6_NOP7
+#define ASM_NOP8 P6_NOP8
+#elif defined(CONFIG_MK8) || defined(CONFIG_X86_64)
+#define ASM_NOP1 K8_NOP1
+#define ASM_NOP2 K8_NOP2
+#define ASM_NOP3 K8_NOP3
+#define ASM_NOP4 K8_NOP4
+#define ASM_NOP5 K8_NOP5
+#define ASM_NOP6 K8_NOP6
+#define ASM_NOP7 K8_NOP7
+#define ASM_NOP8 K8_NOP8
+#else
+#define ASM_NOP1 GENERIC_NOP1
+#define ASM_NOP2 GENERIC_NOP2
+#define ASM_NOP3 GENERIC_NOP3
+#define ASM_NOP4 GENERIC_NOP4
+#define ASM_NOP5 GENERIC_NOP5
+#define ASM_NOP6 GENERIC_NOP6
+#define ASM_NOP7 GENERIC_NOP7
+#define ASM_NOP8 GENERIC_NOP8
+#endif
+
+#define ASM_NOP_MAX 8
+
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 61a9cae2364b..82fafabbfe7a 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -18,7 +18,6 @@
 #include <linux/init.h>
 #include <asm/desc_defs.h>
 
-
 /*
  * the following now lives in the per cpu area:
  * extern	int cpu_llc_id[NR_CPUS];
@@ -144,88 +143,4 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
-/* generic versions from gas */
-#define GENERIC_NOP1	".byte 0x90\n"
-#define GENERIC_NOP2    	".byte 0x89,0xf6\n"
-#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7
-
-/* Opteron nops */
-#define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2	".byte 0x66,0x90\n" 
-#define K8_NOP3	".byte 0x66,0x66,0x90\n" 
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n" 
-#define K8_NOP5	K8_NOP3 K8_NOP2 
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
-
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1  GENERIC_NOP1
-#define K7_NOP2	".byte 0x8b,0xc0\n" 
-#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5	K7_NOP4 ASM_NOP1
-#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8        K7_NOP7 ASM_NOP1
-
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
-#define P6_NOP1	GENERIC_NOP1
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
-
-#ifdef CONFIG_MK8
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
-#elif defined(CONFIG_MK7)
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
-      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
-      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
-#else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
-#endif
-
-#define ASM_NOP_MAX 8
-
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 08b965124b97..86fc2dcc8687 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -80,48 +80,4 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 #define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
 
-
-#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
-#else
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
-#endif
-
-/* Opteron nops */
-#define K8_NOP1 ".byte 0x90\n"
-#define K8_NOP2	".byte 0x66,0x90\n" 
-#define K8_NOP3	".byte 0x66,0x66,0x90\n" 
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n" 
-#define K8_NOP5	K8_NOP3 K8_NOP2 
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
-
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
-#define P6_NOP1	".byte 0x90\n"
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
-
-#define ASM_NOP_MAX 8
-
 #endif /* __ASM_X86_64_PROCESSOR_H */

From 46265df040533f57c191bb2b019d6b25c3bf1f34 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:41 +0100
Subject: [PATCH 361/890] x86: move i387 definitions to processor.h

This patch moves i387 definitions from processor_32.h and processor_64.h
to processor.h. They are different. Very different. And there's appearently
nothing we can do about it, so they're enclosed inside ifdefs.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h    | 66 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor_32.h | 49 -------------------------
 include/asm-x86/processor_64.h | 17 ---------
 3 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index a8f1702ec3ba..80c50020ef60 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -227,8 +227,74 @@ struct orig_ist {
 };
 
 #ifdef CONFIG_X86_32
+struct i387_fsave_struct {
+	long	cwd;
+	long	swd;
+	long	twd;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	long	status;		/* software status information */
+};
+
+struct i387_fxsave_struct {
+	unsigned short	cwd;
+	unsigned short	swd;
+	unsigned short	twd;
+	unsigned short	fop;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	mxcsr;
+	long	mxcsr_mask;
+	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
+	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
+	long	padding[56];
+} __attribute__((aligned(16)));
+
+struct i387_soft_struct {
+	long	cwd;
+	long	swd;
+	long	twd;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	unsigned char	ftop, changed, lookahead, no_update, rm, alimit;
+	struct info	*info;
+	unsigned long	entry_eip;
+};
+
+union i387_union {
+	struct i387_fsave_struct	fsave;
+	struct i387_fxsave_struct	fxsave;
+	struct i387_soft_struct soft;
+};
+
 # include "processor_32.h"
 #else
+struct i387_fxsave_struct {
+	u16	cwd;
+	u16	swd;
+	u16	twd;
+	u16	fop;
+	u64	rip;
+	u64	rdp;
+	u32	mxcsr;
+	u32	mxcsr_mask;
+	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
+	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
+	u32	padding[24];
+} __attribute__((aligned(16)));
+
+union i387_union {
+	struct i387_fxsave_struct	fxsave;
+};
+
 # include "processor_64.h"
 #endif
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 82fafabbfe7a..0d6a430b2bc3 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -29,55 +29,6 @@ DECLARE_PER_CPU(u8, cpu_llc_id);
  */
 #define TASK_SIZE	(PAGE_OFFSET)
 
-
-struct i387_fsave_struct {
-	long	cwd;
-	long	swd;
-	long	twd;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
-	long	status;		/* software status information */
-};
-
-struct i387_fxsave_struct {
-	unsigned short	cwd;
-	unsigned short	swd;
-	unsigned short	twd;
-	unsigned short	fop;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	mxcsr;
-	long	mxcsr_mask;
-	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
-	long	padding[56];
-} __attribute__ ((aligned (16)));
-
-struct i387_soft_struct {
-	long	cwd;
-	long	swd;
-	long	twd;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
-	unsigned char	ftop, changed, lookahead, no_update, rm, alimit;
-	struct info	*info;
-	unsigned long	entry_eip;
-};
-
-union i387_union {
-	struct i387_fsave_struct	fsave;
-	struct i387_fxsave_struct	fxsave;
-	struct i387_soft_struct soft;
-};
-
 #define INIT_THREAD  {							\
 	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
 	.vm86_info = NULL,						\
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index 86fc2dcc8687..04ce823ea7e6 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -31,23 +31,6 @@
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 
 
-struct i387_fxsave_struct {
-	u16	cwd;
-	u16	swd;
-	u16	twd;
-	u16	fop;
-	u64	rip;
-	u64	rdp; 
-	u32	mxcsr;
-	u32	mxcsr_mask;
-	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
-	u32	padding[24];
-} __attribute__ ((aligned (16)));
-
-union i387_union {
-	struct i387_fxsave_struct	fxsave;
-};
 
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 

From 6d48583ba9ade609634e694fc35ea62b7a8adaaa Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:41 +0100
Subject: [PATCH 362/890] x86: unify extable_{32|64}.c

Introduce fixup_exception() on 64-bit and use it in kprobes to
eliminate an #ifdef.

Only 64-bit needs search_extable() due to a stepping bug.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 12 +-------
 arch/x86/mm/Makefile_32   |  2 +-
 arch/x86/mm/Makefile_64   |  2 +-
 arch/x86/mm/extable.c     | 62 +++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/extable_32.c  | 35 ----------------------
 arch/x86/mm/extable_64.c  | 34 ---------------------
 6 files changed, 65 insertions(+), 82 deletions(-)
 create mode 100644 arch/x86/mm/extable.c
 delete mode 100644 arch/x86/mm/extable_32.c
 delete mode 100644 arch/x86/mm/extable_64.c

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 8de82c8cedd6..7848bf74e2ab 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -904,19 +904,9 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-#ifdef CONFIG_X86_64
-		{
-			const struct exception_table_entry *fixup;
-			fixup = search_exception_tables(regs->ip);
-			if (fixup) {
-				regs->ip = fixup->fixup;
-				return 1;
-			}
-		}
-#else
 		if (fixup_exception(regs))
 			return 1;
-#endif
+
 		/*
 		 * fixup routine could not handle it,
 		 * Let do_page_fault() fix it.
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index a01aca79dc69..6a5e981981f7 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap.o
+obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o
 
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index b5a74486c3aa..09c997fe5856 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y	 := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap.o
+obj-y	 := init_64.o fault_64.o ioremap_64.o extable.o pageattr_64.o mmap.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
new file mode 100644
index 000000000000..7e8db53528a7
--- /dev/null
+++ b/arch/x86/mm/extable.c
@@ -0,0 +1,62 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+#ifdef CONFIG_PNPBIOS
+	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
+		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
+		extern u32 pnp_bios_is_utter_crap;
+		pnp_bios_is_utter_crap = 1;
+		printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
+		__asm__ volatile(
+			"movl %0, %%esp\n\t"
+			"jmp *%1\n\t"
+			: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
+		panic("do_trap: can't hit this");
+	}
+#endif
+
+	fixup = search_exception_tables(regs->ip);
+	if (fixup) {
+		regs->ip = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Need to defined our own search_extable on X86_64 to work around
+ * a B stepping K8 bug.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+	       const struct exception_table_entry *last,
+	       unsigned long value)
+{
+	/* B stepping K8 bug */
+	if ((value >> 32) == 0)
+		value |= 0xffffffffUL << 32;
+
+	while (first <= last) {
+		const struct exception_table_entry *mid;
+		long diff;
+
+		mid = (last - first) / 2 + first;
+		diff = mid->insn - value;
+		if (diff == 0)
+			return mid;
+		else if (diff < 0)
+			first = mid+1;
+		else
+			last = mid-1;
+	}
+	return NULL;
+}
+#endif
diff --git a/arch/x86/mm/extable_32.c b/arch/x86/mm/extable_32.c
deleted file mode 100644
index 41685461f8b2..000000000000
--- a/arch/x86/mm/extable_32.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * linux/arch/i386/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <asm/uaccess.h>
-
-int fixup_exception(struct pt_regs *regs)
-{
-	const struct exception_table_entry *fixup;
-
-#ifdef CONFIG_PNPBIOS
-	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs)))
-	{
-		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
-		extern u32 pnp_bios_is_utter_crap;
-		pnp_bios_is_utter_crap = 1;
-		printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
-		__asm__ volatile(
-			"movl %0, %%esp\n\t"
-			"jmp *%1\n\t"
-			: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
-		panic("do_trap: can't hit this");
-	}
-#endif
-
-	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		regs->ip = fixup->fixup;
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/arch/x86/mm/extable_64.c b/arch/x86/mm/extable_64.c
deleted file mode 100644
index 79ac6e7100af..000000000000
--- a/arch/x86/mm/extable_64.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * linux/arch/x86_64/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-	/* Work around a B stepping K8 bug */
-	if ((value >> 32) == 0)
-		value |= 0xffffffffUL << 32; 
-
-        while (first <= last) {
-		const struct exception_table_entry *mid;
-		long diff;
-
-		mid = (last - first) / 2 + first;
-		diff = mid->insn - value;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-        return NULL;
-}

From 85f2adf16955460c98131360f3d76aeb51aba073 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:42 +0100
Subject: [PATCH 363/890] x86: use helper in fault_64.c

Use the fixup_exception() helper in fault_64.c

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_64.c       | 5 +----
 include/asm-x86/uaccess_64.h | 2 ++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 121c7bda6297..3a94941578fa 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -298,7 +298,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	unsigned long address;
-	const struct exception_table_entry *fixup;
 	int write, fault;
 	unsigned long flags;
 	siginfo_t info;
@@ -508,9 +507,7 @@ bad_area_nosemaphore:
 no_context:
 	
 	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		regs->ip = fixup->fixup;
+	if (fixup_exception(regs)) {
 		return;
 	}
 
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index f4ce8768ad44..31d794702719 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -65,6 +65,8 @@ struct exception_table_entry
 	unsigned long insn, fixup;
 };
 
+extern int fixup_exception(struct pt_regs *regs);
+
 #define ARCH_HAS_SEARCH_EXTABLE
 
 /*

From ab4a574ef23cfa801a5078f7d7c2f2b76ecd6d91 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 30 Jan 2008 13:31:42 +0100
Subject: [PATCH 364/890] arch/x86/: spelling fixes

Spelling fixes.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c    | 2 +-
 arch/x86/kernel/mpparse_32.c | 2 +-
 arch/x86/kernel/vm86_32.c    | 2 +-
 arch/x86/mm/srat_64.c        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4c014fca2057..4330a899ddcb 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -108,7 +108,7 @@ static inline int lapic_get_version(void)
 }
 
 /*
- * Check, if the APIC is integrated or a seperate chip
+ * Check, if the APIC is integrated or a separate chip
  */
 static inline int lapic_is_integrated(void)
 {
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index bfcfc41f5607..a4c05372626b 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -1050,7 +1050,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 	int idx, bit = 0;
 	static int pci_irq = IRQ_COMPRESSION_START;
 	/*
-	 * Mapping between Global System Interrups, which
+	 * Mapping between Global System Interrupts, which
 	 * represent all possible interrupts, and IRQs
 	 * assigned to actual devices.
 	 */
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index c9f67effbc42..738c2104df30 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -384,7 +384,7 @@ static inline void clear_AC(struct kernel_vm86_regs * regs)
  * functions. However someone forgot to call clear_IF(regs)
  * in the opposite case.
  * After the command sequence CLI PUSHF STI POPF you should
- * end up with interrups disabled, but you ended up with
+ * end up with interrupts disabled, but you ended up with
  * interrupts enabled.
  *  ( I was testing my own changes, but the only bug I
  *    could find was in a function I had not changed. )
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 9be14171144b..5c0637e4c2f4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -160,7 +160,7 @@ static inline int save_add_info(void) {return 0;}
 #endif
 /*
  * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add infomation.
+ * Both SPARSE and RESERVE need nodes_add information.
  * This code supports one contiguous hot add area per node.
  */
 static int reserve_hotadd(int node, unsigned long start, unsigned long end)

From e7b5e11eaaa8ef93a34e68016de51152d0d62911 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:31:43 +0100
Subject: [PATCH 365/890] x86: kprobes leftover cleanups

Eliminate __always_inline, all of these static functions are
only called once.  Minor whitespace cleanup.  Eliminate one
supefluous return at end of void function.  Change the one
#ifndef to #ifdef to match the sense of the rest of the config
tests.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7848bf74e2ab..521a469acaad 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -159,7 +159,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static __always_inline void set_jmp_op(void *from, void *to)
+static void __kprobes set_jmp_op(void *from, void *to)
 {
 	struct __arch_jmp_op {
 		char op;
@@ -174,7 +174,7 @@ static __always_inline void set_jmp_op(void *from, void *to)
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-static __always_inline int can_boost(kprobe_opcode_t *opcodes)
+static int __kprobes can_boost(kprobe_opcode_t *opcodes)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
@@ -392,13 +392,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 }
 
-static __always_inline void clear_btf(void)
+static void __kprobes clear_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
 		wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0);
 }
 
-static __always_inline void restore_btf(void)
+static void __kprobes restore_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
 		wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0);
@@ -409,7 +409,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	clear_btf();
 	regs->flags |= X86_EFLAGS_TF;
 	regs->flags &= ~X86_EFLAGS_IF;
-	/*single step inline if the instruction is an int3*/
+	/* single step inline if the instruction is an int3 */
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
 		regs->ip = (unsigned long)p->addr;
 	else
@@ -767,7 +767,7 @@ static void __kprobes resume_execution(struct kprobe *p,
 	case 0xe8:	/* call relative - Fix return addr */
 		*tos = orig_ip + (*tos - copy_ip);
 		break;
-#ifndef CONFIG_X86_64
+#ifdef CONFIG_X86_32
 	case 0x9a:	/* call absolute -- same as call absolute, indirect */
 		*tos = orig_ip + (*tos - copy_ip);
 		goto no_change;
@@ -813,8 +813,6 @@ static void __kprobes resume_execution(struct kprobe *p,
 
 no_change:
 	restore_btf();
-
-	return;
 }
 
 /*

From 0ddc9cc8fdfe3df7a90557e66069e3da2c584725 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 18 Dec 2007 03:58:10 -0800
Subject: [PATCH 366/890] x86: unify module_{32|64}.h

This adds one case to the MODULE_PROC_FAMILY block testing
for X86_64.  There are no new things defined on X86_64 than
there were before.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/module.h    | 81 ++++++++++++++++++++++++++++++++++++-
 include/asm-x86/module_32.h | 75 ----------------------------------
 include/asm-x86/module_64.h | 10 -----
 3 files changed, 79 insertions(+), 87 deletions(-)
 delete mode 100644 include/asm-x86/module_32.h
 delete mode 100644 include/asm-x86/module_64.h

diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h
index 2b2f18d8a531..bfedb247871c 100644
--- a/include/asm-x86/module.h
+++ b/include/asm-x86/module.h
@@ -1,5 +1,82 @@
+#ifndef _ASM_MODULE_H
+#define _ASM_MODULE_H
+
+/* x86_32/64 are simple */
+struct mod_arch_specific {};
+
 #ifdef CONFIG_X86_32
-# include "module_32.h"
+# define Elf_Shdr Elf32_Shdr
+# define Elf_Sym Elf32_Sym
+# define Elf_Ehdr Elf32_Ehdr
 #else
-# include "module_64.h"
+# define Elf_Shdr Elf64_Shdr
+# define Elf_Sym Elf64_Sym
+# define Elf_Ehdr Elf64_Ehdr
 #endif
+
+#ifdef CONFIG_X86_64
+/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M386
+#define MODULE_PROC_FAMILY "386 "
+#elif defined CONFIG_M486
+#define MODULE_PROC_FAMILY "486 "
+#elif defined CONFIG_M586
+#define MODULE_PROC_FAMILY "586 "
+#elif defined CONFIG_M586TSC
+#define MODULE_PROC_FAMILY "586TSC "
+#elif defined CONFIG_M586MMX
+#define MODULE_PROC_FAMILY "586MMX "
+#elif defined CONFIG_MCORE2
+#define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_M686
+#define MODULE_PROC_FAMILY "686 "
+#elif defined CONFIG_MPENTIUMII
+#define MODULE_PROC_FAMILY "PENTIUMII "
+#elif defined CONFIG_MPENTIUMIII
+#define MODULE_PROC_FAMILY "PENTIUMIII "
+#elif defined CONFIG_MPENTIUMM
+#define MODULE_PROC_FAMILY "PENTIUMM "
+#elif defined CONFIG_MPENTIUM4
+#define MODULE_PROC_FAMILY "PENTIUM4 "
+#elif defined CONFIG_MK6
+#define MODULE_PROC_FAMILY "K6 "
+#elif defined CONFIG_MK7
+#define MODULE_PROC_FAMILY "K7 "
+#elif defined CONFIG_MK8
+#define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_X86_ELAN
+#define MODULE_PROC_FAMILY "ELAN "
+#elif defined CONFIG_MCRUSOE
+#define MODULE_PROC_FAMILY "CRUSOE "
+#elif defined CONFIG_MEFFICEON
+#define MODULE_PROC_FAMILY "EFFICEON "
+#elif defined CONFIG_MWINCHIPC6
+#define MODULE_PROC_FAMILY "WINCHIPC6 "
+#elif defined CONFIG_MWINCHIP2
+#define MODULE_PROC_FAMILY "WINCHIP2 "
+#elif defined CONFIG_MWINCHIP3D
+#define MODULE_PROC_FAMILY "WINCHIP3D "
+#elif defined CONFIG_MCYRIXIII
+#define MODULE_PROC_FAMILY "CYRIXIII "
+#elif defined CONFIG_MVIAC3_2
+#define MODULE_PROC_FAMILY "VIAC3-2 "
+#elif defined CONFIG_MVIAC7
+#define MODULE_PROC_FAMILY "VIAC7 "
+#elif defined CONFIG_MGEODEGX1
+#define MODULE_PROC_FAMILY "GEODEGX1 "
+#elif defined CONFIG_MGEODE_LX
+#define MODULE_PROC_FAMILY "GEODE "
+#else
+#error unknown processor family
+#endif
+
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_4KSTACKS
+#  define MODULE_STACKSIZE "4KSTACKS "
+# else
+#  define MODULE_STACKSIZE ""
+# endif
+# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+#endif
+
+#endif /* _ASM_MODULE_H */
diff --git a/include/asm-x86/module_32.h b/include/asm-x86/module_32.h
deleted file mode 100644
index 7e5fda6c3976..000000000000
--- a/include/asm-x86/module_32.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef _ASM_I386_MODULE_H
-#define _ASM_I386_MODULE_H
-
-/* x86 is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#ifdef CONFIG_M386
-#define MODULE_PROC_FAMILY "386 "
-#elif defined CONFIG_M486
-#define MODULE_PROC_FAMILY "486 "
-#elif defined CONFIG_M586
-#define MODULE_PROC_FAMILY "586 "
-#elif defined CONFIG_M586TSC
-#define MODULE_PROC_FAMILY "586TSC "
-#elif defined CONFIG_M586MMX
-#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
-#elif defined CONFIG_M686
-#define MODULE_PROC_FAMILY "686 "
-#elif defined CONFIG_MPENTIUMII
-#define MODULE_PROC_FAMILY "PENTIUMII "
-#elif defined CONFIG_MPENTIUMIII
-#define MODULE_PROC_FAMILY "PENTIUMIII "
-#elif defined CONFIG_MPENTIUMM
-#define MODULE_PROC_FAMILY "PENTIUMM "
-#elif defined CONFIG_MPENTIUM4
-#define MODULE_PROC_FAMILY "PENTIUM4 "
-#elif defined CONFIG_MK6
-#define MODULE_PROC_FAMILY "K6 "
-#elif defined CONFIG_MK7
-#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_X86_ELAN
-#define MODULE_PROC_FAMILY "ELAN "
-#elif defined CONFIG_MCRUSOE
-#define MODULE_PROC_FAMILY "CRUSOE "
-#elif defined CONFIG_MEFFICEON
-#define MODULE_PROC_FAMILY "EFFICEON "
-#elif defined CONFIG_MWINCHIPC6
-#define MODULE_PROC_FAMILY "WINCHIPC6 "
-#elif defined CONFIG_MWINCHIP2
-#define MODULE_PROC_FAMILY "WINCHIP2 "
-#elif defined CONFIG_MWINCHIP3D
-#define MODULE_PROC_FAMILY "WINCHIP3D "
-#elif defined CONFIG_MCYRIXIII
-#define MODULE_PROC_FAMILY "CYRIXIII "
-#elif defined CONFIG_MVIAC3_2
-#define MODULE_PROC_FAMILY "VIAC3-2 "
-#elif defined CONFIG_MVIAC7
-#define MODULE_PROC_FAMILY "VIAC7 "
-#elif defined CONFIG_MGEODEGX1
-#define MODULE_PROC_FAMILY "GEODEGX1 "
-#elif defined CONFIG_MGEODE_LX
-#define MODULE_PROC_FAMILY "GEODE "
-#else
-#error unknown processor family
-#endif
-
-#ifdef CONFIG_4KSTACKS
-#define MODULE_STACKSIZE "4KSTACKS "
-#else
-#define MODULE_STACKSIZE ""
-#endif
-
-#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
-
-#endif /* _ASM_I386_MODULE_H */
diff --git a/include/asm-x86/module_64.h b/include/asm-x86/module_64.h
deleted file mode 100644
index 67f8f69fa7b1..000000000000
--- a/include/asm-x86/module_64.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_X8664_MODULE_H
-#define _ASM_X8664_MODULE_H
-
-struct mod_arch_specific {}; 
-
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-
-#endif 

From bdf88217b70dbb18c4ee27a6c497286e040a6705 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:44 +0100
Subject: [PATCH 367/890] x86: user_regset header

The new header <linux/regset.h> defines the types struct user_regset and
struct user_regset_view, with some associated declarations.  This new set
of interfaces will become the standard way for arch code to expose
user-mode machine-specific state.  A single set of entry points into arch
code can do all the low-level work in one place to fill the needs of core
dumps, ptrace, and any other user-mode debugging facilities that might come
along in the future.

For existing arch code to adapt to the user_regset interfaces, each arch
can work from the code it already has to support core files and ptrace.
The formats you want for user_regset are the core file formats.  The only
wrinkle in adapting old ptrace implementation code as user_regset get and
set functions is that these functions can be called on current as well as
on another task_struct that is stopped and switched out as for ptrace.
For some kinds of machine state, you may have to load it directly from CPU
registers or otherwise differently for current than for another thread.
(Your core dump support already handles this in elf_core_copy_regs for
current and elf_core_copy_task_regs for other tasks, so just check there.)
The set function should also be made to work on current in case that
entails some special cases, though this was never required before for
ptrace.  Adding this flexibility covers the arch needs to open the door to
more sophisticated new debugging facilities that don't always need to
context-switch to do every little thing.

The copyin/copyout helper functions (in a later patch) relieve the arch
code of most of the cumbersome details of the flexible get/set interfaces.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/regset.h | 206 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 include/linux/regset.h

diff --git a/include/linux/regset.h b/include/linux/regset.h
new file mode 100644
index 000000000000..85d0fb0a014d
--- /dev/null
+++ b/include/linux/regset.h
@@ -0,0 +1,206 @@
+/*
+ * User-mode machine state access
+ *
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#ifndef _LINUX_REGSET_H
+#define _LINUX_REGSET_H	1
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+struct task_struct;
+struct user_regset;
+
+
+/**
+ * user_regset_active_fn - type of @active function in &struct user_regset
+ * @target:	thread being examined
+ * @regset:	regset being examined
+ *
+ * Return -%ENODEV if not available on the hardware found.
+ * Return %0 if no interesting state in this thread.
+ * Return >%0 number of @size units of interesting state.
+ * Any get call fetching state beyond that number will
+ * see the default initialization state for this data,
+ * so a caller that knows what the default state is need
+ * not copy it all out.
+ * This call is optional; the pointer is %NULL if there
+ * is no inexpensive check to yield a value < @n.
+ */
+typedef int user_regset_active_fn(struct task_struct *target,
+				  const struct user_regset *regset);
+
+/**
+ * user_regset_get_fn - type of @get function in &struct user_regset
+ * @target:	thread being examined
+ * @regset:	regset being examined
+ * @pos:	offset into the regset data to access, in bytes
+ * @count:	amount of data to copy, in bytes
+ * @kbuf:	if not %NULL, a kernel-space pointer to copy into
+ * @ubuf:	if @kbuf is %NULL, a user-space pointer to copy into
+ *
+ * Fetch register values.  Return %0 on success; -%EIO or -%ENODEV
+ * are usual failure returns.  The @pos and @count values are in
+ * bytes, but must be properly aligned.  If @kbuf is non-null, that
+ * buffer is used and @ubuf is ignored.  If @kbuf is %NULL, then
+ * ubuf gives a userland pointer to access directly, and an -%EFAULT
+ * return value is possible.
+ */
+typedef int user_regset_get_fn(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       void *kbuf, void __user *ubuf);
+
+/**
+ * user_regset_set_fn - type of @set function in &struct user_regset
+ * @target:	thread being examined
+ * @regset:	regset being examined
+ * @pos:	offset into the regset data to access, in bytes
+ * @count:	amount of data to copy, in bytes
+ * @kbuf:	if not %NULL, a kernel-space pointer to copy from
+ * @ubuf:	if @kbuf is %NULL, a user-space pointer to copy from
+ *
+ * Store register values.  Return %0 on success; -%EIO or -%ENODEV
+ * are usual failure returns.  The @pos and @count values are in
+ * bytes, but must be properly aligned.  If @kbuf is non-null, that
+ * buffer is used and @ubuf is ignored.  If @kbuf is %NULL, then
+ * ubuf gives a userland pointer to access directly, and an -%EFAULT
+ * return value is possible.
+ */
+typedef int user_regset_set_fn(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       const void *kbuf, const void __user *ubuf);
+
+/**
+ * user_regset_writeback_fn - type of @writeback function in &struct user_regset
+ * @target:	thread being examined
+ * @regset:	regset being examined
+ * @immediate:	zero if writeback at completion of next context switch is OK
+ *
+ * This call is optional; usually the pointer is %NULL.  When
+ * provided, there is some user memory associated with this regset's
+ * hardware, such as memory backing cached register data on register
+ * window machines; the regset's data controls what user memory is
+ * used (e.g. via the stack pointer value).
+ *
+ * Write register data back to user memory.  If the @immediate flag
+ * is nonzero, it must be written to the user memory so uaccess or
+ * access_process_vm() can see it when this call returns; if zero,
+ * then it must be written back by the time the task completes a
+ * context switch (as synchronized with wait_task_inactive()).
+ * Return %0 on success or if there was nothing to do, -%EFAULT for
+ * a memory problem (bad stack pointer or whatever), or -%EIO for a
+ * hardware problem.
+ */
+typedef int user_regset_writeback_fn(struct task_struct *target,
+				     const struct user_regset *regset,
+				     int immediate);
+
+/**
+ * struct user_regset - accessible thread CPU state
+ * @n:			Number of slots (registers).
+ * @size:		Size in bytes of a slot (register).
+ * @align:		Required alignment, in bytes.
+ * @bias:		Bias from natural indexing.
+ * @core_note_type:	ELF note @n_type value used in core dumps.
+ * @get:		Function to fetch values.
+ * @set:		Function to store values.
+ * @active:		Function to report if regset is active, or %NULL.
+ * @writeback:		Function to write data back to user memory, or %NULL.
+ *
+ * This data structure describes a machine resource we call a register set.
+ * This is part of the state of an individual thread, not necessarily
+ * actual CPU registers per se.  A register set consists of a number of
+ * similar slots, given by @n.  Each slot is @size bytes, and aligned to
+ * @align bytes (which is at least @size).
+ *
+ * These functions must be called only on the current thread or on a
+ * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are
+ * guaranteed will not be woken up and return to user mode, and that we
+ * have called wait_task_inactive() on.  (The target thread always might
+ * wake up for SIGKILL while these functions are working, in which case
+ * that thread's user_regset state might be scrambled.)
+ *
+ * The @pos argument must be aligned according to @align; the @count
+ * argument must be a multiple of @size.  These functions are not
+ * responsible for checking for invalid arguments.
+ *
+ * When there is a natural value to use as an index, @bias gives the
+ * difference between the natural index and the slot index for the
+ * register set.  For example, x86 GDT segment descriptors form a regset;
+ * the segment selector produces a natural index, but only a subset of
+ * that index space is available as a regset (the TLS slots); subtracting
+ * @bias from a segment selector index value computes the regset slot.
+ *
+ * If nonzero, @core_note_type gives the n_type field (NT_* value)
+ * of the core file note in which this regset's data appears.
+ * NT_PRSTATUS is a special case in that the regset data starts at
+ * offsetof(struct elf_prstatus, pr_reg) into the note data; that is
+ * part of the per-machine ELF formats userland knows about.  In
+ * other cases, the core file note contains exactly the whole regset
+ * (@n * @size) and nothing else.  The core file note is normally
+ * omitted when there is an @active function and it returns zero.
+ */
+struct user_regset {
+	user_regset_get_fn		*get;
+	user_regset_set_fn		*set;
+	user_regset_active_fn		*active;
+	user_regset_writeback_fn	*writeback;
+	unsigned int			n;
+	unsigned int 			size;
+	unsigned int 			align;
+	unsigned int 			bias;
+	unsigned int 			core_note_type;
+};
+
+/**
+ * struct user_regset_view - available regsets
+ * @name:	Identifier, e.g. UTS_MACHINE string.
+ * @regsets:	Array of @n regsets available in this view.
+ * @n:		Number of elements in @regsets.
+ * @e_machine:	ELF header @e_machine %EM_* value written in core dumps.
+ * @e_flags:	ELF header @e_flags value written in core dumps.
+ * @ei_osabi:	ELF header @e_ident[%EI_OSABI] value written in core dumps.
+ *
+ * A regset view is a collection of regsets (&struct user_regset,
+ * above).  This describes all the state of a thread that can be seen
+ * from a given architecture/ABI environment.  More than one view might
+ * refer to the same &struct user_regset, or more than one regset
+ * might refer to the same machine-specific state in the thread.  For
+ * example, a 32-bit thread's state could be examined from the 32-bit
+ * view or from the 64-bit view.  Either method reaches the same thread
+ * register state, doing appropriate widening or truncation.
+ */
+struct user_regset_view {
+	const char *name;
+	const struct user_regset *regsets;
+	unsigned int n;
+	u32 e_flags;
+	u16 e_machine;
+	u8 ei_osabi;
+};
+
+/*
+ * This is documented here rather than at the definition sites because its
+ * implementation is machine-dependent but its interface is universal.
+ */
+/**
+ * task_user_regset_view - Return the process's native regset view.
+ * @tsk: a thread of the process in question
+ *
+ * Return the &struct user_regset_view that is native for the given process.
+ * For example, what it would access when it called ptrace().
+ * Throughout the life of the process, this only changes at exec.
+ */
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk);
+
+
+#endif	/* <linux/regset.h> */

From 3aba481fc94d83ff630d4b7cd2f7447010c4c6df Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:44 +0100
Subject: [PATCH 368/890] elf core dump: notes reorg

This pulls out the code for writing the notes segment of an ELF core dump
into separate functions.  This cleanly isolates into one cluster of
functions everything that deals with the note formats and the hooks into
arch code to fill them.  The top-level elf_core_dump function itself now
deals purely with the generic ELF format and the memory segments.

This only moves code around into functions that can be inlined away.
It should not change any behavior at all.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/binfmt_elf.c | 324 +++++++++++++++++++++++++++++-------------------
 1 file changed, 194 insertions(+), 130 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b8bca1ebc1a0..4510429b973e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1395,7 +1395,8 @@ static int writenote(struct memelfnote *men, struct file *file,
 	if (!dump_seek(file, (off))) \
 		goto end_coredump;
 
-static void fill_elf_header(struct elfhdr *elf, int segs)
+static void fill_elf_header(struct elfhdr *elf, int segs,
+			    u16 machine, u32 flags, u8 osabi)
 {
 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
 	elf->e_ident[EI_CLASS] = ELF_CLASS;
@@ -1405,12 +1406,12 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
 
 	elf->e_type = ET_CORE;
-	elf->e_machine = ELF_ARCH;
+	elf->e_machine = machine;
 	elf->e_version = EV_CURRENT;
 	elf->e_entry = 0;
 	elf->e_phoff = sizeof(struct elfhdr);
 	elf->e_shoff = 0;
-	elf->e_flags = ELF_CORE_EFLAGS;
+	elf->e_flags = flags;
 	elf->e_ehsize = sizeof(struct elfhdr);
 	elf->e_phentsize = sizeof(struct elf_phdr);
 	elf->e_phnum = segs;
@@ -1517,6 +1518,16 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	return 0;
 }
 
+static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
+{
+	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
+	int i = 0;
+	do
+		i += 2;
+	while (auxv[i - 2] != AT_NULL);
+	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
+}
+
 /* Here is the structure in which status of each thread is captured. */
 struct elf_thread_status
 {
@@ -1569,6 +1580,174 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 	return sz;
 }
 
+struct elf_note_info {
+	struct memelfnote *notes;
+	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
+	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
+	struct list_head thread_list;
+	elf_fpregset_t *fpu;
+#ifdef ELF_CORE_COPY_XFPREGS
+	elf_fpxregset_t *xfpu;
+#endif
+	int thread_status_size;
+	int numnote;
+};
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+			  struct elf_note_info *info,
+			  long signr, struct pt_regs *regs)
+{
+#define	NUM_NOTES	6
+	struct list_head *t;
+	struct task_struct *g, *p;
+
+	info->notes = NULL;
+	info->prstatus = NULL;
+	info->psinfo = NULL;
+	info->fpu = NULL;
+#ifdef ELF_CORE_COPY_XFPREGS
+	info->xfpu = NULL;
+#endif
+	INIT_LIST_HEAD(&info->thread_list);
+
+	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
+			      GFP_KERNEL);
+	if (!info->notes)
+		return 0;
+	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
+	if (!info->psinfo)
+		return 0;
+	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
+	if (!info->prstatus)
+		return 0;
+	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
+	if (!info->fpu)
+		return 0;
+#ifdef ELF_CORE_COPY_XFPREGS
+	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
+	if (!info->xfpu)
+		return 0;
+#endif
+
+	info->thread_status_size = 0;
+	if (signr) {
+		struct elf_thread_status *tmp;
+		rcu_read_lock();
+		do_each_thread(g, p)
+			if (current->mm == p->mm && current != p) {
+				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+				if (!tmp) {
+					rcu_read_unlock();
+					return 0;
+				}
+				tmp->thread = p;
+				list_add(&tmp->list, &info->thread_list);
+			}
+		while_each_thread(g, p);
+		rcu_read_unlock();
+		list_for_each(t, &info->thread_list) {
+			struct elf_thread_status *tmp;
+			int sz;
+
+			tmp = list_entry(t, struct elf_thread_status, list);
+			sz = elf_dump_thread_status(signr, tmp);
+			info->thread_status_size += sz;
+		}
+	}
+	/* now collect the dump for the current */
+	memset(info->prstatus, 0, sizeof(*info->prstatus));
+	fill_prstatus(info->prstatus, current, signr);
+	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
+
+	/* Set up header */
+	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
+
+	/*
+	 * Set up the notes in similar form to SVR4 core dumps made
+	 * with info from their /proc.
+	 */
+
+	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
+		  sizeof(*info->prstatus), info->prstatus);
+	fill_psinfo(info->psinfo, current->group_leader, current->mm);
+	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
+		  sizeof(*info->psinfo), info->psinfo);
+
+	info->numnote = 2;
+
+	fill_auxv_note(&info->notes[info->numnote++], current->mm);
+
+	/* Try to dump the FPU. */
+	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
+							       info->fpu);
+	if (info->prstatus->pr_fpvalid)
+		fill_note(info->notes + info->numnote++,
+			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+	if (elf_core_copy_task_xfpregs(current, info->xfpu))
+		fill_note(info->notes + info->numnote++,
+			  "LINUX", ELF_CORE_XFPREG_TYPE,
+			  sizeof(*info->xfpu), info->xfpu);
+#endif
+
+	return 1;
+
+#undef NUM_NOTES
+}
+
+static size_t get_note_info_size(struct elf_note_info *info)
+{
+	int sz = 0;
+	int i;
+
+	for (i = 0; i < info->numnote; i++)
+		sz += notesize(info->notes + i);
+
+	sz += info->thread_status_size;
+
+	return sz;
+}
+
+static int write_note_info(struct elf_note_info *info,
+			   struct file *file, loff_t *foffset)
+{
+	int i;
+	struct list_head *t;
+
+	for (i = 0; i < info->numnote; i++)
+		if (!writenote(info->notes + i, file, foffset))
+			return 0;
+
+	/* write out the thread status notes section */
+	list_for_each(t, &info->thread_list) {
+		struct elf_thread_status *tmp =
+				list_entry(t, struct elf_thread_status, list);
+
+		for (i = 0; i < tmp->num_notes; i++)
+			if (!writenote(&tmp->notes[i], file, foffset))
+				return 0;
+	}
+
+	return 1;
+}
+
+static void free_note_info(struct elf_note_info *info)
+{
+	while (!list_empty(&info->thread_list)) {
+		struct list_head *tmp = info->thread_list.next;
+		list_del(tmp);
+		kfree(list_entry(tmp, struct elf_thread_status, list));
+	}
+
+	kfree(info->prstatus);
+	kfree(info->psinfo);
+	kfree(info->notes);
+	kfree(info->fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+	kfree(info->xfpu);
+#endif
+}
+
 static struct vm_area_struct *first_vma(struct task_struct *tsk,
 					struct vm_area_struct *gate_vma)
 {
@@ -1604,29 +1783,15 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
  */
 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
 {
-#define	NUM_NOTES	6
 	int has_dumped = 0;
 	mm_segment_t fs;
 	int segs;
 	size_t size = 0;
-	int i;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff, foffset;
-	int numnote;
-	struct memelfnote *notes = NULL;
-	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
-	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
- 	struct task_struct *g, *p;
- 	LIST_HEAD(thread_list);
- 	struct list_head *t;
-	elf_fpregset_t *fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
-	elf_fpxregset_t *xfpu = NULL;
-#endif
-	int thread_status_size = 0;
-	elf_addr_t *auxv;
 	unsigned long mm_flags;
+	struct elf_note_info info;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -1644,52 +1809,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
 	if (!elf)
 		goto cleanup;
-	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
-	if (!prstatus)
-		goto cleanup;
-	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
-	if (!psinfo)
-		goto cleanup;
-	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
-	if (!notes)
-		goto cleanup;
-	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
-	if (!fpu)
-		goto cleanup;
-#ifdef ELF_CORE_COPY_XFPREGS
-	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
-	if (!xfpu)
-		goto cleanup;
-#endif
-
-	if (signr) {
-		struct elf_thread_status *tmp;
-		rcu_read_lock();
-		do_each_thread(g,p)
-			if (current->mm == p->mm && current != p) {
-				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
-				if (!tmp) {
-					rcu_read_unlock();
-					goto cleanup;
-				}
-				tmp->thread = p;
-				list_add(&tmp->list, &thread_list);
-			}
-		while_each_thread(g,p);
-		rcu_read_unlock();
-		list_for_each(t, &thread_list) {
-			struct elf_thread_status *tmp;
-			int sz;
-
-			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
-			thread_status_size += sz;
-		}
-	}
-	/* now collect the dump for the current */
-	memset(prstatus, 0, sizeof(*prstatus));
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
 	
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1700,42 +1819,16 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	if (gate_vma != NULL)
 		segs++;
 
-	/* Set up header */
-	fill_elf_header(elf, segs + 1);	/* including notes section */
+	/*
+	 * Collect all the non-memory information about the process for the
+	 * notes.  This also sets up the file header.
+	 */
+	if (!fill_note_info(elf, segs + 1, /* including notes section */
+			    &info, signr, regs))
+		goto cleanup;
 
 	has_dumped = 1;
 	current->flags |= PF_DUMPCORE;
-
-	/*
-	 * Set up the notes in similar form to SVR4 core dumps made
-	 * with info from their /proc.
-	 */
-
-	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
-	fill_psinfo(psinfo, current->group_leader, current->mm);
-	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
-	
-	numnote = 2;
-
-	auxv = (elf_addr_t *)current->mm->saved_auxv;
-
-	i = 0;
-	do
-		i += 2;
-	while (auxv[i - 2] != AT_NULL);
-	fill_note(&notes[numnote++], "CORE", NT_AUXV,
-		  i * sizeof(elf_addr_t), auxv);
-
-  	/* Try to dump the FPU. */
-	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
-		fill_note(notes + numnote++,
-			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
-	if (elf_core_copy_task_xfpregs(current, xfpu))
-		fill_note(notes + numnote++,
-			  "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
-#endif	
   
 	fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -1748,12 +1841,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	/* Write notes phdr entry */
 	{
 		struct elf_phdr phdr;
-		int sz = 0;
-
-		for (i = 0; i < numnote; i++)
-			sz += notesize(notes + i);
-		
-		sz += thread_status_size;
+		size_t sz = get_note_info_size(&info);
 
 		sz += elf_coredump_extra_notes_size();
 
@@ -1798,23 +1886,12 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 #endif
 
  	/* write out the notes section */
-	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file, &foffset))
-			goto end_coredump;
+	if (!write_note_info(&info, file, &foffset))
+		goto end_coredump;
 
 	if (elf_coredump_extra_notes_write(file, &foffset))
 		goto end_coredump;
 
-	/* write out the thread status notes section */
-	list_for_each(t, &thread_list) {
-		struct elf_thread_status *tmp =
-				list_entry(t, struct elf_thread_status, list);
-
-		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file, &foffset))
-				goto end_coredump;
-	}
-
 	/* Align to page */
 	DUMP_SEEK(dataoff - foffset);
 
@@ -1865,22 +1942,9 @@ end_coredump:
 	set_fs(fs);
 
 cleanup:
-	while (!list_empty(&thread_list)) {
-		struct list_head *tmp = thread_list.next;
-		list_del(tmp);
-		kfree(list_entry(tmp, struct elf_thread_status, list));
-	}
-
 	kfree(elf);
-	kfree(prstatus);
-	kfree(psinfo);
-	kfree(notes);
-	kfree(fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
-	kfree(xfpu);
-#endif
+	free_note_info(&info);
 	return has_dumped;
-#undef NUM_NOTES
 }
 
 #endif		/* USE_ELF_CORE_DUMP */

From 4206d3aa1978e44f58bfa4e1c9d8d35cbf19c187 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:45 +0100
Subject: [PATCH 369/890] elf core dump: notes user_regset

This modifies the ELF core dump code under #ifdef CORE_DUMP_USE_REGSET.
It changes nothing when this macro is not defined.  When it's #define'd
by some arch header (e.g. asm/elf.h), the arch must support the
user_regset (linux/regset.h) interface for reading thread state.

This provides an alternate version of note segment writing that is based
purely on the user_regset interfaces.  When CORE_DUMP_USE_REGSET is set,
the arch need not define macros such as ELF_CORE_COPY_REGS and ELF_ARCH.
All that information is taken from the user_regset data structures.
The core dumps come out exactly the same if arch's definitions for its
user_regset details are correct.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/binfmt_elf.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4510429b973e..786ee275ec0a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1528,6 +1528,228 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
 }
 
+#ifdef CORE_DUMP_USE_REGSET
+#include <linux/regset.h>
+
+struct elf_thread_core_info {
+	struct elf_thread_core_info *next;
+	struct task_struct *task;
+	struct elf_prstatus prstatus;
+	struct memelfnote notes[0];
+};
+
+struct elf_note_info {
+	struct elf_thread_core_info *thread;
+	struct memelfnote psinfo;
+	struct memelfnote auxv;
+	size_t size;
+	int thread_notes;
+};
+
+static int fill_thread_core_info(struct elf_thread_core_info *t,
+				 const struct user_regset_view *view,
+				 long signr, size_t *total)
+{
+	unsigned int i;
+
+	/*
+	 * NT_PRSTATUS is the one special case, because the regset data
+	 * goes into the pr_reg field inside the note contents, rather
+	 * than being the whole note contents.  We fill the reset in here.
+	 * We assume that regset 0 is NT_PRSTATUS.
+	 */
+	fill_prstatus(&t->prstatus, t->task, signr);
+	(void) view->regsets[0].get(t->task, &view->regsets[0],
+				    0, sizeof(t->prstatus.pr_reg),
+				    &t->prstatus.pr_reg, NULL);
+
+	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
+		  sizeof(t->prstatus), &t->prstatus);
+	*total += notesize(&t->notes[0]);
+
+	/*
+	 * Each other regset might generate a note too.  For each regset
+	 * that has no core_note_type or is inactive, we leave t->notes[i]
+	 * all zero and we'll know to skip writing it later.
+	 */
+	for (i = 1; i < view->n; ++i) {
+		const struct user_regset *regset = &view->regsets[i];
+		if (regset->core_note_type &&
+		    (!regset->active || regset->active(t->task, regset))) {
+			int ret;
+			size_t size = regset->n * regset->size;
+			void *data = kmalloc(size, GFP_KERNEL);
+			if (unlikely(!data))
+				return 0;
+			ret = regset->get(t->task, regset,
+					  0, size, data, NULL);
+			if (unlikely(ret))
+				kfree(data);
+			else {
+				if (regset->core_note_type != NT_PRFPREG)
+					fill_note(&t->notes[i], "LINUX",
+						  regset->core_note_type,
+						  size, data);
+				else {
+					t->prstatus.pr_fpvalid = 1;
+					fill_note(&t->notes[i], "CORE",
+						  NT_PRFPREG, size, data);
+				}
+				*total += notesize(&t->notes[i]);
+			}
+		}
+	}
+
+	return 1;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+			  struct elf_note_info *info,
+			  long signr, struct pt_regs *regs)
+{
+	struct task_struct *dump_task = current;
+	const struct user_regset_view *view = task_user_regset_view(dump_task);
+	struct elf_thread_core_info *t;
+	struct elf_prpsinfo *psinfo;
+	struct task_struct *g, *p;
+	unsigned int i;
+
+	info->size = 0;
+	info->thread = NULL;
+
+	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
+	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
+	if (psinfo == NULL)
+		return 0;
+
+	/*
+	 * Figure out how many notes we're going to need for each thread.
+	 */
+	info->thread_notes = 0;
+	for (i = 0; i < view->n; ++i)
+		if (view->regsets[i].core_note_type != 0)
+			++info->thread_notes;
+
+	/*
+	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
+	 * since it is our one special case.
+	 */
+	if (unlikely(info->thread_notes == 0) ||
+	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
+		WARN_ON(1);
+		return 0;
+	}
+
+	/*
+	 * Initialize the ELF file header.
+	 */
+	fill_elf_header(elf, phdrs,
+			view->e_machine, view->e_flags, view->ei_osabi);
+
+	/*
+	 * Allocate a structure for each thread.
+	 */
+	rcu_read_lock();
+	do_each_thread(g, p)
+		if (p->mm == dump_task->mm) {
+			t = kzalloc(offsetof(struct elf_thread_core_info,
+					     notes[info->thread_notes]),
+				    GFP_ATOMIC);
+			if (unlikely(!t)) {
+				rcu_read_unlock();
+				return 0;
+			}
+			t->task = p;
+			if (p == dump_task || !info->thread) {
+				t->next = info->thread;
+				info->thread = t;
+			} else {
+				/*
+				 * Make sure to keep the original task at
+				 * the head of the list.
+				 */
+				t->next = info->thread->next;
+				info->thread->next = t;
+			}
+		}
+	while_each_thread(g, p);
+	rcu_read_unlock();
+
+	/*
+	 * Now fill in each thread's information.
+	 */
+	for (t = info->thread; t != NULL; t = t->next)
+		if (!fill_thread_core_info(t, view, signr, &info->size))
+			return 0;
+
+	/*
+	 * Fill in the two process-wide notes.
+	 */
+	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
+	info->size += notesize(&info->psinfo);
+
+	fill_auxv_note(&info->auxv, current->mm);
+	info->size += notesize(&info->auxv);
+
+	return 1;
+}
+
+static size_t get_note_info_size(struct elf_note_info *info)
+{
+	return info->size;
+}
+
+/*
+ * Write all the notes for each thread.  When writing the first thread, the
+ * process-wide notes are interleaved after the first thread-specific note.
+ */
+static int write_note_info(struct elf_note_info *info,
+			   struct file *file, loff_t *foffset)
+{
+	bool first = 1;
+	struct elf_thread_core_info *t = info->thread;
+
+	do {
+		int i;
+
+		if (!writenote(&t->notes[0], file, foffset))
+			return 0;
+
+		if (first && !writenote(&info->psinfo, file, foffset))
+			return 0;
+		if (first && !writenote(&info->auxv, file, foffset))
+			return 0;
+
+		for (i = 1; i < info->thread_notes; ++i)
+			if (t->notes[i].data &&
+			    !writenote(&t->notes[i], file, foffset))
+				return 0;
+
+		first = 0;
+		t = t->next;
+	} while (t);
+
+	return 1;
+}
+
+static void free_note_info(struct elf_note_info *info)
+{
+	struct elf_thread_core_info *threads = info->thread;
+	while (threads) {
+		unsigned int i;
+		struct elf_thread_core_info *t = threads;
+		threads = t->next;
+		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
+		for (i = 1; i < info->thread_notes; ++i)
+			kfree(t->notes[i].data);
+		kfree(t);
+	}
+	kfree(info->psinfo.data);
+}
+
+#else
+
 /* Here is the structure in which status of each thread is captured. */
 struct elf_thread_status
 {
@@ -1748,6 +1970,8 @@ static void free_note_info(struct elf_note_info *info)
 #endif
 }
 
+#endif
+
 static struct vm_area_struct *first_vma(struct task_struct *tsk,
 					struct vm_area_struct *gate_vma)
 {

From bae3f7c39dee5951bcbedeaedb6744f882a00173 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:45 +0100
Subject: [PATCH 370/890] x86: user_regset helpers

This adds some inlines to linux/regset.h intended for arch code to use in
its user_regset get and set functions.  These make it pretty easy to deal
with the interface's optional kernel-space or user-space pointers and its
generalized access to a part of the register data at a time.

In simple cases where the internal data structure matches the exported
layout (core dump format), a get function can be nothing but a call to
user_regset_copyout, and a set function a call to user_regset_copyin.

In other cases the exported layout is usually made up of a few pieces each
stored contiguously in a different internal data structure.  These helpers
make it straightforward to write a get or set function by processing each
contiguous chunk of the data in order.  The start_pos and end_pos arguments
are always constants, so these inlines collapse to a small amount of code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/regset.h | 116 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/include/linux/regset.h b/include/linux/regset.h
index 85d0fb0a014d..761c931af975 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -15,6 +15,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/uaccess.h>
 struct task_struct;
 struct user_regset;
 
@@ -203,4 +204,119 @@ struct user_regset_view {
 const struct user_regset_view *task_user_regset_view(struct task_struct *tsk);
 
 
+/*
+ * These are helpers for writing regset get/set functions in arch code.
+ * Because @start_pos and @end_pos are always compile-time constants,
+ * these are inlined into very little code though they look large.
+ *
+ * Use one or more calls sequentially for each chunk of regset data stored
+ * contiguously in memory.  Call with constants for @start_pos and @end_pos,
+ * giving the range of byte positions in the regset that data corresponds
+ * to; @end_pos can be -1 if this chunk is at the end of the regset layout.
+ * Each call updates the arguments to point past its chunk.
+ */
+
+static inline int user_regset_copyout(unsigned int *pos, unsigned int *count,
+				      void **kbuf,
+				      void __user **ubuf, const void *data,
+				      const int start_pos, const int end_pos)
+{
+	if (*count == 0)
+		return 0;
+	BUG_ON(*pos < start_pos);
+	if (end_pos < 0 || *pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? *count
+				     : min(*count, end_pos - *pos));
+		data += *pos - start_pos;
+		if (*kbuf) {
+			memcpy(*kbuf, data, copy);
+			*kbuf += copy;
+		} else if (__copy_to_user(*ubuf, data, copy))
+			return -EFAULT;
+		else
+			*ubuf += copy;
+		*pos += copy;
+		*count -= copy;
+	}
+	return 0;
+}
+
+static inline int user_regset_copyin(unsigned int *pos, unsigned int *count,
+				     const void **kbuf,
+				     const void __user **ubuf, void *data,
+				     const int start_pos, const int end_pos)
+{
+	if (*count == 0)
+		return 0;
+	BUG_ON(*pos < start_pos);
+	if (end_pos < 0 || *pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? *count
+				     : min(*count, end_pos - *pos));
+		data += *pos - start_pos;
+		if (*kbuf) {
+			memcpy(data, *kbuf, copy);
+			*kbuf += copy;
+		} else if (__copy_from_user(data, *ubuf, copy))
+			return -EFAULT;
+		else
+			*ubuf += copy;
+		*pos += copy;
+		*count -= copy;
+	}
+	return 0;
+}
+
+/*
+ * These two parallel the two above, but for portions of a regset layout
+ * that always read as all-zero or for which writes are ignored.
+ */
+static inline int user_regset_copyout_zero(unsigned int *pos,
+					   unsigned int *count,
+					   void **kbuf, void __user **ubuf,
+					   const int start_pos,
+					   const int end_pos)
+{
+	if (*count == 0)
+		return 0;
+	BUG_ON(*pos < start_pos);
+	if (end_pos < 0 || *pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? *count
+				     : min(*count, end_pos - *pos));
+		if (*kbuf) {
+			memset(*kbuf, 0, copy);
+			*kbuf += copy;
+		} else if (__clear_user(*ubuf, copy))
+			return -EFAULT;
+		else
+			*ubuf += copy;
+		*pos += copy;
+		*count -= copy;
+	}
+	return 0;
+}
+
+static inline int user_regset_copyin_ignore(unsigned int *pos,
+					    unsigned int *count,
+					    const void **kbuf,
+					    const void __user **ubuf,
+					    const int start_pos,
+					    const int end_pos)
+{
+	if (*count == 0)
+		return 0;
+	BUG_ON(*pos < start_pos);
+	if (end_pos < 0 || *pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? *count
+				     : min(*count, end_pos - *pos));
+		if (*kbuf)
+			*kbuf += copy;
+		else
+			*ubuf += copy;
+		*pos += copy;
+		*count -= copy;
+	}
+	return 0;
+}
+
+
 #endif	/* <linux/regset.h> */

From 2f79e48ae2651fff08d08dab3acf1294467c1155 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:46 +0100
Subject: [PATCH 371/890] x86: compat_binfmt_elf

This adds fs/compat_binfmt_elf.c, a wrapper around fs/binfmt_elf.c for
32-bit ELF support on 64-bit kernels.  It can replace all the hand-rolled
versions of this that each 32/64 arch has, which are all about the same.

To use this, an arch's asm/elf.h has to define at least a few compat_*
macros that parallel the various macros that fs/binfmt_elf.c uses for
native support.

There is no attempt to deal with compat macros for the core dump format
support.  To use this file, the arch has to define compat_gregset_t for
linux/elfcore-compat.h and #define CORE_DUMP_USE_REGSET.  The 32-bit
compatible formats should come automatically from task_user_regset_view
called on a 32-bit task.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/compat_binfmt_elf.c | 131 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 fs/compat_binfmt_elf.c

diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
new file mode 100644
index 000000000000..0adced2f296f
--- /dev/null
+++ b/fs/compat_binfmt_elf.c
@@ -0,0 +1,131 @@
+/*
+ * 32-bit compatibility support for ELF format executables and core dumps.
+ *
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ *
+ * This file is used in a 64-bit kernel that wants to support 32-bit ELF.
+ * asm/elf.h is responsible for defining the compat_* and COMPAT_* macros
+ * used below, with definitions appropriate for 32-bit ABI compatibility.
+ *
+ * We use macros to rename the ABI types and machine-dependent
+ * functions used in binfmt_elf.c to compat versions.
+ */
+
+#include <linux/elfcore-compat.h>
+#include <linux/time.h>
+
+/*
+ * Rename the basic ELF layout types to refer to the 32-bit class of files.
+ */
+#undef	ELF_CLASS
+#define ELF_CLASS	ELFCLASS32
+
+#undef	elfhdr
+#undef	elf_phdr
+#undef	elf_note
+#undef	elf_addr_t
+#define elfhdr		elf32_hdr
+#define elf_phdr	elf32_phdr
+#define elf_note	elf32_note
+#define elf_addr_t	Elf32_Addr
+
+/*
+ * The machine-dependent core note format types are defined in elfcore-compat.h,
+ * which requires asm/elf.h to define compat_elf_gregset_t et al.
+ */
+#define elf_prstatus	compat_elf_prstatus
+#define elf_prpsinfo	compat_elf_prpsinfo
+
+/*
+ * Compat version of cputime_to_compat_timeval, perhaps this
+ * should be an inline in <linux/compat.h>.
+ */
+static void cputime_to_compat_timeval(const cputime_t cputime,
+				      struct compat_timeval *value)
+{
+	struct timeval tv;
+	cputime_to_timeval(cputime, &tv);
+	value->tv_sec = tv.tv_sec;
+	value->tv_usec = tv.tv_usec;
+}
+
+#undef cputime_to_timeval
+#define cputime_to_timeval cputime_to_compat_timeval
+
+
+/*
+ * To use this file, asm/elf.h must define compat_elf_check_arch.
+ * The other following macros can be defined if the compat versions
+ * differ from the native ones, or omitted when they match.
+ */
+
+#undef	ELF_ARCH
+#undef	elf_check_arch
+#define	elf_check_arch	compat_elf_check_arch
+
+#ifdef	COMPAT_ELF_PLATFORM
+#undef	ELF_PLATFORM
+#define	ELF_PLATFORM		COMPAT_ELF_PLATFORM
+#endif
+
+#ifdef	COMPAT_ELF_HWCAP
+#undef	ELF_HWCAP
+#define	ELF_HWCAP		COMPAT_ELF_HWCAP
+#endif
+
+#ifdef	COMPAT_ARCH_DLINFO
+#undef	ARCH_DLINFO
+#define	ARCH_DLINFO		COMPAT_ARCH_DLINFO
+#endif
+
+#ifdef	COMPAT_ELF_ET_DYN_BASE
+#undef	ELF_ET_DYN_BASE
+#define	ELF_ET_DYN_BASE		COMPAT_ELF_ET_DYN_BASE
+#endif
+
+#ifdef COMPAT_ELF_EXEC_PAGESIZE
+#undef	ELF_EXEC_PAGESIZE
+#define	ELF_EXEC_PAGESIZE	COMPAT_ELF_EXEC_PAGESIZE
+#endif
+
+#ifdef	COMPAT_ELF_PLAT_INIT
+#undef	ELF_PLAT_INIT
+#define	ELF_PLAT_INIT		COMPAT_ELF_PLAT_INIT
+#endif
+
+#ifdef	COMPAT_SET_PERSONALITY
+#undef	SET_PERSONALITY
+#define	SET_PERSONALITY		COMPAT_SET_PERSONALITY
+#endif
+
+#ifdef	compat_start_thread
+#undef	start_thread
+#define	start_thread		compat_start_thread
+#endif
+
+#ifdef	compat_arch_setup_additional_pages
+#undef	ARCH_HAS_SETUP_ADDITIONAL_PAGES
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+#undef	arch_setup_additional_pages
+#define	arch_setup_additional_pages compat_arch_setup_additional_pages
+#endif
+
+/*
+ * Rename a few of the symbols that binfmt_elf.c will define.
+ * These are all local so the names don't really matter, but it
+ * might make some debugging less confusing not to duplicate them.
+ */
+#define elf_format		compat_elf_format
+#define init_elf_binfmt		init_compat_elf_binfmt
+#define exit_elf_binfmt		exit_compat_elf_binfmt
+
+/*
+ * We share all the actual code with the native (64-bit) version.
+ */
+#include "binfmt_elf.c"

From b9d36d5d000294a128f7f174fe67623a10e29d61 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:46 +0100
Subject: [PATCH 372/890] x86: compat_binfmt_elf Kconfig

This adds Kconfig and Makefile bits to build fs/compat_binfmt_elf.c,
just added.  Each arch that wants to use this file needs to add a
"select COMPAT_BINFMT_ELF" line in its Kconfig bits that enable COMPAT.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/Kconfig.binfmt | 4 ++++
 fs/Makefile       | 1 +
 2 files changed, 5 insertions(+)

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index d4fc6095466d..7c3d5f923da1 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -23,6 +23,10 @@ config BINFMT_ELF
 	  ld.so (check the file <file:Documentation/Changes> for location and
 	  latest version).
 
+config COMPAT_BINFMT_ELF
+	bool
+	depends on COMPAT && MMU
+
 config BINFMT_ELF_FDPIC
 	bool "Kernel support for FDPIC ELF binaries"
 	default y
diff --git a/fs/Makefile b/fs/Makefile
index 500cf15cdb4b..1e7a11bd4da1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_BINFMT_MISC)	+= binfmt_misc.o
 obj-y				+= binfmt_script.o
 
 obj-$(CONFIG_BINFMT_ELF)	+= binfmt_elf.o
+obj-$(CONFIG_COMPAT_BINFMT_ELF)	+= compat_binfmt_elf.o
 obj-$(CONFIG_BINFMT_ELF_FDPIC)	+= binfmt_elf_fdpic.o
 obj-$(CONFIG_BINFMT_SOM)	+= binfmt_som.o
 obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o

From 5bde4d181793be84351bc21c256d8c71cfcd313a Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:47 +0100
Subject: [PATCH 373/890] x86: user_regset user-copy helpers

This defines two new inlines in linux/regset.h, for use in arch_ptrace
implementations and the like.  These provide simplified wrappers for using
the user_regset interfaces to copy thread regset data into the caller's
user-space memory.  The inlines are trivial, but make the common uses in
places such as ptrace implementation much more concise, easier to read, and
less prone to code-copying errors.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/regset.h | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/include/linux/regset.h b/include/linux/regset.h
index 761c931af975..8abee6556223 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -318,5 +318,51 @@ static inline int user_regset_copyin_ignore(unsigned int *pos,
 	return 0;
 }
 
+/**
+ * copy_regset_to_user - fetch a thread's user_regset data into user memory
+ * @target:	thread to be examined
+ * @view:	&struct user_regset_view describing user thread machine state
+ * @setno:	index in @view->regsets
+ * @offset:	offset into the regset data, in bytes
+ * @size:	amount of data to copy, in bytes
+ * @data:	user-mode pointer to copy into
+ */
+static inline int copy_regset_to_user(struct task_struct *target,
+				      const struct user_regset_view *view,
+				      unsigned int setno,
+				      unsigned int offset, unsigned int size,
+				      void __user *data)
+{
+	const struct user_regset *regset = &view->regsets[setno];
+
+	if (!access_ok(VERIFY_WRITE, data, size))
+		return -EIO;
+
+	return regset->get(target, regset, offset, size, NULL, data);
+}
+
+/**
+ * copy_regset_from_user - store into thread's user_regset data from user memory
+ * @target:	thread to be examined
+ * @view:	&struct user_regset_view describing user thread machine state
+ * @setno:	index in @view->regsets
+ * @offset:	offset into the regset data, in bytes
+ * @size:	amount of data to copy, in bytes
+ * @data:	user-mode pointer to copy from
+ */
+static inline int copy_regset_from_user(struct task_struct *target,
+					const struct user_regset_view *view,
+					unsigned int setno,
+					unsigned int offset, unsigned int size,
+					const void __user *data)
+{
+	const struct user_regset *regset = &view->regsets[setno];
+
+	if (!access_ok(VERIFY_READ, data, size))
+		return -EIO;
+
+	return regset->set(target, regset, offset, size, NULL, data);
+}
+
 
 #endif	/* <linux/regset.h> */

From 16c3e389e7a7254ff8dc7029ac4fbe996c3c75bf Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:47 +0100
Subject: [PATCH 374/890] x86: ptrace_request peekdata/pokedata

This makes ptrace_request handle {PEEK,POKE}{TEXT,DATA} directly.
Every arch_ptrace that could call generic_ptrace_peekdata already
has a default case calling ptrace_request, so this keeps things
simpler for the arch code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 973d727f5e84..e6a99d2793b3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -426,6 +426,13 @@ int ptrace_request(struct task_struct *child, long request,
 	int ret = -EIO;
 
 	switch (request) {
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		return generic_ptrace_peekdata(child, addr, data);
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		return generic_ptrace_pokedata(child, addr, data);
+
 #ifdef PTRACE_OLDSETOPTIONS
 	case PTRACE_OLDSETOPTIONS:
 #endif

From 032d82d9065dec0e26718eca376c2029e4bd0595 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:47 +0100
Subject: [PATCH 375/890] x86: compat_ptrace_request

This adds a compat_ptrace_request that is the analogue of ptrace_request
for the things that 32-on-64 ptrace implementations can share in common.
So far there are just a couple of requests handled generically.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/compat.h |  4 ++++
 kernel/ptrace.c        | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index ba29d4c59643..a907fbede6c3 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -243,6 +243,10 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
 		compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
 		const compat_ulong_t __user *new_nodes);
 
+extern int compat_ptrace_request(struct task_struct *child,
+				 compat_long_t request,
+				 compat_ulong_t addr, compat_ulong_t data);
+
 /*
  * epoll (fs/eventpoll.c) compat bits follow ...
  */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6a99d2793b3..ed1c3d56c2cd 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -607,3 +607,41 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
 	copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+			  compat_ulong_t addr, compat_ulong_t data)
+{
+	compat_ulong_t __user *datap = compat_ptr(data);
+	compat_ulong_t word;
+	int ret;
+
+	switch (request) {
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+		if (ret != sizeof(word))
+			ret = -EIO;
+		else
+			ret = put_user(word, datap);
+		break;
+
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		ret = (ret != sizeof(data) ? -EIO : 0);
+		break;
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user((compat_ulong_t) child->ptrace_message, datap);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+	}
+
+	return ret;
+}
+#endif	/* CONFIG_COMPAT */

From c269f19617f508cc5c29c0b064c1a437d7011a46 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:48 +0100
Subject: [PATCH 376/890] x86: compat_sys_ptrace

This adds a generic definition of compat_sys_ptrace that calls
compat_arch_ptrace, parallel to sys_ptrace/arch_ptrace.  Some
machines needing this already define a function by that name.
The new generic function is defined only on machines that
put #define __ARCH_WANT_COMPAT_SYS_PTRACE into asm/ptrace.h.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/compat.h |  7 +++++++
 kernel/ptrace.c        | 46 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index a907fbede6c3..d38655f2be70 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -247,6 +247,13 @@ extern int compat_ptrace_request(struct task_struct *child,
 				 compat_long_t request,
 				 compat_ulong_t addr, compat_ulong_t data);
 
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			       compat_ulong_t addr, compat_ulong_t data);
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+				  compat_long_t addr, compat_long_t data);
+#endif	/* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
 /*
  * epoll (fs/eventpoll.c) compat bits follow ...
  */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ed1c3d56c2cd..e6e9b8be4b05 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -644,4 +644,50 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	return ret;
 }
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+				  compat_long_t addr, compat_long_t data)
+{
+	struct task_struct *child;
+	long ret;
+
+	/*
+	 * This lock_kernel fixes a subtle race with suid exec
+	 */
+	lock_kernel();
+	if (request == PTRACE_TRACEME) {
+		ret = ptrace_traceme();
+		goto out;
+	}
+
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		goto out;
+	}
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		/*
+		 * Some architectures need to do book-keeping after
+		 * a ptrace attach.
+		 */
+		if (!ret)
+			arch_ptrace_attach(child);
+		goto out_put_task_struct;
+	}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (!ret)
+		ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+	put_task_struct(child);
+ out:
+	unlock_kernel();
+	return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
 #endif	/* CONFIG_COMPAT */

From 863aec860259f287797f886c3f373389cdafc8e3 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:48 +0100
Subject: [PATCH 377/890] x86: x86-64 wired cpu_has macros

This adds hard-wired definitions for the remaining cpu_has_* macros
that correspond to flags required-features.h demands are set for
64-bit.  Using these can efficiently avoid some #ifdef's when
merging 32-bit and 64-bit code together.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/cpufeature.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 87dd900154d1..75e2f78a7fda 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -192,9 +192,21 @@
 #undef  cpu_has_centaur_mcr
 #define cpu_has_centaur_mcr	0
 
+#undef  cpu_has_pse
+#define cpu_has_pse		1
+
 #undef  cpu_has_pge
 #define cpu_has_pge		1
 
+#undef  cpu_has_xmm
+#define cpu_has_xmm		1
+
+#undef  cpu_has_xmm2
+#define cpu_has_xmm2		1
+
+#undef  cpu_has_fxsr
+#define cpu_has_fxsr		1
+
 #endif /* CONFIG_X86_64 */
 
 #endif /* _ASM_X86_CPUFEATURE_H */

From 99f8ecdf4506010feda26ffe4ac0d23962947106 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:48 +0100
Subject: [PATCH 378/890] x86: x86 i387 unify structs

The i387_fxsave_struct formats really have the same layout
on 32 and 64, with only some slightly different use of a few
fields.  The i387_fsave_struct and i387_soft_struct formats
are never used by 64-bit kernels, but it doesn't hurt to
have the unused types in the union and cuts down on the
amount of #ifdef hair required throughout the i387 code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h | 96 +++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 52 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 80c50020ef60..ea222cfe7b00 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -226,64 +226,37 @@ struct orig_ist {
 	unsigned long ist[7];
 };
 
-#ifdef CONFIG_X86_32
+#define	MXCSR_DEFAULT		0x1f80
+
 struct i387_fsave_struct {
-	long	cwd;
-	long	swd;
-	long	twd;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
-	long	status;		/* software status information */
+	u32	cwd;
+	u32	swd;
+	u32	twd;
+	u32	fip;
+	u32	fcs;
+	u32	foo;
+	u32	fos;
+	u32	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	u32	status;		/* software status information */
 };
 
-struct i387_fxsave_struct {
-	unsigned short	cwd;
-	unsigned short	swd;
-	unsigned short	twd;
-	unsigned short	fop;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	mxcsr;
-	long	mxcsr_mask;
-	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
-	long	padding[56];
-} __attribute__((aligned(16)));
-
-struct i387_soft_struct {
-	long	cwd;
-	long	swd;
-	long	twd;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
-	unsigned char	ftop, changed, lookahead, no_update, rm, alimit;
-	struct info	*info;
-	unsigned long	entry_eip;
-};
-
-union i387_union {
-	struct i387_fsave_struct	fsave;
-	struct i387_fxsave_struct	fxsave;
-	struct i387_soft_struct soft;
-};
-
-# include "processor_32.h"
-#else
 struct i387_fxsave_struct {
 	u16	cwd;
 	u16	swd;
 	u16	twd;
 	u16	fop;
-	u64	rip;
-	u64	rdp;
+	union {
+		struct {
+			u64	rip;
+			u64	rdp;
+		};
+		struct {
+			u32	fip;
+			u32	fcs;
+			u32	foo;
+			u32	fos;
+		};
+	};
 	u32	mxcsr;
 	u32	mxcsr_mask;
 	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
@@ -291,10 +264,29 @@ struct i387_fxsave_struct {
 	u32	padding[24];
 } __attribute__((aligned(16)));
 
-union i387_union {
-	struct i387_fxsave_struct	fxsave;
+struct i387_soft_struct {
+	u32	cwd;
+	u32	swd;
+	u32	twd;
+	u32	fip;
+	u32	fcs;
+	u32	foo;
+	u32	fos;
+	u32	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	u8	ftop, changed, lookahead, no_update, rm, alimit;
+	struct info	*info;
+	u32	entry_eip;
 };
 
+union i387_union {
+	struct i387_fsave_struct	fsave;
+	struct i387_fxsave_struct	fxsave;
+	struct i387_soft_struct 	soft;
+};
+
+#ifdef CONFIG_X86_32
+# include "processor_32.h"
+#else
 # include "processor_64.h"
 #endif
 

From cc927a25bd704448b18b095c658cbfdd79dab865 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:49 +0100
Subject: [PATCH 379/890] x86: x86 i387 header cleanup

This moves some code into asm-x86/i387_64.h in preparation for
unifying this code between 32 and 64.  The 32-bit versions of
some things are copied in some existing names changed to match
32-bit names and share code.  For 64, save_i387 is moved into
an inline from i387_64.c; this matches restore_i387, which is
already an inline, and makes sense since there is exactly one
caller (in signal_64.c).  The save_i387 function could use more
cosmetic cleanup, but it is just moved verbatim in this patch.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i387_64.c |  31 ----
 include/asm-x86/i387_64.h | 333 +++++++++++++++++++++++++++-----------
 2 files changed, 242 insertions(+), 122 deletions(-)

diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c
index bfaff28fb134..f335a76d7ea7 100644
--- a/arch/x86/kernel/i387_64.c
+++ b/arch/x86/kernel/i387_64.c
@@ -71,37 +71,6 @@ void init_fpu(struct task_struct *child)
 	set_stopped_child_used_math(child);
 }
 
-/*
- * Signal frame handlers.
- */
-
-int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.i387.fxsave));
-
-	if ((unsigned long)buf % 16) 
-		printk("save_i387: bad fpstate %p\n",buf); 
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
-		if (err) return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
 /*
  * ptrace request handlers.
  */
diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h
index 12edb9f2d755..4cea7373ac78 100644
--- a/include/asm-x86/i387_64.h
+++ b/include/asm-x86/i387_64.h
@@ -7,33 +7,23 @@
  * x86-64 work by Andi Kleen 2002
  */
 
-#ifndef __ASM_X86_64_I387_H
-#define __ASM_X86_64_I387_H
+#ifndef _ASM_X86_I387_H
+#define _ASM_X86_I387_H
 
 #include <linux/sched.h>
+#include <linux/kernel_stat.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
-#include <asm/thread_info.h>
 #include <asm/uaccess.h>
 
 extern void fpu_init(void);
 extern unsigned int mxcsr_feature_mask;
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
-extern int save_i387(struct _fpstate __user *buf);
 extern asmlinkage void math_state_restore(void);
 
-/*
- * FPU lazy state save handling...
- */
-
-#define unlazy_fpu(tsk) do { \
-	if (task_thread_info(tsk)->status & TS_USEDFPU) \
-		save_init_fpu(tsk); 			\
-	else						\
-		tsk->fpu_counter = 0;			\
-} while (0)
+#ifdef CONFIG_X86_64
 
 /* Ignore delayed exceptions from user space */
 static inline void tolerant_fwait(void)
@@ -46,52 +36,8 @@ static inline void tolerant_fwait(void)
 		     "	.previous\n");
 }
 
-#define clear_fpu(tsk) do { \
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {	\
-		tolerant_fwait();				\
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;	\
-		stts();						\
-	}							\
-} while (0)
-
-/*
- * ptrace request handers...
- */
-extern int get_fpregs(struct user_i387_struct __user *buf,
-		      struct task_struct *tsk);
-extern int set_fpregs(struct task_struct *tsk,
-		      struct user_i387_struct __user *buf);
-
-/*
- * i387 state interaction
- */
-#define get_fpu_mxcsr(t) ((t)->thread.i387.fxsave.mxcsr)
-#define get_fpu_cwd(t) ((t)->thread.i387.fxsave.cwd)
-#define get_fpu_fxsr_twd(t) ((t)->thread.i387.fxsave.twd)
-#define get_fpu_swd(t) ((t)->thread.i387.fxsave.swd)
-#define set_fpu_cwd(t,val) ((t)->thread.i387.fxsave.cwd = (val))
-#define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
-#define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
-
-#define X87_FSW_ES (1 << 7)	/* Exception Summary */
-
-/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-   is pending. Clear the x87 state here by setting it to fixed
-   values. The kernel data segment can be sometimes 0 and sometimes
-   new user value. Both should be ok.
-   Use the PDA as safe address because it should be already in L1. */
-static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
 {
-	if (unlikely(fx->swd & X87_FSW_ES))
-		 asm volatile("fnclex");
-	alternative_input(ASM_NOP8 ASM_NOP2,
-	     	     "    emms\n"		/* clear stack tags */
-	     	     "    fildl %%gs:0",	/* load to clear state */
-		     X86_FEATURE_FXSAVE_LEAK);
-}
-
-static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 
-{ 
 	int err;
 
 	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
@@ -105,7 +51,7 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
 		     "   .quad  1b,3b\n"
 		     ".previous"
 		     : [err] "=r" (err)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in __save_init_fpu() below. */
 		     : [fx] "r" (fx), "m" (*fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -113,10 +59,27 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
 	if (unlikely(err))
 		init_fpu(current);
 	return err;
-} 
+}
 
-static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) 
-{ 
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+   is pending. Clear the x87 state here by setting it to fixed
+   values. The kernel data segment can be sometimes 0 and sometimes
+   new user value. Both should be ok.
+   Use the PDA as safe address because it should be already in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+	if (unlikely(fx->swd & X87_FSW_ES))
+		 asm volatile("fnclex");
+	alternative_input(ASM_NOP8 ASM_NOP2,
+		     "    emms\n"		/* clear stack tags */
+		     "    fildl %%gs:0",	/* load to clear state */
+		     X86_FEATURE_FXSAVE_LEAK);
+}
+
+static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
+{
 	int err;
 
 	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
@@ -139,9 +102,9 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
 		err = -EFAULT;
 	/* No need to clear here because the caller clears USED_MATH */
 	return err;
-} 
+}
 
-static inline void __fxsave_clear(struct task_struct *tsk)
+static inline void __save_init_fpu(struct task_struct *tsk)
 {
 	/* Using "rex64; fxsave %0" is broken because, if the memory operand
 	   uses any extended registers for addressing, a second REX prefix
@@ -169,34 +132,41 @@ static inline void __fxsave_clear(struct task_struct *tsk)
 					      thread.i387.fxsave)));
 #endif
 	clear_fpu_state(&tsk->thread.i387.fxsave);
-}
-
-static inline void kernel_fpu_begin(void)
-{
-	struct thread_info *me = current_thread_info();
-	preempt_disable();
-	if (me->status & TS_USEDFPU) {
-		__fxsave_clear(me->task);
-		me->status &= ~TS_USEDFPU;
-		return;
-	}
-	clts();
-}
-
-static inline void kernel_fpu_end(void)
-{
-	stts();
-	preempt_enable();
-}
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- 	__fxsave_clear(tsk);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-	stts();
 }
 
-/* 
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387(struct _fpstate __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.i387.fxsave));
+
+	if ((unsigned long)buf % 16)
+		printk("save_i387: bad fpstate %p\n", buf);
+
+	if (!used_math())
+		return 0;
+	clear_used_math(); /* trigger finit */
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
+		if (err) return err;
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	} else {
+		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+				   sizeof(struct i387_fxsave_struct)))
+			return -1;
+	}
+	return 1;
+}
+
+/*
  * This restores directly out of user space. Exceptions are handled.
  */
 static inline int restore_i387(struct _fpstate __user *buf)
@@ -209,4 +179,185 @@ static inline int restore_i387(struct _fpstate __user *buf)
 	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
 }
 
-#endif /* __ASM_X86_64_I387_H */
+#else  /* CONFIG_X86_32 */
+
+static inline void tolerant_fwait(void)
+{
+	asm volatile("fnclex ; fwait");
+}
+
+static inline void restore_fpu(struct task_struct *tsk)
+{
+	/*
+	 * The "nop" is needed to make the instructions the same
+	 * length.
+	 */
+	alternative_input(
+		"nop ; frstor %1",
+		"fxrstor %1",
+		X86_FEATURE_FXSR,
+		"m" ((tsk)->thread.i387.fxsave));
+}
+
+/* We need a safe address that is cheap to find and that is already
+   in L1 during context switch. The best choices are unfortunately
+   different for UP and SMP */
+#ifdef CONFIG_SMP
+#define safe_address (__per_cpu_offset[0])
+#else
+#define safe_address (kstat_cpu(0).cpustat.user)
+#endif
+
+/*
+ * These must be called with preempt disabled
+ */
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+	/* Use more nops than strictly needed in case the compiler
+	   varies code */
+	alternative_input(
+		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
+		"fxsave %[fx]\n"
+		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
+		X86_FEATURE_FXSR,
+		[fx] "m" (tsk->thread.i387.fxsave),
+		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		GENERIC_NOP8 GENERIC_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %[addr]", 	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+	task_thread_info(tsk)->status &= ~TS_USEDFPU;
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387(struct _fpstate __user *buf);
+extern int restore_i387(struct _fpstate __user *buf);
+
+#endif	/* CONFIG_X86_64 */
+
+static inline void __unlazy_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		__save_init_fpu(tsk);
+		stts();
+	} else
+		tsk->fpu_counter = 0;
+}
+
+static inline void __clear_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		tolerant_fwait();
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	}
+}
+
+static inline void kernel_fpu_begin(void)
+{
+	struct thread_info *me = current_thread_info();
+	preempt_disable();
+	if (me->status & TS_USEDFPU)
+		__save_init_fpu(me->task);
+	else
+		clts();
+}
+
+static inline void kernel_fpu_end(void)
+{
+	stts();
+	preempt_enable();
+}
+
+#ifdef CONFIG_X86_64
+
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+	__save_init_fpu(tsk);
+	stts();
+}
+
+#define unlazy_fpu	__unlazy_fpu
+#define clear_fpu	__clear_fpu
+
+#else  /* CONFIG_X86_32 */
+
+/*
+ * These disable preemption on their own and are safe
+ */
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__save_init_fpu(tsk);
+	stts();
+	preempt_enable();
+}
+
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__unlazy_fpu(tsk);
+	preempt_enable();
+}
+
+static inline void clear_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__clear_fpu(tsk);
+	preempt_enable();
+}
+
+#endif	/* CONFIG_X86_64 */
+
+/*
+ * ptrace request handlers...
+ */
+extern int get_fpregs(struct user_i387_struct __user *buf,
+		      struct task_struct *tsk);
+extern int set_fpregs(struct task_struct *tsk,
+		      struct user_i387_struct __user *buf);
+
+struct user_fxsr_struct;
+extern int get_fpxregs(struct user_fxsr_struct __user *buf,
+		       struct task_struct *tsk);
+extern int set_fpxregs(struct task_struct *tsk,
+		       struct user_fxsr_struct __user *buf);
+
+/*
+ * i387 state interaction
+ */
+static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+{
+	if (cpu_has_fxsr) {
+		return tsk->thread.i387.fxsave.cwd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.cwd;
+	}
+}
+
+static inline unsigned short get_fpu_swd(struct task_struct *tsk)
+{
+	if (cpu_has_fxsr) {
+		return tsk->thread.i387.fxsave.swd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.swd;
+	}
+}
+
+static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
+{
+	if (cpu_has_xmm) {
+		return tsk->thread.i387.fxsave.mxcsr;
+	} else {
+		return MXCSR_DEFAULT;
+	}
+}
+
+#endif	/* _ASM_X86_I387_H */

From ff0ebb23c6eedc9ac0d84638489d806b30bde8b0 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:49 +0100
Subject: [PATCH 380/890] x86: x86 user_regset math_emu

This converts the ptrace/signal accessors for i387 math_emu
state to the user_regset interface style, and calls these
from the old interfaces.

It also cleans up math_emulate's ptrace check to be a
single-step check, which is what it really wants.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/math-emu/fpu_entry.c | 86 ++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 32 deletions(-)

diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 377c60dfa2f0..cfbdaa1532ce 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -25,10 +25,11 @@
  +---------------------------------------------------------------------------*/
 
 #include <linux/signal.h>
-#include <linux/ptrace.h>
+#include <linux/regset.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/user.h>
 
 #include "fpu_system.h"
 #include "fpu_emu.h"
@@ -198,9 +199,7 @@ asmlinkage void math_emulate(long arg)
 			code_limit = 0xffffffff;
 	}
 
-	FPU_lookahead = 1;
-	if (current->ptrace & PT_PTRACED)
-		FPU_lookahead = 0;
+	FPU_lookahead = !(FPU_EFLAGS & X86_EFLAGS_TF);
 
 	if (!valid_prefix(&byte1, (u_char __user **) & FPU_EIP,
 			  &addr_modes.override)) {
@@ -673,31 +672,37 @@ void math_abort(struct info *info, unsigned int signal)
 #define sstatus_word() \
   ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
 
-int restore_i387_soft(void *s387, struct _fpstate __user *buf)
+int fpregs_soft_set(struct task_struct *target,
+		    const struct user_regset *regset,
+		    unsigned int pos, unsigned int count,
+		    const void *kbuf, const void __user *ubuf)
 {
-	u_char __user *d = (u_char __user *) buf;
+	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	void *space = s387->st_space;
+	int ret;
 	int offset, other, i, tags, regnr, tag, newtop;
 
 	RE_ENTRANT_CHECK_OFF;
-	FPU_access_ok(VERIFY_READ, d, 7 * 4 + 8 * 10);
-	if (__copy_from_user(&S387->cwd, d, 7 * 4))
-		return -1;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0,
+				 offsetof(struct i387_soft_struct, st_space));
 	RE_ENTRANT_CHECK_ON;
 
-	d += 7 * 4;
+	if (ret)
+		return ret;
 
 	S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
 	offset = (S387->ftop & 7) * 10;
 	other = 80 - offset;
 
 	RE_ENTRANT_CHECK_OFF;
+
 	/* Copy all registers in stack order. */
-	if (__copy_from_user(((u_char *) & S387->st_space) + offset, d, other))
-		return -1;
-	if (offset)
-		if (__copy_from_user
-		    ((u_char *) & S387->st_space, d + other, offset))
-			return -1;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 space + offset, 0, other);
+	if (!ret && offset)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 space, 0, offset);
+
 	RE_ENTRANT_CHECK_ON;
 
 	/* The tags may need to be corrected now. */
@@ -716,16 +721,21 @@ int restore_i387_soft(void *s387, struct _fpstate __user *buf)
 	}
 	S387->twd = tags;
 
-	return 0;
+	return ret;
 }
 
-int save_i387_soft(void *s387, struct _fpstate __user * buf)
+int fpregs_soft_get(struct task_struct *target,
+		    const struct user_regset *regset,
+		    unsigned int pos, unsigned int count,
+		    void *kbuf, void __user *ubuf)
 {
-	u_char __user *d = (u_char __user *) buf;
+	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	const void *space = s387->st_space;
+	int ret;
 	int offset = (S387->ftop & 7) * 10, other = 80 - offset;
 
 	RE_ENTRANT_CHECK_OFF;
-	FPU_access_ok(VERIFY_WRITE, d, 7 * 4 + 8 * 10);
+
 #ifdef PECULIAR_486
 	S387->cwd &= ~0xe080;
 	/* An 80486 sets nearly all of the reserved bits to 1. */
@@ -735,21 +745,33 @@ int save_i387_soft(void *s387, struct _fpstate __user * buf)
 	S387->fcs &= ~0xf8000000;
 	S387->fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
-	if (__copy_to_user(d, &S387->cwd, 7 * 4))
-		return -1;
-	RE_ENTRANT_CHECK_ON;
 
-	d += 7 * 4;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0,
+				  offsetof(struct i387_soft_struct, st_space));
 
-	RE_ENTRANT_CHECK_OFF;
 	/* Copy all registers in stack order. */
-	if (__copy_to_user(d, ((u_char *) & S387->st_space) + offset, other))
-		return -1;
-	if (offset)
-		if (__copy_to_user
-		    (d + other, (u_char *) & S387->st_space, offset))
-			return -1;
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  space + offset, 0, other);
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  space, 0, offset);
+
 	RE_ENTRANT_CHECK_ON;
 
-	return 1;
+	return ret;
+}
+
+int save_i387_soft(void *s387, struct _fpstate __user *buf)
+{
+	return fpregs_soft_get(current, NULL,
+			       0, sizeof(struct user_i387_struct),
+			       NULL, buf) ? -1 : 1;
+}
+
+int restore_i387_soft(void *s387, struct _fpstate __user *buf)
+{
+	return fpregs_soft_set(current, NULL,
+			       0, sizeof(struct user_i387_struct),
+			       NULL, buf) ? -1 : 1;
 }

From b7b71725fb9584454bfe5f231223bd63421798fb Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:50 +0100
Subject: [PATCH 381/890] x86: i387 renaming

This renames arch/x86/kernel/{i387_32.c => i387.c}.
This is a pure renaming, but paves the way for merging
the 32-bit and 64-bit versions of this code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32           | 3 ++-
 arch/x86/kernel/{i387_32.c => i387.c} | 0
 2 files changed, 2 insertions(+), 1 deletion(-)
 rename arch/x86/kernel/{i387_32.c => i387.c} (100%)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index c7a959da363a..a67198044b0d 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -7,9 +7,10 @@ CPPFLAGS_vmlinux.lds += -Ui386
 
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
-		pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
+		pci-dma_32.o i386_ksyms_32.o bootflag.o e820_32.o\
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
 
+obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
 obj-y				+= tls.o
diff --git a/arch/x86/kernel/i387_32.c b/arch/x86/kernel/i387.c
similarity index 100%
rename from arch/x86/kernel/i387_32.c
rename to arch/x86/kernel/i387.c

From 4421011120b2304e5c248ae4165a2704588aedf1 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:50 +0100
Subject: [PATCH 382/890] x86: x86 i387 user_regset

This revamps the i387 code to be shared across 32-bit, 64-bit,
and 32-on-64.  It does so by consolidating the code in one place
based on the user_regset accessor interfaces.  This switches
32-bit to using the i387_64.h header and 64-bit to using the
i387.c that was previously i387_32.c, but that's what took the
least cleanup in each file.  Here i387.h is stubbed to always
include i387_64.h rather than renaming the file, to keep this
diff smaller and easier to read.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/Makefile      |   2 +-
 arch/x86/ia32/ia32_signal.c |   5 +-
 arch/x86/kernel/Makefile_64 |   3 +-
 arch/x86/kernel/i387.c      | 513 +++++++++++++++++++-----------------
 arch/x86/kernel/ptrace.c    |   1 -
 include/asm-x86/fpu32.h     |  10 -
 include/asm-x86/i387.h      |   6 +-
 include/asm-x86/i387_64.h   |  11 +
 8 files changed, 281 insertions(+), 270 deletions(-)
 delete mode 100644 include/asm-x86/fpu32.h

diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index ec71cfeac87e..93a6fda65f49 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \
-	ia32_binfmt.o fpu32.o
+	ia32_binfmt.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0e24e3fda3d7..0a34c24f19e5 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -29,7 +29,6 @@
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
 #include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
 
@@ -258,7 +257,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	if (buf) {
 		if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
 			goto badframe;
-		err |= restore_i387_ia32(current, buf, 0);
+		err |= restore_i387_ia32(buf);
 	} else {
 		struct task_struct *me = current;
 
@@ -377,7 +376,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	err |= __put_user((u32)regs->flags, &sc->flags);
 	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
 
-	tmp = save_i387_ia32(current, fpstate, regs, 0);
+	tmp = save_i387_ia32(fpstate);
 	if (tmp < 0)
 		err = -EFAULT;
 	else {
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index fbb370071239..7fcf972aa5d6 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -7,7 +7,7 @@ CPPFLAGS_vmlinux.lds += -Ux86_64
 
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \
-		x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
+		x8664_ksyms_64.o syscall_64.o vsyscall_64.o \
 		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
 		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
 		i8253.o io_delay.o rtc.o
@@ -16,6 +16,7 @@ obj-y				+= ptrace.o
 obj-y				+= ds.o
 obj-y				+= step.o
 
+obj-y				+= i387.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index bebe03463461..f7f7568dd7bc 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -8,6 +8,7 @@
 
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/regset.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/math_emu.h>
@@ -16,13 +17,29 @@
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_X86_64
+
+#include <asm/sigcontext32.h>
+#include <asm/user32.h>
+
+#else
+
+#define	save_i387_ia32		save_i387
+#define	restore_i387_ia32	restore_i387
+
+#define _fpstate_ia32 		_fpstate
+#define user_i387_ia32_struct	user_i387_struct
+#define user32_fxsr_struct	user_fxsr_struct
+
+#endif
+
 #ifdef CONFIG_MATH_EMULATION
 #define HAVE_HWFP (boot_cpu_data.hard_math)
 #else
 #define HAVE_HWFP 1
 #endif
 
-static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff;
+unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
 
 void mxcsr_feature_mask_init(void)
 {
@@ -40,6 +57,30 @@ void mxcsr_feature_mask_init(void)
 	stts();
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Called at bootup to set up the initial FPU state that is later cloned
+ * into all processes.
+ */
+void __cpuinit fpu_init(void)
+{
+	unsigned long oldcr0 = read_cr0();
+	extern void __bad_fxsave_alignment(void);
+
+	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
+		__bad_fxsave_alignment();
+	set_in_cr4(X86_CR4_OSFXSR);
+	set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+	write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
+
+	mxcsr_feature_mask_init();
+	/* clean state in init */
+	current_thread_info()->status = 0;
+	clear_used_math();
+}
+#endif	/* CONFIG_X86_64 */
+
 /*
  * The _current_ task is using the FPU for the first time
  * so initialize it and set the mxcsr to its default
@@ -48,12 +89,18 @@ void mxcsr_feature_mask_init(void)
  */
 void init_fpu(struct task_struct *tsk)
 {
+	if (tsk_used_math(tsk)) {
+		if (tsk == current)
+			unlazy_fpu(tsk);
+		return;
+	}
+
 	if (cpu_has_fxsr) {
 		memset(&tsk->thread.i387.fxsave, 0,
 		       sizeof(struct i387_fxsave_struct));
 		tsk->thread.i387.fxsave.cwd = 0x37f;
 		if (cpu_has_xmm)
-			tsk->thread.i387.fxsave.mxcsr = 0x1f80;
+			tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
 	} else {
 		memset(&tsk->thread.i387.fsave, 0,
 		       sizeof(struct i387_fsave_struct));
@@ -62,27 +109,59 @@ void init_fpu(struct task_struct *tsk)
 		tsk->thread.i387.fsave.twd = 0xffffffffu;
 		tsk->thread.i387.fsave.fos = 0xffff0000u;
 	}
-	/* only the device not available exception
-	 * or ptrace can call init_fpu */
+	/*
+	 * Only the device not available exception or ptrace can call init_fpu.
+	 */
 	set_stopped_child_used_math(tsk);
 }
 
-/*
- * FPU lazy state save handling.
- */
-
-void kernel_fpu_begin(void)
+int fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
-	struct thread_info *thread = current_thread_info();
-
-	preempt_disable();
-	if (thread->status & TS_USEDFPU) {
-		__save_init_fpu(thread->task);
-		return;
-	}
-	clts();
+	return tsk_used_math(target) ? regset->n : 0;
 }
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
+}
+
+int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	if (!cpu_has_fxsr)
+		return -ENODEV;
+
+	unlazy_fpu(target);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.i387.fxsave, 0, -1);
+}
+
+int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_fxsr)
+		return -ENODEV;
+
+	unlazy_fpu(target);
+	set_stopped_child_used_math(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.i387.fxsave, 0, -1);
+
+	/*
+	 * mxcsr reserved bits must be masked to zero for security reasons.
+	 */
+	target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+
+	return ret;
+}
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 
 /*
  * FPU tag word conversions.
@@ -94,210 +173,187 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
 
 	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
 	tmp = ~twd;
-	tmp = (tmp | (tmp >> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
 	/* and move the valid bits to the lower byte. */
 	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
 	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
 	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-
 	return tmp;
 }
 
-static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
+#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
+#define FP_EXP_TAG_VALID	0
+#define FP_EXP_TAG_ZERO		1
+#define FP_EXP_TAG_SPECIAL	2
+#define FP_EXP_TAG_EMPTY	3
+
+static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 {
-	struct _fpxreg *st = NULL;
-	unsigned long tos = (fxsave->swd >> 11) & 7;
-	unsigned long twd = (unsigned long) fxsave->twd;
-	unsigned long tag;
-	unsigned long ret = 0xffff0000u;
+	struct _fpxreg *st;
+	u32 tos = (fxsave->swd >> 11) & 7;
+	u32 twd = (unsigned long) fxsave->twd;
+	u32 tag;
+	u32 ret = 0xffff0000u;
 	int i;
 
-#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
-
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < 8; i++, twd >>= 1) {
 		if (twd & 0x1) {
 			st = FPREG_ADDR(fxsave, (i - tos) & 7);
 
 			switch (st->exponent & 0x7fff) {
 			case 0x7fff:
-				tag = 2;		/* Special */
+				tag = FP_EXP_TAG_SPECIAL;
 				break;
 			case 0x0000:
 				if (!st->significand[0] &&
 				    !st->significand[1] &&
 				    !st->significand[2] &&
-				    !st->significand[3]) {
-					tag = 1;	/* Zero */
-				} else {
-					tag = 2;	/* Special */
-				}
+				    !st->significand[3])
+					tag = FP_EXP_TAG_ZERO;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
 				break;
 			default:
-				if (st->significand[3] & 0x8000) {
-					tag = 0;	/* Valid */
-				} else {
-					tag = 2;	/* Special */
-				}
+				if (st->significand[3] & 0x8000)
+					tag = FP_EXP_TAG_VALID;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
 				break;
 			}
 		} else {
-			tag = 3;			/* Empty */
+			tag = FP_EXP_TAG_EMPTY;
 		}
-		ret |= (tag << (2 * i));
-		twd = twd >> 1;
+		ret |= tag << (2 * i);
 	}
 	return ret;
 }
 
-/*
- * FPU state interaction.
- */
-
-unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
-	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
-	}
-}
-
-unsigned short get_fpu_swd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
-	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
-	}
-}
-
-#if 0
-unsigned short get_fpu_twd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.twd;
-	} else {
-		return (unsigned short)tsk->thread.i387.fsave.twd;
-	}
-}
-#endif  /*  0  */
-
-unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
-	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
-	} else {
-		return 0x1f80;
-	}
-}
-
-#if 0
-
-void set_fpu_cwd(struct task_struct *tsk, unsigned short cwd)
-{
-	if (cpu_has_fxsr) {
-		tsk->thread.i387.fxsave.cwd = cwd;
-	} else {
-		tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
-	}
-}
-
-void set_fpu_swd(struct task_struct *tsk, unsigned short swd)
-{
-	if (cpu_has_fxsr) {
-		tsk->thread.i387.fxsave.swd = swd;
-	} else {
-		tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
-	}
-}
-
-void set_fpu_twd(struct task_struct *tsk, unsigned short twd)
-{
-	if (cpu_has_fxsr) {
-		tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
-	} else {
-		tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
-	}
-}
-
-#endif  /*  0  */
-
 /*
  * FXSR floating point environment conversions.
  */
 
-static int convert_fxsr_to_user(struct _fpstate __user *buf,
-				struct i387_fxsave_struct *fxsave)
+static void convert_from_fxsr(struct user_i387_ia32_struct *env,
+			      struct task_struct *tsk)
 {
-	unsigned long env[7];
-	struct _fpreg __user *to;
-	struct _fpxreg *from;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
+	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
 
-	env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
-	env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
-	env[2] = twd_fxsr_to_i387(fxsave);
-	env[3] = fxsave->fip;
-	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
-	env[5] = fxsave->foo;
-	env[6] = fxsave->fos;
+	env->cwd = fxsave->cwd | 0xffff0000u;
+	env->swd = fxsave->swd | 0xffff0000u;
+	env->twd = twd_fxsr_to_i387(fxsave);
 
-	if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
-		return 1;
-
-	to = &buf->_st[0];
-	from = (struct _fpxreg *) &fxsave->st_space[0];
-	for (i = 0; i < 8; i++, to++, from++) {
-		unsigned long __user *t = (unsigned long __user *)to;
-		unsigned long *f = (unsigned long *)from;
-
-		if (__put_user(*f, t) ||
-		    __put_user(*(f + 1), t + 1) ||
-		    __put_user(from->exponent, &to->exponent))
-			return 1;
+#ifdef CONFIG_X86_64
+	env->fip = fxsave->rip;
+	env->foo = fxsave->rdp;
+	if (tsk == current) {
+		/*
+		 * should be actually ds/cs at fpu exception time, but
+		 * that information is not available in 64bit mode.
+		 */
+		asm("mov %%ds,%0" : "=r" (env->fos));
+		asm("mov %%cs,%0" : "=r" (env->fcs));
+	} else {
+		struct pt_regs *regs = task_pt_regs(tsk);
+		env->fos = 0xffff0000 | tsk->thread.ds;
+		env->fcs = regs->cs;
 	}
-	return 0;
+#else
+	env->fip = fxsave->fip;
+	env->fcs = fxsave->fcs;
+	env->foo = fxsave->foo;
+	env->fos = fxsave->fos;
+#endif
+
+	for (i = 0; i < 8; ++i)
+		memcpy(&to[i], &from[i], sizeof(to[0]));
 }
 
-static int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
-				  struct _fpstate __user *buf)
+static void convert_to_fxsr(struct task_struct *tsk,
+			    const struct user_i387_ia32_struct *env)
+
 {
-	unsigned long env[7];
-	struct _fpxreg *to;
-	struct _fpreg __user *from;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
+	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
 
-	if (__copy_from_user(env, buf, 7 * sizeof(long)))
-		return 1;
+	fxsave->cwd = env->cwd;
+	fxsave->swd = env->swd;
+	fxsave->twd = twd_i387_to_fxsr(env->twd);
+	fxsave->fop = (u16) ((u32) env->fcs >> 16);
+#ifdef CONFIG_X86_64
+	fxsave->rip = env->fip;
+	fxsave->rdp = env->foo;
+	/* cs and ds ignored */
+#else
+	fxsave->fip = env->fip;
+	fxsave->fcs = (env->fcs & 0xffff);
+	fxsave->foo = env->foo;
+	fxsave->fos = env->fos;
+#endif
 
-	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
-	fxsave->swd = (unsigned short)(env[1] & 0xffff);
-	fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
-	fxsave->fip = env[3];
-	fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
-	fxsave->fcs = (env[4] & 0xffff);
-	fxsave->foo = env[5];
-	fxsave->fos = env[6];
+	for (i = 0; i < 8; ++i)
+		memcpy(&to[i], &from[i], sizeof(from[0]));
+}
 
-	to = (struct _fpxreg *) &fxsave->st_space[0];
-	from = &buf->_st[0];
-	for (i = 0; i < 8; i++, to++, from++) {
-		unsigned long *t = (unsigned long *)to;
-		unsigned long __user *f = (unsigned long __user *)from;
+int fpregs_get(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       void *kbuf, void __user *ubuf)
+{
+	struct user_i387_ia32_struct env;
 
-		if (__get_user(*t, f) ||
-		    __get_user(*(t + 1), f + 1) ||
-		    __get_user(to->exponent, &from->exponent))
-			return 1;
+	if (!HAVE_HWFP)
+		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+
+	unlazy_fpu(target);
+
+	if (!cpu_has_fxsr)
+		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					   &target->thread.i387.fsave, 0, -1);
+
+	if (kbuf && pos == 0 && count == sizeof(env)) {
+		convert_from_fxsr(kbuf, target);
+		return 0;
 	}
-	return 0;
+
+	convert_from_fxsr(&env, target);
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+}
+
+int fpregs_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct user_i387_ia32_struct env;
+	int ret;
+
+	if (!HAVE_HWFP)
+		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+
+	unlazy_fpu(target);
+	set_stopped_child_used_math(target);
+
+	if (!cpu_has_fxsr)
+		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.i387.fsave, 0, -1);
+
+	if (pos > 0 || count < sizeof(env))
+		convert_from_fxsr(&env, target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+	if (!ret)
+		convert_to_fxsr(target, &env);
+
+	return ret;
 }
 
 /*
  * Signal frame handlers.
  */
 
-static inline int save_i387_fsave(struct _fpstate __user *buf)
+static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
 
@@ -309,14 +365,16 @@ static inline int save_i387_fsave(struct _fpstate __user *buf)
 	return 1;
 }
 
-static int save_i387_fxsave(struct _fpstate __user *buf)
+static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct user_i387_ia32_struct env;
 	int err = 0;
 
 	unlazy_fpu(tsk);
 
-	if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave))
+	convert_from_fxsr(&env, tsk);
+	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;
 
 	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
@@ -330,7 +388,7 @@ static int save_i387_fxsave(struct _fpstate __user *buf)
 	return 1;
 }
 
-int save_i387(struct _fpstate __user *buf)
+int save_i387_ia32(struct _fpstate_ia32 __user *buf)
 {
 	if (!used_math())
 		return 0;
@@ -347,11 +405,13 @@ int save_i387(struct _fpstate __user *buf)
 			return save_i387_fsave(buf);
 		}
 	} else {
-		return save_i387_soft(&current->thread.i387.soft, buf);
+		return fpregs_soft_get(current, NULL,
+				       0, sizeof(struct user_i387_ia32_struct),
+				       NULL, buf) ? -1 : 1;
 	}
 }
 
-static inline int restore_i387_fsave(struct _fpstate __user *buf)
+static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
 	clear_fpu(tsk);
@@ -359,19 +419,23 @@ static inline int restore_i387_fsave(struct _fpstate __user *buf)
 				sizeof(struct i387_fsave_struct));
 }
 
-static int restore_i387_fxsave(struct _fpstate __user *buf)
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
 	int err;
 	struct task_struct *tsk = current;
+	struct user_i387_ia32_struct env;
 	clear_fpu(tsk);
 	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
 			       sizeof(struct i387_fxsave_struct));
 	/* mxcsr reserved bits must be masked to zero for security reasons */
 	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-	return err ? 1 : convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
+	if (err || __copy_from_user(&env, buf, sizeof(env)))
+		return 1;
+	convert_to_fxsr(tsk, &env);
+	return 0;
 }
 
-int restore_i387(struct _fpstate __user *buf)
+int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
 {
 	int err;
 
@@ -382,101 +446,52 @@ int restore_i387(struct _fpstate __user *buf)
 			err = restore_i387_fsave(buf);
 		}
 	} else {
-		err = restore_i387_soft(&current->thread.i387.soft, buf);
+		err = fpregs_soft_set(current, NULL,
+				      0, sizeof(struct user_i387_ia32_struct),
+				      NULL, buf) != 0;
 	}
 	set_used_math();
 	return err;
 }
 
-/*
- * ptrace request handlers.
- */
+#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
 
-static inline int get_fpregs_fsave(struct user_i387_struct __user *buf,
-				   struct task_struct *tsk)
-{
-	return __copy_to_user(buf, &tsk->thread.i387.fsave,
-			      sizeof(struct user_i387_struct));
-}
-
-static inline int get_fpregs_fxsave(struct user_i387_struct __user *buf,
-				    struct task_struct *tsk)
-{
-	return convert_fxsr_to_user((struct _fpstate __user *)buf,
-				    &tsk->thread.i387.fxsave);
-}
+#ifdef CONFIG_X86_64
 
 int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
 {
-	if (HAVE_HWFP) {
-		if (cpu_has_fxsr) {
-			return get_fpregs_fxsave(buf, tsk);
-		} else {
-			return get_fpregs_fsave(buf, tsk);
-		}
-	} else {
-		return save_i387_soft(&tsk->thread.i387.soft,
-				      (struct _fpstate __user *)buf);
-	}
-}
-
-static inline int set_fpregs_fsave(struct task_struct *tsk,
-				   struct user_i387_struct __user *buf)
-{
-	return __copy_from_user(&tsk->thread.i387.fsave, buf,
-				sizeof(struct user_i387_struct));
-}
-
-static inline int set_fpregs_fxsave(struct task_struct *tsk,
-				    struct user_i387_struct __user *buf)
-{
-	return convert_fxsr_from_user(&tsk->thread.i387.fxsave,
-				      (struct _fpstate __user *)buf);
+	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
 }
 
 int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
 {
-	if (HAVE_HWFP) {
-		if (cpu_has_fxsr) {
-			return set_fpregs_fxsave(tsk, buf);
-		} else {
-			return set_fpregs_fsave(tsk, buf);
-		}
-	} else {
-		return restore_i387_soft(&tsk->thread.i387.soft,
-					 (struct _fpstate __user *)buf);
-	}
+	return xfpregs_set(tsk, NULL, 0, sizeof(*buf), NULL, buf);
+}
+
+#else
+
+int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
+{
+	return fpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
+}
+
+int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
+{
+	return fpregs_set(tsk, NULL, 0, sizeof(*buf), NULL, buf);
 }
 
 int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *tsk)
 {
-	if (cpu_has_fxsr) {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
-				   sizeof(struct user_fxsr_struct)))
-			return -EFAULT;
-		return 0;
-	} else {
-		return -EIO;
-	}
+	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
 }
 
 int set_fpxregs(struct task_struct *tsk, struct user_fxsr_struct __user *buf)
 {
-	int ret = 0;
-
-	if (cpu_has_fxsr) {
-		if (__copy_from_user(&tsk->thread.i387.fxsave, buf,
-				     sizeof(struct user_fxsr_struct)))
-			ret = -EFAULT;
-		/* mxcsr reserved bits must be masked to zero
-		 * for security reasons */
-		tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-	} else {
-		ret = -EIO;
-	}
-	return ret;
+	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
 }
 
+#endif
+
 /*
  * FPU state for core dumps.
  */
@@ -538,7 +553,7 @@ int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
 }
 
 int dump_task_extended_fpu(struct task_struct *tsk,
-			   struct user_fxsr_struct *fpu)
+			   struct user32_fxsr_struct *fpu)
 {
 	int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 18972a305890..084805ab7323 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -886,7 +886,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 #include <linux/compat.h>
 #include <linux/syscalls.h>
 #include <asm/ia32.h>
-#include <asm/fpu32.h>
 #include <asm/user32.h>
 
 #define R32(l,q)							\
diff --git a/include/asm-x86/fpu32.h b/include/asm-x86/fpu32.h
deleted file mode 100644
index 4153db5c0c31..000000000000
--- a/include/asm-x86/fpu32.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _FPU32_H
-#define _FPU32_H 1
-
-struct _fpstate_ia32;
-
-int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave);
-int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, 
-		   struct pt_regs *regs, int fsave);
-
-#endif
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index a8bbed349664..b2bc0050ce99 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -1,5 +1 @@
-#ifdef CONFIG_X86_32
-# include "i387_32.h"
-#else
-# include "i387_64.h"
-#endif
+#include "i387_64.h"
diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h
index 4cea7373ac78..de435b9114df 100644
--- a/include/asm-x86/i387_64.h
+++ b/include/asm-x86/i387_64.h
@@ -12,6 +12,7 @@
 
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
+#include <linux/regset.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
@@ -23,6 +24,16 @@ extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 
+extern user_regset_active_fn fpregs_active, xfpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
+
+#ifdef CONFIG_IA32_EMULATION
+struct _fpstate_ia32;
+extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
+extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
+#endif
+
 #ifdef CONFIG_X86_64
 
 /* Ignore delayed exceptions from user space */

From 1eeaed7679eab3666d2d8c964d060c2169b3813b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:51 +0100
Subject: [PATCH 383/890] x86: x86 i387 cleanup

This removes all the old code that is no longer used after
the i387 unification and cleanup.  The i387_64.h is renamed
to i387.h with no changes, but since it replaces the nonempty
one-line stub i387.h it looks like a big diff and not a rename.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/fpu32.c     | 181 ------------------
 arch/x86/kernel/i387_64.c | 119 ------------
 include/asm-x86/i387.h    | 375 +++++++++++++++++++++++++++++++++++++-
 include/asm-x86/i387_32.h | 149 ---------------
 include/asm-x86/i387_64.h | 374 -------------------------------------
 5 files changed, 374 insertions(+), 824 deletions(-)
 delete mode 100644 arch/x86/ia32/fpu32.c
 delete mode 100644 arch/x86/kernel/i387_64.c
 delete mode 100644 include/asm-x86/i387_32.h
 delete mode 100644 include/asm-x86/i387_64.h

diff --git a/arch/x86/ia32/fpu32.c b/arch/x86/ia32/fpu32.c
deleted file mode 100644
index ae80745f668f..000000000000
--- a/arch/x86/ia32/fpu32.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
- * This is used for ptrace, signals and coredumps in 32bit emulation.
- */
-
-#include <linux/sched.h>
-#include <asm/sigcontext32.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
-	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
-	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
-	tmp = ~twd;
-	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-	/* and move the valid bits to the lower byte. */
-	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-	return tmp;
-}
-
-#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16);
-#define FP_EXP_TAG_VALID	0
-#define FP_EXP_TAG_ZERO		1
-#define FP_EXP_TAG_SPECIAL	2
-#define FP_EXP_TAG_EMPTY	3
-
-static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
-{
-	struct _fpxreg *st;
-	unsigned long tos = (fxsave->swd >> 11) & 7;
-	unsigned long twd = (unsigned long) fxsave->twd;
-	unsigned long tag;
-	unsigned long ret = 0xffff0000;
-	int i;
-
-	for (i = 0; i < 8; i++, twd >>= 1) {
-		if (twd & 0x1) {
-			st = FPREG_ADDR(fxsave, (i - tos) & 7);
-
-			switch (st->exponent & 0x7fff) {
-			case 0x7fff:
-				tag = FP_EXP_TAG_SPECIAL;
-				break;
-			case 0x0000:
-				if (!st->significand[0] &&
-				    !st->significand[1] &&
-				    !st->significand[2] &&
-				    !st->significand[3])
-					tag = FP_EXP_TAG_ZERO;
-				else
-					tag = FP_EXP_TAG_SPECIAL;
-				break;
-			default:
-				if (st->significand[3] & 0x8000)
-					tag = FP_EXP_TAG_VALID;
-				else
-					tag = FP_EXP_TAG_SPECIAL;
-				break;
-			}
-		} else {
-			tag = FP_EXP_TAG_EMPTY;
-		}
-		ret |= tag << (2 * i);
-	}
-	return ret;
-}
-
-#define G(num, val) err |= __get_user(val, num + (u32 __user *)buf)
-
-static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
-					 struct _fpstate_ia32 __user *buf)
-{
-	struct _fpxreg *to;
-	struct _fpreg __user *from;
-	int i, err = 0;
-	u32 v;
-
-	G(0, fxsave->cwd);
-	G(1, fxsave->swd);
-	G(2, fxsave->twd);
-	fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
-	G(3, fxsave->rip);
-	G(4, v);
-	/* cs ignored */
-	fxsave->fop = v>>16;
-	G(5, fxsave->rdp);
-	/* 6: ds ignored */
-	if (err)
-		return -1;
-
-	to = (struct _fpxreg *)&fxsave->st_space[0];
-	from = &buf->_st[0];
-	for (i = 0; i < 8; i++, to++, from++) {
-		if (__copy_from_user(to, from, sizeof(*from)))
-			return -1;
-	}
-	return 0;
-}
-
-#define P(num, val) err |= __put_user(val, num + (u32 __user *)buf)
-
-static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
-				       struct i387_fxsave_struct *fxsave,
-				       struct pt_regs *regs,
-				       struct task_struct *tsk)
-{
-	struct _fpreg __user *to;
-	struct _fpxreg *from;
-	int i, err = 0;
-	u16 cs, ds;
-
-	if (tsk == current) {
-		/*
-		 * should be actually ds/cs at fpu exception time, but
-		 * that information is not available in 64bit mode.
-		 */
-		asm("movw %%ds,%0 " : "=r" (ds));
-		asm("movw %%cs,%0 " : "=r" (cs));
-	} else {
-		 /* ptrace. task has stopped. */
-		ds = tsk->thread.ds;
-		cs = regs->cs;
-	}
-
-	P(0, (u32)fxsave->cwd | 0xffff0000);
-	P(1, (u32)fxsave->swd | 0xffff0000);
-	P(2, twd_fxsr_to_i387(fxsave));
-	P(3, (u32)fxsave->rip);
-	P(4,  cs | ((u32)fxsave->fop) << 16);
-	P(5, fxsave->rdp);
-	P(6, 0xffff0000 | ds);
-
-	if (err)
-		return -1;
-
-	to = &buf->_st[0];
-	from = (struct _fpxreg *) &fxsave->st_space[0];
-	for (i = 0; i < 8; i++, to++, from++) {
-		if (__copy_to_user(to, from, sizeof(*to)))
-			return -1;
-	}
-	return 0;
-}
-
-int restore_i387_ia32(struct task_struct *tsk,
-		      struct _fpstate_ia32 __user *buf, int fsave)
-{
-	clear_fpu(tsk);
-	if (!fsave) {
-		if (__copy_from_user(&tsk->thread.i387.fxsave,
-				     &buf->_fxsr_env[0],
-				     sizeof(struct i387_fxsave_struct)))
-			return -1;
-		tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-		set_stopped_child_used_math(tsk);
-	}
-	return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
-}
-
-int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf,
-		   struct pt_regs *regs, int fsave)
-{
-	int err = 0;
-
-	init_fpu(tsk);
-	if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
-		return -1;
-	if (fsave)
-		return 0;
-	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
-	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
-	err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
-			      sizeof(struct i387_fxsave_struct));
-	return err ? -1 : 1;
-}
diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c
deleted file mode 100644
index f335a76d7ea7..000000000000
--- a/arch/x86/kernel/i387_64.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- *  Copyright (C) 1994 Linus Torvalds
- *  Copyright (C) 2002 Andi Kleen, SuSE Labs
- *
- *  Pentium III FXSR, SSE support
- *  General FPU state handling cleanups
- *	Gareth Hughes <gareth@valinux.com>, May 2000
- * 
- *  x86-64 rework 2002 Andi Kleen. 
- *  Does direct fxsave in and out of user space now for signal handlers.
- *  All the FSAVE<->FXSAVE conversion code has been moved to the 32bit emulation,
- *  the 64bit user space sees a FXSAVE frame directly. 
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffff;
-
-void mxcsr_feature_mask_init(void)
-{
-	unsigned int mask;
-	clts();
-	memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
-	asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-	mask = current->thread.i387.fxsave.mxcsr_mask;
-	if (mask == 0) mask = 0x0000ffbf;
-	mxcsr_feature_mask &= mask;
-	stts();
-}
-
-/*
- * Called at bootup to set up the initial FPU state that is later cloned
- * into all processes.
- */
-void __cpuinit fpu_init(void)
-{
-	unsigned long oldcr0 = read_cr0();
-	extern void __bad_fxsave_alignment(void);
-		
-	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-		__bad_fxsave_alignment();
-	set_in_cr4(X86_CR4_OSFXSR);
-	set_in_cr4(X86_CR4_OSXMMEXCPT);
-
-	write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
-
-	mxcsr_feature_mask_init();
-	/* clean state in init */
-	current_thread_info()->status = 0;
-	clear_used_math();
-}
-
-void init_fpu(struct task_struct *child)
-{
-	if (tsk_used_math(child)) {
-		if (child == current)
-			unlazy_fpu(child);
-		return;
-	}	
-	memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
-	child->thread.i387.fxsave.cwd = 0x37f;
-	child->thread.i387.fxsave.mxcsr = 0x1f80;
-	/* only the device not available exception or ptrace can call init_fpu */
-	set_stopped_child_used_math(child);
-}
-
-/*
- * ptrace request handlers.
- */
-
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
-{
-	init_fpu(tsk);
-	return __copy_to_user(buf, &tsk->thread.i387.fxsave,
-			       sizeof(struct user_i387_struct)) ? -EFAULT : 0;
-}
-
-int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
-{
-	if (__copy_from_user(&tsk->thread.i387.fxsave, buf, 
-			     sizeof(struct user_i387_struct)))
-		return -EFAULT;
-		return 0;
-}
-
-/*
- * FPU state for core dumps.
- */
-
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
-{
-	struct task_struct *tsk = current;
-
-	if (!used_math())
-		return 0;
-
-	unlazy_fpu(tsk);
-	memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct)); 
-	return 1; 
-}
-
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
-	int fpvalid = !!tsk_used_math(tsk);
-
-	if (fpvalid) {
-		if (tsk == current)
-			unlazy_fpu(tsk);
-		memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct)); 	
-}
-	return fpvalid;
-}
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index b2bc0050ce99..de435b9114df 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -1 +1,374 @@
-#include "i387_64.h"
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_I387_H
+#define _ASM_X86_I387_H
+
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/regset.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/uaccess.h>
+
+extern void fpu_init(void);
+extern unsigned int mxcsr_feature_mask;
+extern void mxcsr_feature_mask_init(void);
+extern void init_fpu(struct task_struct *child);
+extern asmlinkage void math_state_restore(void);
+
+extern user_regset_active_fn fpregs_active, xfpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
+
+#ifdef CONFIG_IA32_EMULATION
+struct _fpstate_ia32;
+extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
+extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
+#endif
+
+#ifdef CONFIG_X86_64
+
+/* Ignore delayed exceptions from user space */
+static inline void tolerant_fwait(void)
+{
+	asm volatile("1: fwait\n"
+		     "2:\n"
+		     "   .section __ex_table,\"a\"\n"
+		     "	.align 8\n"
+		     "	.quad 1b,2b\n"
+		     "	.previous\n");
+}
+
+static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
+{
+	int err;
+
+	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     ".section __ex_table,\"a\"\n"
+		     "   .align 8\n"
+		     "   .quad  1b,3b\n"
+		     ".previous"
+		     : [err] "=r" (err)
+#if 0 /* See comment in __save_init_fpu() below. */
+		     : [fx] "r" (fx), "m" (*fx), "0" (0));
+#else
+		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
+#endif
+	if (unlikely(err))
+		init_fpu(current);
+	return err;
+}
+
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+   is pending. Clear the x87 state here by setting it to fixed
+   values. The kernel data segment can be sometimes 0 and sometimes
+   new user value. Both should be ok.
+   Use the PDA as safe address because it should be already in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+	if (unlikely(fx->swd & X87_FSW_ES))
+		 asm volatile("fnclex");
+	alternative_input(ASM_NOP8 ASM_NOP2,
+		     "    emms\n"		/* clear stack tags */
+		     "    fildl %%gs:0",	/* load to clear state */
+		     X86_FEATURE_FXSAVE_LEAK);
+}
+
+static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
+{
+	int err;
+
+	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     ".section __ex_table,\"a\"\n"
+		     "   .align 8\n"
+		     "   .quad  1b,3b\n"
+		     ".previous"
+		     : [err] "=r" (err), "=m" (*fx)
+#if 0 /* See comment in __fxsave_clear() below. */
+		     : [fx] "r" (fx), "0" (0));
+#else
+		     : [fx] "cdaSDb" (fx), "0" (0));
+#endif
+	if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct)))
+		err = -EFAULT;
+	/* No need to clear here because the caller clears USED_MATH */
+	return err;
+}
+
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+	/* Using "rex64; fxsave %0" is broken because, if the memory operand
+	   uses any extended registers for addressing, a second REX prefix
+	   will be generated (to the assembler, rex64 followed by semicolon
+	   is a separate instruction), and hence the 64-bitness is lost. */
+#if 0
+	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
+	   starting with gas 2.16. */
+	__asm__ __volatile__("fxsaveq %0"
+			     : "=m" (tsk->thread.i387.fxsave));
+#elif 0
+	/* Using, as a workaround, the properly prefixed form below isn't
+	   accepted by any binutils version so far released, complaining that
+	   the same type of prefix is used twice if an extended register is
+	   needed for addressing (fix submitted to mainline 2005-11-21). */
+	__asm__ __volatile__("rex64/fxsave %0"
+			     : "=m" (tsk->thread.i387.fxsave));
+#else
+	/* This, however, we can work around by forcing the compiler to select
+	   an addressing mode that doesn't require extended registers. */
+	__asm__ __volatile__("rex64/fxsave %P2(%1)"
+			     : "=m" (tsk->thread.i387.fxsave)
+			     : "cdaSDb" (tsk),
+				"i" (offsetof(__typeof__(*tsk),
+					      thread.i387.fxsave)));
+#endif
+	clear_fpu_state(&tsk->thread.i387.fxsave);
+	task_thread_info(tsk)->status &= ~TS_USEDFPU;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387(struct _fpstate __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.i387.fxsave));
+
+	if ((unsigned long)buf % 16)
+		printk("save_i387: bad fpstate %p\n", buf);
+
+	if (!used_math())
+		return 0;
+	clear_used_math(); /* trigger finit */
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
+		if (err) return err;
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	} else {
+		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+				   sizeof(struct i387_fxsave_struct)))
+			return -1;
+	}
+	return 1;
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+static inline int restore_i387(struct _fpstate __user *buf)
+{
+	set_used_math();
+	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+		clts();
+		task_thread_info(current)->status |= TS_USEDFPU;
+	}
+	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+#else  /* CONFIG_X86_32 */
+
+static inline void tolerant_fwait(void)
+{
+	asm volatile("fnclex ; fwait");
+}
+
+static inline void restore_fpu(struct task_struct *tsk)
+{
+	/*
+	 * The "nop" is needed to make the instructions the same
+	 * length.
+	 */
+	alternative_input(
+		"nop ; frstor %1",
+		"fxrstor %1",
+		X86_FEATURE_FXSR,
+		"m" ((tsk)->thread.i387.fxsave));
+}
+
+/* We need a safe address that is cheap to find and that is already
+   in L1 during context switch. The best choices are unfortunately
+   different for UP and SMP */
+#ifdef CONFIG_SMP
+#define safe_address (__per_cpu_offset[0])
+#else
+#define safe_address (kstat_cpu(0).cpustat.user)
+#endif
+
+/*
+ * These must be called with preempt disabled
+ */
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+	/* Use more nops than strictly needed in case the compiler
+	   varies code */
+	alternative_input(
+		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
+		"fxsave %[fx]\n"
+		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
+		X86_FEATURE_FXSR,
+		[fx] "m" (tsk->thread.i387.fxsave),
+		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		GENERIC_NOP8 GENERIC_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %[addr]", 	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+	task_thread_info(tsk)->status &= ~TS_USEDFPU;
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387(struct _fpstate __user *buf);
+extern int restore_i387(struct _fpstate __user *buf);
+
+#endif	/* CONFIG_X86_64 */
+
+static inline void __unlazy_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		__save_init_fpu(tsk);
+		stts();
+	} else
+		tsk->fpu_counter = 0;
+}
+
+static inline void __clear_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		tolerant_fwait();
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	}
+}
+
+static inline void kernel_fpu_begin(void)
+{
+	struct thread_info *me = current_thread_info();
+	preempt_disable();
+	if (me->status & TS_USEDFPU)
+		__save_init_fpu(me->task);
+	else
+		clts();
+}
+
+static inline void kernel_fpu_end(void)
+{
+	stts();
+	preempt_enable();
+}
+
+#ifdef CONFIG_X86_64
+
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+	__save_init_fpu(tsk);
+	stts();
+}
+
+#define unlazy_fpu	__unlazy_fpu
+#define clear_fpu	__clear_fpu
+
+#else  /* CONFIG_X86_32 */
+
+/*
+ * These disable preemption on their own and are safe
+ */
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__save_init_fpu(tsk);
+	stts();
+	preempt_enable();
+}
+
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__unlazy_fpu(tsk);
+	preempt_enable();
+}
+
+static inline void clear_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	__clear_fpu(tsk);
+	preempt_enable();
+}
+
+#endif	/* CONFIG_X86_64 */
+
+/*
+ * ptrace request handlers...
+ */
+extern int get_fpregs(struct user_i387_struct __user *buf,
+		      struct task_struct *tsk);
+extern int set_fpregs(struct task_struct *tsk,
+		      struct user_i387_struct __user *buf);
+
+struct user_fxsr_struct;
+extern int get_fpxregs(struct user_fxsr_struct __user *buf,
+		       struct task_struct *tsk);
+extern int set_fpxregs(struct task_struct *tsk,
+		       struct user_fxsr_struct __user *buf);
+
+/*
+ * i387 state interaction
+ */
+static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+{
+	if (cpu_has_fxsr) {
+		return tsk->thread.i387.fxsave.cwd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.cwd;
+	}
+}
+
+static inline unsigned short get_fpu_swd(struct task_struct *tsk)
+{
+	if (cpu_has_fxsr) {
+		return tsk->thread.i387.fxsave.swd;
+	} else {
+		return (unsigned short)tsk->thread.i387.fsave.swd;
+	}
+}
+
+static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
+{
+	if (cpu_has_xmm) {
+		return tsk->thread.i387.fxsave.mxcsr;
+	} else {
+		return MXCSR_DEFAULT;
+	}
+}
+
+#endif	/* _ASM_X86_I387_H */
diff --git a/include/asm-x86/i387_32.h b/include/asm-x86/i387_32.h
deleted file mode 100644
index 9ac2502cdd3d..000000000000
--- a/include/asm-x86/i387_32.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- *	Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#ifndef __ASM_I386_I387_H
-#define __ASM_I386_I387_H
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-
-extern void mxcsr_feature_mask_init(void);
-extern void init_fpu(struct task_struct *);
-
-/*
- * FPU lazy state save handling...
- */
-
-/*
- * The "nop" is needed to make the instructions the same
- * length.
- */
-#define restore_fpu(tsk)			\
-	alternative_input(			\
-		"nop ; frstor %1",		\
-		"fxrstor %1",			\
-		X86_FEATURE_FXSR,		\
-		"m" ((tsk)->thread.i387.fxsave))
-
-extern void kernel_fpu_begin(void);
-#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
-
-/* We need a safe address that is cheap to find and that is already
-   in L1 during context switch. The best choices are unfortunately
-   different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
-/*
- * These must be called with preempt disabled
- */
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
-	/* Use more nops than strictly needed in case the compiler
-	   varies code */
-	alternative_input(
-		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
-		"fxsave %[fx]\n"
-		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
-		X86_FEATURE_FXSR,
-		[fx] "m" (tsk->thread.i387.fxsave),
-		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending.  Clear the x87 state here by setting it to fixed
-   	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		GENERIC_NOP8 GENERIC_NOP2,
-		"emms\n\t"	  	/* clear stack tags */
-		"fildl %[addr]", 	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
-#define __unlazy_fpu( tsk ) do {				\
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {	\
-		__save_init_fpu(tsk);				\
-		stts();						\
-	} else							\
-		tsk->fpu_counter = 0;				\
-} while (0)
-
-#define __clear_fpu( tsk )					\
-do {								\
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {	\
-		asm volatile("fnclex ; fwait");			\
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;	\
-		stts();						\
-	}							\
-} while (0)
-
-
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu( struct task_struct *tsk )
-{
-	preempt_disable();
-	__save_init_fpu(tsk);
-	stts();
-	preempt_enable();
-}
-
-#define unlazy_fpu( tsk ) do {	\
-	preempt_disable();	\
-	__unlazy_fpu(tsk);	\
-	preempt_enable();	\
-} while (0)
-
-#define clear_fpu( tsk ) do {	\
-	preempt_disable();	\
-	__clear_fpu( tsk );	\
-	preempt_enable();	\
-} while (0)
-
-/*
- * FPU state interaction...
- */
-extern unsigned short get_fpu_cwd( struct task_struct *tsk );
-extern unsigned short get_fpu_swd( struct task_struct *tsk );
-extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
-extern asmlinkage void math_state_restore(void);
-
-/*
- * Signal frame handlers...
- */
-extern int save_i387( struct _fpstate __user *buf );
-extern int restore_i387( struct _fpstate __user *buf );
-
-/*
- * ptrace request handers...
- */
-extern int get_fpregs( struct user_i387_struct __user *buf,
-		       struct task_struct *tsk );
-extern int set_fpregs( struct task_struct *tsk,
-		       struct user_i387_struct __user *buf );
-
-extern int get_fpxregs( struct user_fxsr_struct __user *buf,
-			struct task_struct *tsk );
-extern int set_fpxregs( struct task_struct *tsk,
-			struct user_fxsr_struct __user *buf );
-
-/*
- * FPU state for core dumps...
- */
-extern int dump_fpu( struct pt_regs *regs,
-		     struct user_i387_struct *fpu );
-
-#endif /* __ASM_I386_I387_H */
diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h
deleted file mode 100644
index de435b9114df..000000000000
--- a/include/asm-x86/i387_64.h
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- *	Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
-
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-
-extern void fpu_init(void);
-extern unsigned int mxcsr_feature_mask;
-extern void mxcsr_feature_mask_init(void);
-extern void init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
-
-#ifdef CONFIG_IA32_EMULATION
-struct _fpstate_ia32;
-extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
-extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
-#endif
-
-#ifdef CONFIG_X86_64
-
-/* Ignore delayed exceptions from user space */
-static inline void tolerant_fwait(void)
-{
-	asm volatile("1: fwait\n"
-		     "2:\n"
-		     "   .section __ex_table,\"a\"\n"
-		     "	.align 8\n"
-		     "	.quad 1b,2b\n"
-		     "	.previous\n");
-}
-
-static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
-{
-	int err;
-
-	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  1b,3b\n"
-		     ".previous"
-		     : [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
-		     : [fx] "r" (fx), "m" (*fx), "0" (0));
-#else
-		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
-#endif
-	if (unlikely(err))
-		init_fpu(current);
-	return err;
-}
-
-#define X87_FSW_ES (1 << 7)	/* Exception Summary */
-
-/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-   is pending. Clear the x87 state here by setting it to fixed
-   values. The kernel data segment can be sometimes 0 and sometimes
-   new user value. Both should be ok.
-   Use the PDA as safe address because it should be already in L1. */
-static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
-{
-	if (unlikely(fx->swd & X87_FSW_ES))
-		 asm volatile("fnclex");
-	alternative_input(ASM_NOP8 ASM_NOP2,
-		     "    emms\n"		/* clear stack tags */
-		     "    fildl %%gs:0",	/* load to clear state */
-		     X86_FEATURE_FXSAVE_LEAK);
-}
-
-static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
-{
-	int err;
-
-	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  1b,3b\n"
-		     ".previous"
-		     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
-		     : [fx] "r" (fx), "0" (0));
-#else
-		     : [fx] "cdaSDb" (fx), "0" (0));
-#endif
-	if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct)))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
-	return err;
-}
-
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
-	/* Using "rex64; fxsave %0" is broken because, if the memory operand
-	   uses any extended registers for addressing, a second REX prefix
-	   will be generated (to the assembler, rex64 followed by semicolon
-	   is a separate instruction), and hence the 64-bitness is lost. */
-#if 0
-	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
-	   starting with gas 2.16. */
-	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (tsk->thread.i387.fxsave));
-#elif 0
-	/* Using, as a workaround, the properly prefixed form below isn't
-	   accepted by any binutils version so far released, complaining that
-	   the same type of prefix is used twice if an extended register is
-	   needed for addressing (fix submitted to mainline 2005-11-21). */
-	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (tsk->thread.i387.fxsave));
-#else
-	/* This, however, we can work around by forcing the compiler to select
-	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave %P2(%1)"
-			     : "=m" (tsk->thread.i387.fxsave)
-			     : "cdaSDb" (tsk),
-				"i" (offsetof(__typeof__(*tsk),
-					      thread.i387.fxsave)));
-#endif
-	clear_fpu_state(&tsk->thread.i387.fxsave);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.i387.fxsave));
-
-	if ((unsigned long)buf % 16)
-		printk("save_i387: bad fpstate %p\n", buf);
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
-		if (err) return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
-	set_used_math();
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
-	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-}
-
-#else  /* CONFIG_X86_32 */
-
-static inline void tolerant_fwait(void)
-{
-	asm volatile("fnclex ; fwait");
-}
-
-static inline void restore_fpu(struct task_struct *tsk)
-{
-	/*
-	 * The "nop" is needed to make the instructions the same
-	 * length.
-	 */
-	alternative_input(
-		"nop ; frstor %1",
-		"fxrstor %1",
-		X86_FEATURE_FXSR,
-		"m" ((tsk)->thread.i387.fxsave));
-}
-
-/* We need a safe address that is cheap to find and that is already
-   in L1 during context switch. The best choices are unfortunately
-   different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
-/*
- * These must be called with preempt disabled
- */
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
-	/* Use more nops than strictly needed in case the compiler
-	   varies code */
-	alternative_input(
-		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
-		"fxsave %[fx]\n"
-		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
-		X86_FEATURE_FXSR,
-		[fx] "m" (tsk->thread.i387.fxsave),
-		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending.  Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		GENERIC_NOP8 GENERIC_NOP2,
-		"emms\n\t"	  	/* clear stack tags */
-		"fildl %[addr]", 	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_i387(struct _fpstate __user *buf);
-extern int restore_i387(struct _fpstate __user *buf);
-
-#endif	/* CONFIG_X86_64 */
-
-static inline void __unlazy_fpu(struct task_struct *tsk)
-{
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		__save_init_fpu(tsk);
-		stts();
-	} else
-		tsk->fpu_counter = 0;
-}
-
-static inline void __clear_fpu(struct task_struct *tsk)
-{
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		tolerant_fwait();
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	}
-}
-
-static inline void kernel_fpu_begin(void)
-{
-	struct thread_info *me = current_thread_info();
-	preempt_disable();
-	if (me->status & TS_USEDFPU)
-		__save_init_fpu(me->task);
-	else
-		clts();
-}
-
-static inline void kernel_fpu_end(void)
-{
-	stts();
-	preempt_enable();
-}
-
-#ifdef CONFIG_X86_64
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	__save_init_fpu(tsk);
-	stts();
-}
-
-#define unlazy_fpu	__unlazy_fpu
-#define clear_fpu	__clear_fpu
-
-#else  /* CONFIG_X86_32 */
-
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__save_init_fpu(tsk);
-	stts();
-	preempt_enable();
-}
-
-static inline void unlazy_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__unlazy_fpu(tsk);
-	preempt_enable();
-}
-
-static inline void clear_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__clear_fpu(tsk);
-	preempt_enable();
-}
-
-#endif	/* CONFIG_X86_64 */
-
-/*
- * ptrace request handlers...
- */
-extern int get_fpregs(struct user_i387_struct __user *buf,
-		      struct task_struct *tsk);
-extern int set_fpregs(struct task_struct *tsk,
-		      struct user_i387_struct __user *buf);
-
-struct user_fxsr_struct;
-extern int get_fpxregs(struct user_fxsr_struct __user *buf,
-		       struct task_struct *tsk);
-extern int set_fpxregs(struct task_struct *tsk,
-		       struct user_fxsr_struct __user *buf);
-
-/*
- * i387 state interaction
- */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
-	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
-	}
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
-	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
-	}
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
-	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
-	} else {
-		return MXCSR_DEFAULT;
-	}
-}
-
-#endif	/* _ASM_X86_I387_H */

From 1bd5718ce58fb49ac158653380fa200f4759daad Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:51 +0100
Subject: [PATCH 384/890] x86: x86 TLS desc_struct cleanup

This cleans up the TLS code to use struct desc_struct and to separate the
encoding and installation magic from the interface wrappers.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tls.c  | 89 +++++++++++++++++++++---------------------
 include/asm-x86/desc.h | 11 +++++-
 2 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 98f428be8e8c..f11c92a3faac 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -24,6 +24,29 @@ static int get_free_idx(void)
 	return -ESRCH;
 }
 
+static void set_tls_desc(struct task_struct *p, int idx,
+			 const struct user_desc *info)
+{
+	struct thread_struct *t = &p->thread;
+	struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	int cpu;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	if (LDT_empty(info))
+		desc->a = desc->b = 0;
+	else
+		fill_ldt(desc, info);
+
+	if (t == &current->thread)
+		load_TLS(t, cpu);
+
+	put_cpu();
+}
+
 /*
  * Set a given TLS descriptor:
  */
@@ -31,10 +54,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
 		       struct user_desc __user *u_info,
 		       int can_allocate)
 {
-	struct thread_struct *t = &p->thread;
 	struct user_desc info;
-	u32 *desc;
-	int cpu;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -57,23 +77,8 @@ int do_set_thread_area(struct task_struct *p, int idx,
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = (u32 *) &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	set_tls_desc(p, idx, &info);
 
-	/*
-	 * We must not get preempted while modifying the TLS.
-	 */
-	cpu = get_cpu();
-
-	if (LDT_empty(&info)) {
-		desc[0] = 0;
-		desc[1] = 0;
-	} else
-		fill_ldt((struct desc_struct *)desc, &info);
-
-	if (t == &current->thread)
-		load_TLS(t, cpu);
-
-	put_cpu();
 	return 0;
 }
 
@@ -87,42 +92,38 @@ asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
  * Get the current Thread-Local Storage area:
  */
 
-#define GET_LIMIT(desc)		(((desc)[0] & 0x0ffff) | ((desc)[1] & 0xf0000))
-#define GET_32BIT(desc)		(((desc)[1] >> 22) & 1)
-#define GET_CONTENTS(desc)	(((desc)[1] >> 10) & 3)
-#define GET_WRITABLE(desc)	(((desc)[1] >>  9) & 1)
-#define GET_LIMIT_PAGES(desc)	(((desc)[1] >> 23) & 1)
-#define GET_PRESENT(desc)	(((desc)[1] >> 15) & 1)
-#define GET_USEABLE(desc)	(((desc)[1] >> 20) & 1)
-#define GET_LONGMODE(desc)	(((desc)[1] >> 21) & 1)
+static void fill_user_desc(struct user_desc *info, int idx,
+			   const struct desc_struct *desc)
+
+{
+	memset(info, 0, sizeof(*info));
+	info->entry_number = idx;
+	info->base_addr = get_desc_base(desc);
+	info->limit = get_desc_limit(desc);
+	info->seg_32bit = desc->d;
+	info->contents = desc->type >> 2;
+	info->read_exec_only = !(desc->type & 2);
+	info->limit_in_pages = desc->g;
+	info->seg_not_present = !desc->p;
+	info->useable = desc->avl;
+#ifdef CONFIG_X86_64
+	info->lm = desc->l;
+#endif
+}
 
 int do_get_thread_area(struct task_struct *p, int idx,
 		       struct user_desc __user *u_info)
 {
-	struct thread_struct *t = &p->thread;
 	struct user_desc info;
-	u32 *desc;
 
 	if (idx == -1 && get_user(idx, &u_info->entry_number))
 		return -EFAULT;
+
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = (u32 *) &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
-
-	memset(&info, 0, sizeof(struct user_desc));
-	info.entry_number = idx;
-	info.base_addr = get_desc_base((struct desc_struct *)desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
-#ifdef CONFIG_X86_64
-	info.lm = GET_LONGMODE(desc);
-#endif
+	fill_user_desc(&info, idx,
+		       &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
 
 	if (copy_to_user(u_info, &info, sizeof(info)))
 		return -EFAULT;
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index e17581ad8824..5b6a05d3a771 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -7,7 +7,8 @@
 #include <asm/mmu.h>
 #include <linux/smp.h>
 
-static inline void fill_ldt(struct desc_struct *desc, struct user_desc *info)
+static inline void fill_ldt(struct desc_struct *desc,
+			    const struct user_desc *info)
 {
 	desc->limit0 = info->limit & 0x0ffff;
 	desc->base0 = info->base_addr & 0x0000ffff;
@@ -275,10 +276,16 @@ static inline void load_LDT(mm_context_t *pc)
 	preempt_enable();
 }
 
-static inline unsigned long get_desc_base(struct desc_struct *desc)
+static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
 	return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
 }
+
+static inline unsigned long get_desc_limit(const struct desc_struct *desc)
+{
+	return desc->limit0 | (desc->limit << 16);
+}
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			      unsigned dpl, unsigned ist, unsigned seg)
 {

From 4c79a2d8e5b7e0a2f987ace9b6af9e7a1655447b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:52 +0100
Subject: [PATCH 385/890] x86: x86 user_regset TLS

This adds accessor functions in the user_regset style for the TLS data.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tls.c | 89 ++++++++++++++++++++++++++++++++++++++++---
 arch/x86/kernel/tls.h | 21 ++++++++++
 2 files changed, 104 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/kernel/tls.h

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index f11c92a3faac..6dfd4e76661a 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -2,6 +2,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/user.h>
+#include <linux/regset.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
@@ -10,6 +11,8 @@
 #include <asm/processor.h>
 #include <asm/proto.h>
 
+#include "tls.h"
+
 /*
  * sys_alloc_thread_area: get a yet unused TLS descriptor index.
  */
@@ -25,7 +28,7 @@ static int get_free_idx(void)
 }
 
 static void set_tls_desc(struct task_struct *p, int idx,
-			 const struct user_desc *info)
+			 const struct user_desc *info, int n)
 {
 	struct thread_struct *t = &p->thread;
 	struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
@@ -36,10 +39,14 @@ static void set_tls_desc(struct task_struct *p, int idx,
 	 */
 	cpu = get_cpu();
 
-	if (LDT_empty(info))
-		desc->a = desc->b = 0;
-	else
-		fill_ldt(desc, info);
+	while (n-- > 0) {
+		if (LDT_empty(info))
+			desc->a = desc->b = 0;
+		else
+			fill_ldt(desc, info);
+		++info;
+		++desc;
+	}
 
 	if (t == &current->thread)
 		load_TLS(t, cpu);
@@ -77,7 +84,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	set_tls_desc(p, idx, &info);
+	set_tls_desc(p, idx, &info, 1);
 
 	return 0;
 }
@@ -134,3 +141,73 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
 {
 	return do_get_thread_area(current, -1, u_info);
 }
+
+int regset_tls_active(struct task_struct *target,
+		      const struct user_regset *regset)
+{
+	struct thread_struct *t = &target->thread;
+	int n = GDT_ENTRY_TLS_ENTRIES;
+	while (n > 0 && desc_empty(&t->tls_array[n - 1]))
+		--n;
+	return n;
+}
+
+int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	const struct desc_struct *tls;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	    (pos % sizeof(struct user_desc)) != 0 ||
+	    (count % sizeof(struct user_desc)) != 0)
+		return -EINVAL;
+
+	pos /= sizeof(struct user_desc);
+	count /= sizeof(struct user_desc);
+
+	tls = &target->thread.tls_array[pos];
+
+	if (kbuf) {
+		struct user_desc *info = kbuf;
+		while (count-- > 0)
+			fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+				       tls++);
+	} else {
+		struct user_desc __user *u_info = ubuf;
+		while (count-- > 0) {
+			struct user_desc info;
+			fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+			if (__copy_to_user(u_info++, &info, sizeof(info)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+	const struct user_desc *info;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	    (pos % sizeof(struct user_desc)) != 0 ||
+	    (count % sizeof(struct user_desc)) != 0)
+		return -EINVAL;
+
+	if (kbuf)
+		info = kbuf;
+	else if (__copy_from_user(infobuf, ubuf, count))
+		return -EFAULT;
+	else
+		info = infobuf;
+
+	set_tls_desc(target,
+		     GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
+		     info, count / sizeof(struct user_desc));
+
+	return 0;
+}
diff --git a/arch/x86/kernel/tls.h b/arch/x86/kernel/tls.h
new file mode 100644
index 000000000000..2f083a2fe216
--- /dev/null
+++ b/arch/x86/kernel/tls.h
@@ -0,0 +1,21 @@
+/*
+ * Internal declarations for x86 TLS implementation functions.
+ *
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#ifndef _ARCH_X86_KERNEL_TLS_H
+
+#include <linux/regset.h>
+
+extern user_regset_active_fn regset_tls_active;
+extern user_regset_get_fn regset_tls_get;
+extern user_regset_set_fn regset_tls_set;
+
+#endif	/* _ARCH_X86_KERNEL_TLS_H */

From 91e7b707a4776185f91f03bd052aa53af820094e Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:52 +0100
Subject: [PATCH 386/890] x86: x86 user_regset general regs

This adds accessor functions in the user_regset style for
the general registers (struct user_regs_struct).

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 109 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 084805ab7323..ef349ff170a7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -13,6 +13,7 @@
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
+#include <linux/regset.h>
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/audit.h>
@@ -368,6 +369,59 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
 	return *pt_regs_access(task_pt_regs(task), offset);
 }
 
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	if (kbuf) {
+		unsigned long *k = kbuf;
+		while (count > 0) {
+			*k++ = getreg(target, pos);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		unsigned long __user *u = ubuf;
+		while (count > 0) {
+			if (__put_user(getreg(target, pos), u++))
+				return -EFAULT;
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	return 0;
+}
+
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+	if (kbuf) {
+		const unsigned long *k = kbuf;
+		while (count > 0 && !ret) {
+			ret = putreg(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const unsigned long  __user *u = ubuf;
+		while (count > 0 && !ret) {
+			unsigned long word;
+			ret = __get_user(word, u++);
+			if (ret)
+				break;
+			ret = putreg(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+	return ret;
+}
+
 /*
  * This function is trivial and will be inlined by the compiler.
  * Having it separates the implementation details of debug
@@ -1008,6 +1062,61 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 #undef R32
 #undef SEG32
 
+static int genregs32_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	if (kbuf) {
+		compat_ulong_t *k = kbuf;
+		while (count > 0) {
+			getreg32(target, pos, k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		compat_ulong_t __user *u = ubuf;
+		while (count > 0) {
+			compat_ulong_t word;
+			getreg32(target, pos, &word);
+			if (__put_user(word, u++))
+				return -EFAULT;
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	return 0;
+}
+
+static int genregs32_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+		while (count > 0 && !ret) {
+			ret = putreg(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t __user *u = ubuf;
+		while (count > 0 && !ret) {
+			compat_ulong_t word;
+			ret = __get_user(word, u++);
+			if (ret)
+				break;
+			ret = putreg(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+	return ret;
+}
+
 static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
 {
 	siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));

From 070459d95e132726a6ac0c8204f0b5ef653bd824 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:53 +0100
Subject: [PATCH 387/890] x86: x86 user_regset_view

This defines task_user_regset_view and the tables
describing the x86 user_regset layouts for 32 and 64.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 87 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ef349ff170a7..196cc27bd39a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -15,6 +15,7 @@
 #include <linux/ptrace.h>
 #include <linux/regset.h>
 #include <linux/user.h>
+#include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
@@ -32,6 +33,14 @@
 #include <asm/proto.h>
 #include <asm/ds.h>
 
+#include "tls.h"
+
+enum x86_regset {
+	REGSET_GENERAL,
+	REGSET_FP,
+	REGSET_XFP,
+	REGSET_TLS,
+};
 
 /*
  * does not yet catch signals sent when the child dies.
@@ -1335,6 +1344,84 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 
 #endif	/* CONFIG_IA32_EMULATION */
 
+#ifdef CONFIG_X86_64
+
+static const struct user_regset x86_64_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_regs_struct) / sizeof(long),
+		.size = sizeof(long), .align = sizeof(long),
+		.get = genregs_get, .set = genregs_set
+	},
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct user_i387_struct) / sizeof(long),
+		.size = sizeof(long), .align = sizeof(long),
+		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+	},
+};
+
+static const struct user_regset_view user_x86_64_view = {
+	.name = "x86_64", .e_machine = EM_X86_64,
+	.regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
+};
+
+#else  /* CONFIG_X86_32 */
+
+#define user_regs_struct32	user_regs_struct
+#define genregs32_get		genregs_get
+#define genregs32_set		genregs_set
+
+#endif	/* CONFIG_X86_64 */
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset x86_32_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
+		.size = sizeof(u32), .align = sizeof(u32),
+		.get = genregs32_get, .set = genregs32_set
+	},
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct user_i387_struct) / sizeof(u32),
+		.size = sizeof(u32), .align = sizeof(u32),
+		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
+	},
+	[REGSET_XFP] = {
+		.core_note_type = NT_PRXFPREG,
+		.n = sizeof(struct user_i387_struct) / sizeof(u32),
+		.size = sizeof(u32), .align = sizeof(u32),
+		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+	},
+	[REGSET_TLS] = {
+		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
+		.size = sizeof(struct user_desc),
+		.align = sizeof(struct user_desc),
+		.active = regset_tls_active,
+		.get = regset_tls_get, .set = regset_tls_set
+	},
+};
+
+static const struct user_regset_view user_x86_32_view = {
+	.name = "i386", .e_machine = EM_386,
+	.regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_IA32_EMULATION
+	if (test_tsk_thread_flag(task, TIF_IA32))
+#endif
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+		return &user_x86_32_view;
+#endif
+#ifdef CONFIG_X86_64
+	return &user_x86_64_view;
+#endif
+}
+
 #ifdef CONFIG_X86_32
 
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)

From 975511be74d1b82bf041aaed2861a242abfe3886 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:54 +0100
Subject: [PATCH 388/890] x86: x86 CORE_DUMP_USE_REGSET

This switches x86 to the user_regset-based code for ELF core dumps.
The core dumps come out exactly the same as before.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/elf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 5e5705bf082a..123e2d599c6d 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -201,6 +201,7 @@ extern int vdso_enabled;
 
 #endif /* !CONFIG_X86_32 */
 
+#define CORE_DUMP_USE_REGSET
 #define USE_ELF_CORE_DUMP
 #define ELF_EXEC_PAGESIZE	4096
 

From 5a4646a4efed8c835f76c3b88f3155f6ab5b8d9b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:54 +0100
Subject: [PATCH 389/890] x86: x86 ptrace user_regset

This cleans up the PTRACE_*REGS* request code so each one is just a
simple call to copy_regset_to_user or copy_regset_from_user.  The
ptrace layouts already match the user_regset formats (core dump formats).

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 218 ++++++++++++---------------------------
 1 file changed, 68 insertions(+), 150 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 196cc27bd39a..f8b89059e6ed 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -750,9 +750,13 @@ void ptrace_disable(struct task_struct *child)
 	}
 }
 
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset_view user_x86_32_view; /* Initialized below. */
+#endif
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	int i, ret;
+	int ret;
 	unsigned long __user *datap = (unsigned long __user *)data;
 
 	switch (request) {
@@ -805,82 +809,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		break;
 
-	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-		if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) {
-			ret = -EIO;
-			break;
-		}
-		for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
-			__put_user(getreg(child, i), datap);
-			datap++;
-		}
-		ret = 0;
-		break;
-	}
+	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
+		return copy_regset_to_user(child,
+					   task_user_regset_view(current),
+					   REGSET_GENERAL,
+					   0, sizeof(struct user_regs_struct),
+					   datap);
 
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-		if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) {
-			ret = -EIO;
-			break;
-		}
-		for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) {
-			__get_user(tmp, datap);
-			putreg(child, i, tmp);
-			datap++;
-		}
-		ret = 0;
-		break;
-	}
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(child,
+					     task_user_regset_view(current),
+					     REGSET_GENERAL,
+					     0, sizeof(struct user_regs_struct),
+					     datap);
 
-	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-		if (!access_ok(VERIFY_WRITE, datap,
-			       sizeof(struct user_i387_struct))) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		if (!tsk_used_math(child))
-			init_fpu(child);
-		get_fpregs((struct user_i387_struct __user *)data, child);
-		break;
-	}
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child,
+					   task_user_regset_view(current),
+					   REGSET_FP,
+					   0, sizeof(struct user_i387_struct),
+					   datap);
 
-	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-		if (!access_ok(VERIFY_READ, datap,
-			       sizeof(struct user_i387_struct))) {
-			ret = -EIO;
-			break;
-		}
-		set_stopped_child_used_math(child);
-		set_fpregs(child, (struct user_i387_struct __user *)data);
-		ret = 0;
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(child,
+					     task_user_regset_view(current),
+					     REGSET_FP,
+					     0, sizeof(struct user_i387_struct),
+					     datap);
 
 #ifdef CONFIG_X86_32
-	case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
-		if (!access_ok(VERIFY_WRITE, datap,
-			       sizeof(struct user_fxsr_struct))) {
-			ret = -EIO;
-			break;
-		}
-		if (!tsk_used_math(child))
-			init_fpu(child);
-		ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
-		break;
-	}
+	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
+		return copy_regset_to_user(child, &user_x86_32_view,
+					   REGSET_XFP,
+					   0, sizeof(struct user_fxsr_struct),
+					   datap);
 
-	case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
-		if (!access_ok(VERIFY_READ, datap,
-			       sizeof(struct user_fxsr_struct))) {
-			ret = -EIO;
-			break;
-		}
-		set_stopped_child_used_math(child);
-		ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
-		break;
-	}
+	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
+		return copy_regset_from_user(child, &user_x86_32_view,
+					     REGSET_XFP,
+					     0, sizeof(struct user_fxsr_struct),
+					     datap);
 #endif
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1243,90 +1211,40 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 		ret = putreg32(child, addr, data);
 		break;
 
-	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-		int i;
+	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
+		return copy_regset_to_user(child, &user_x86_32_view,
+					   REGSET_GENERAL,
+					   0, sizeof(struct user_regs_struct32),
+					   datap);
 
-		if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(__u32)) {
-			getreg32(child, i, &val);
-			ret |= __put_user(val, (u32 __user *)datap);
-			datap += sizeof(u32);
-		}
-		break;
-	}
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(child, &user_x86_32_view,
+					     REGSET_GENERAL, 0,
+					     sizeof(struct user_regs_struct32),
+					     datap);
 
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-		int i;
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child, &user_x86_32_view,
+					   REGSET_FP, 0,
+					   sizeof(struct user_i387_ia32_struct),
+					   datap);
 
-		if (!access_ok(VERIFY_READ, datap, 16*4)) {
-			ret = -EIO;
-			break;
-		}
-		ret = 0;
-		for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(u32)) {
-			ret |= __get_user(tmp, (u32 __user *)datap);
-			putreg32(child, i, tmp);
-			datap += sizeof(u32);
-		}
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(
+			child, &user_x86_32_view, REGSET_FP,
+			0, sizeof(struct user_i387_ia32_struct), datap);
 
-	case PTRACE_GETFPREGS:
-		ret = -EIO;
-		if (!access_ok(VERIFY_READ, compat_ptr(data),
-			       sizeof(struct user_i387_struct)))
-			break;
-		save_i387_ia32(child, datap, childregs, 1);
-		ret = 0;
-			break;
+	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
+		return copy_regset_to_user(child, &user_x86_32_view,
+					   REGSET_XFP, 0,
+					   sizeof(struct user32_fxsr_struct),
+					   datap);
 
-	case PTRACE_SETFPREGS:
-		ret = -EIO;
-		if (!access_ok(VERIFY_WRITE, datap,
-			       sizeof(struct user_i387_struct)))
-			break;
-		ret = 0;
-		/* don't check EFAULT to be bug-to-bug compatible to i386 */
-		restore_i387_ia32(child, datap, 1);
-		break;
-
-	case PTRACE_GETFPXREGS: {
-		struct user32_fxsr_struct __user *u = datap;
-
-		init_fpu(child);
-		ret = -EIO;
-		if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
-			break;
-			ret = -EFAULT;
-		if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
-			break;
-		ret = __put_user(childregs->cs, &u->fcs);
-		ret |= __put_user(child->thread.ds, &u->fos);
-		break;
-	}
-	case PTRACE_SETFPXREGS: {
-		struct user32_fxsr_struct __user *u = datap;
-
-		unlazy_fpu(child);
-		ret = -EIO;
-		if (!access_ok(VERIFY_READ, u, sizeof(*u)))
-			break;
-		/*
-		 * no checking to be bug-to-bug compatible with i386.
-		 * but silence warning
-		 */
-		if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
-			;
-		set_stopped_child_used_math(child);
-		child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
-		ret = 0;
-		break;
-	}
+	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
+		return copy_regset_from_user(child, &user_x86_32_view,
+					     REGSET_XFP, 0,
+					     sizeof(struct user32_fxsr_struct),
+					     datap);
 
 	case PTRACE_GETEVENTMSG:
 		ret = put_user(child->ptrace_message,

From 60b3b9af35aad66345e395be911e46fb8443f0c5 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:55 +0100
Subject: [PATCH 390/890] x86: x86 user_regset cleanup

This removes a bunch of dead code that is no longer needed now
that the user_regset interfaces are being used for all these jobs.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i387.c        | 105 +++-------------------------------
 arch/x86/kernel/process_32.c  |  16 ------
 arch/x86/kernel/process_64.c  |  19 ------
 arch/x86/math-emu/fpu_entry.c |  14 -----
 include/asm-x86/elf.h         |  68 ----------------------
 include/asm-x86/i387.h        |  14 -----
 include/asm-x86/math_emu.h    |   5 --
 7 files changed, 9 insertions(+), 232 deletions(-)

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f7f7568dd7bc..26719bd2c77c 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -454,113 +454,26 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
 	return err;
 }
 
-#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
-
-#ifdef CONFIG_X86_64
-
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
-{
-	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
-{
-	return xfpregs_set(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-#else
-
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
-{
-	return fpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
-{
-	return fpregs_set(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *tsk)
-{
-	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-int set_fpxregs(struct task_struct *tsk, struct user_fxsr_struct __user *buf)
-{
-	return xfpregs_get(tsk, NULL, 0, sizeof(*buf), NULL, buf);
-}
-
-#endif
-
 /*
  * FPU state for core dumps.
+ * This is only used for a.out dumps now.
+ * It is declared generically using elf_fpregset_t (which is
+ * struct user_i387_struct) but is in fact only used for 32-bit
+ * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
  */
-
-static inline void copy_fpu_fsave(struct task_struct *tsk,
-				  struct user_i387_struct *fpu)
-{
-	memcpy(fpu, &tsk->thread.i387.fsave,
-	       sizeof(struct user_i387_struct));
-}
-
-static inline void copy_fpu_fxsave(struct task_struct *tsk,
-				   struct user_i387_struct *fpu)
-{
-	unsigned short *to;
-	unsigned short *from;
-	int i;
-
-	memcpy(fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long));
-
-	to = (unsigned short *)&fpu->st_space[0];
-	from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
-	for (i = 0; i < 8; i++, to += 5, from += 8)
-		memcpy(to, from, 5 * sizeof(unsigned short));
-}
-
 int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
 {
 	int fpvalid;
 	struct task_struct *tsk = current;
 
 	fpvalid = !!used_math();
-	if (fpvalid) {
-		unlazy_fpu(tsk);
-		if (cpu_has_fxsr) {
-			copy_fpu_fxsave(tsk, fpu);
-		} else {
-			copy_fpu_fsave(tsk, fpu);
-		}
-	}
+	if (fpvalid)
+		fpvalid = !fpregs_get(tsk, NULL,
+				      0, sizeof(struct user_i387_ia32_struct),
+				      fpu, NULL);
 
 	return fpvalid;
 }
 EXPORT_SYMBOL(dump_fpu);
 
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
-	int fpvalid = !!tsk_used_math(tsk);
-
-	if (fpvalid) {
-		if (tsk == current)
-			unlazy_fpu(tsk);
-		if (cpu_has_fxsr)
-			copy_fpu_fxsave(tsk, fpu);
-		else
-			copy_fpu_fsave(tsk, fpu);
-	}
-	return fpvalid;
-}
-
-int dump_task_extended_fpu(struct task_struct *tsk,
-			   struct user32_fxsr_struct *fpu)
-{
-	int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
-
-	if (fpvalid) {
-		if (tsk == current)
-		       unlazy_fpu(tsk);
-		memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
-	}
-	return fpvalid;
-}
+#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 40cc29695eba..35a6f318c541 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -571,22 +571,6 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 }
 EXPORT_SYMBOL(dump_thread);
 
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs ptregs = *task_pt_regs(tsk);
-	ptregs.cs &= 0xffff;
-	ptregs.ds &= 0xffff;
-	ptregs.es &= 0xffff;
-	ptregs.ss &= 0xffff;
-
-	elf_core_copy_regs(regs, &ptregs);
-
-	return 1;
-}
-
 #ifdef CONFIG_SECCOMP
 static void hard_disable_TSC(void)
 {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e3a3610ade10..78d80067b7f9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -544,24 +544,6 @@ out:
  */
 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
 
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs *pp, ptregs;
-
-	pp = task_pt_regs(tsk);
-
-	ptregs = *pp;
-	ptregs.cs &= 0xffff;
-	ptregs.ss &= 0xffff;
-
-	elf_core_copy_regs(regs, &ptregs);
-
-	return 1;
-}
-
 static inline void __switch_to_xtra(struct task_struct *prev_p,
 				    struct task_struct *next_p,
 				    struct tss_struct *tss)
@@ -929,4 +911,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	unsigned long range_end = mm->brk + 0x02000000;
 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
-
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index cfbdaa1532ce..760baeea5f07 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -761,17 +761,3 @@ int fpregs_soft_get(struct task_struct *target,
 
 	return ret;
 }
-
-int save_i387_soft(void *s387, struct _fpstate __user *buf)
-{
-	return fpregs_soft_get(current, NULL,
-			       0, sizeof(struct user_i387_struct),
-			       NULL, buf) ? -1 : 1;
-}
-
-int restore_i387_soft(void *s387, struct _fpstate __user *buf)
-{
-	return fpregs_soft_set(current, NULL,
-			       0, sizeof(struct user_i387_struct),
-			       NULL, buf) ? -1 : 1;
-}
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 123e2d599c6d..d6bf7421d7b0 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -104,28 +104,6 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 	_r->ax = 0; \
 } while (0)
 
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
-   now struct_user_regs, they are different) */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs)		\
-	pr_reg[0] = regs->bx;				\
-	pr_reg[1] = regs->cx;				\
-	pr_reg[2] = regs->dx;				\
-	pr_reg[3] = regs->si;				\
-	pr_reg[4] = regs->di;				\
-	pr_reg[5] = regs->bp;				\
-	pr_reg[6] = regs->ax;				\
-	pr_reg[7] = regs->ds & 0xffff;			\
-	pr_reg[8] = regs->es & 0xffff;			\
-	pr_reg[9] = regs->fs & 0xffff;			\
-	savesegment(gs,pr_reg[10]);			\
-	pr_reg[11] = regs->orig_ax;			\
-	pr_reg[12] = regs->ip;				\
-	pr_reg[13] = regs->cs & 0xffff;			\
-	pr_reg[14] = regs->flags;			\
-	pr_reg[15] = regs->sp;				\
-	pr_reg[16] = regs->ss & 0xffff;
-
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 extern unsigned int vdso_enabled;
@@ -159,41 +137,6 @@ extern unsigned int vdso_enabled;
 	clear_thread_flag(TIF_IA32);			  \
 } while (0)
 
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
-   now struct_user_regs, they are different). Assumes current is the process
-   getting dumped. */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs)  do {			\
-	unsigned v;						\
-	(pr_reg)[0] = (regs)->r15;				\
-	(pr_reg)[1] = (regs)->r14;				\
-	(pr_reg)[2] = (regs)->r13;				\
-	(pr_reg)[3] = (regs)->r12;				\
-	(pr_reg)[4] = (regs)->bp;				\
-	(pr_reg)[5] = (regs)->bx;				\
-	(pr_reg)[6] = (regs)->r11;				\
-	(pr_reg)[7] = (regs)->r10;				\
-	(pr_reg)[8] = (regs)->r9;				\
-	(pr_reg)[9] = (regs)->r8;				\
-	(pr_reg)[10] = (regs)->ax;				\
-	(pr_reg)[11] = (regs)->cx;				\
-	(pr_reg)[12] = (regs)->dx;				\
-	(pr_reg)[13] = (regs)->si;				\
-	(pr_reg)[14] = (regs)->di;				\
-	(pr_reg)[15] = (regs)->orig_ax;			\
-	(pr_reg)[16] = (regs)->ip;				\
-	(pr_reg)[17] = (regs)->cs;				\
-	(pr_reg)[18] = (regs)->flags;				\
-	(pr_reg)[19] = (regs)->sp;				\
-	(pr_reg)[20] = (regs)->ss;				\
-	(pr_reg)[21] = current->thread.fs;			\
-	(pr_reg)[22] = current->thread.gs;			\
-	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
-	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
-	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\
-	asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v;	\
-} while(0);
-
 /* I'm not sure if we can use '-' here */
 #define ELF_PLATFORM       ("x86_64")
 extern void set_personality_64bit(void);
@@ -236,18 +179,7 @@ extern int vdso_enabled;
 
 struct task_struct;
 
-extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
-extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
-#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
-
 #ifdef CONFIG_X86_32
-extern int dump_task_extended_fpu (struct task_struct *,
-				   struct user_fxsr_struct *);
-#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) \
-	dump_task_extended_fpu(tsk, elf_xfpregs)
-#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
 
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
 
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index de435b9114df..ba8105ca822b 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -327,20 +327,6 @@ static inline void clear_fpu(struct task_struct *tsk)
 
 #endif	/* CONFIG_X86_64 */
 
-/*
- * ptrace request handlers...
- */
-extern int get_fpregs(struct user_i387_struct __user *buf,
-		      struct task_struct *tsk);
-extern int set_fpregs(struct task_struct *tsk,
-		      struct user_i387_struct __user *buf);
-
-struct user_fxsr_struct;
-extern int get_fpxregs(struct user_fxsr_struct __user *buf,
-		       struct task_struct *tsk);
-extern int set_fpxregs(struct task_struct *tsk,
-		       struct user_fxsr_struct __user *buf);
-
 /*
  * i387 state interaction
  */
diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h
index a4b0aa3320e6..9bf4ae93ab10 100644
--- a/include/asm-x86/math_emu.h
+++ b/include/asm-x86/math_emu.h
@@ -1,11 +1,6 @@
 #ifndef _I386_MATH_EMU_H
 #define _I386_MATH_EMU_H
 
-#include <asm/sigcontext.h>
-
-int restore_i387_soft(void *s387, struct _fpstate __user *buf);
-int save_i387_soft(void *s387, struct _fpstate __user *buf);
-
 /* This structure matches the layout of the data saved to the stack
    following a device-not-present interrupt, part of it saved
    automatically by the 80386/80486.

From a97f52e67890fda6b373c1c1895ff1c1c69b36c8 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:55 +0100
Subject: [PATCH 391/890] x86: compat_binfmt_elf

This switches x86-64's 32-bit ELF support to use the shared
fs/compat_binfmt_elf.c code instead of our own ia32_binfmt.c.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig             |  1 +
 arch/x86/ia32/Makefile       |  3 +-
 arch/x86/vdso/vdso32-setup.c | 33 +++++++++++++
 include/asm-x86/elf.h        | 90 ++++++++++++++++++++++++++----------
 4 files changed, 100 insertions(+), 27 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1992b8fe6a2f..d89b94528153 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1532,6 +1532,7 @@ source "fs/Kconfig.binfmt"
 config IA32_EMULATION
 	bool "IA32 Emulation"
 	depends on X86_64
+	select COMPAT_BINFMT_ELF
 	help
 	  Include code to run 32-bit programs under a 64-bit kernel. You should
 	  likely turn this on, unless you're 100% sure that you don't have any
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 93a6fda65f49..52d0ccfcf6ea 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -2,8 +2,7 @@
 # Makefile for the ia32 kernel emulation subsystem.
 #
 
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \
-	ia32_binfmt.o
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
 
 sysv-$(CONFIG_SYSVIPC) := ipc32.o
 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e0feb66a2408..348f1341e1c8 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -377,6 +377,39 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 
 __initcall(sysenter_setup);
 
+#ifdef CONFIG_SYSCTL
+/* Register vsyscall32 into the ABI table */
+#include <linux/sysctl.h>
+
+static ctl_table abi_table2[] = {
+	{
+		.procname	= "vsyscall32",
+		.data		= &sysctl_vsyscall32,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{}
+};
+
+static ctl_table abi_root_table2[] = {
+	{
+		.ctl_name = CTL_ABI,
+		.procname = "abi",
+		.mode = 0555,
+		.child = abi_table2
+	},
+	{}
+};
+
+static __init int ia32_binfmt_init(void)
+{
+	register_sysctl_table(abi_root_table2);
+	return 0;
+}
+__initcall(ia32_binfmt_init);
+#endif
+
 #else  /* CONFIG_X86_32 */
 
 const char *arch_vma_name(struct vm_area_struct *vma)
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index d6bf7421d7b0..dab4744f9586 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -73,6 +73,9 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 #endif
 
 #ifdef __KERNEL__
+#include <asm/vdso.h>
+
+extern unsigned int vdso_enabled;
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -84,7 +87,6 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 #include <asm/processor.h>
 #include <asm/system.h>		/* for savesegment */
 #include <asm/desc.h>
-#include <asm/vdso.h>
 
 #define elf_check_arch(x)	elf_check_arch_ia32(x)
 
@@ -106,7 +108,6 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
-extern unsigned int vdso_enabled;
 
 #else /* CONFIG_X86_32 */
 
@@ -118,29 +119,57 @@ extern unsigned int vdso_enabled;
 #define elf_check_arch(x) \
 	((x)->e_machine == EM_X86_64)
 
+#define compat_elf_check_arch(x)	elf_check_arch_ia32(x)
+
+static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
+{
+	asm volatile("movl %0,%%fs" :: "r" (0));
+	asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS));
+	load_gs_index(0);
+	regs->ip = ip;
+	regs->sp = sp;
+	regs->flags = X86_EFLAGS_IF;
+	regs->cs = __USER32_CS;
+	regs->ss = __USER32_DS;
+}
+
+static inline void elf_common_init(struct thread_struct *t,
+				   struct pt_regs *regs, const u16 ds)
+{
+	regs->ax = regs->bx = regs->cx = regs->dx = 0;
+	regs->si = regs->di = regs->bp = 0;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
+	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+	t->fs = t->gs = 0;
+	t->fsindex = t->gsindex = 0;
+	t->ds = t->es = ds;
+}
+
 #define ELF_PLAT_INIT(_r, load_addr)	do {		  \
-	struct task_struct *cur = current;		  \
-	(_r)->bx = 0; (_r)->cx = 0; (_r)->dx = 0;	  \
-	(_r)->si = 0; (_r)->di = 0; (_r)->bp = 0;	  \
-	(_r)->ax = 0;					  \
-	(_r)->r8 = 0;					  \
-	(_r)->r9 = 0;					  \
-	(_r)->r10 = 0;					  \
-	(_r)->r11 = 0;					  \
-	(_r)->r12 = 0;					  \
-	(_r)->r13 = 0;					  \
-	(_r)->r14 = 0;					  \
-	(_r)->r15 = 0;					  \
-	cur->thread.fs = 0; cur->thread.gs = 0;		  \
-	cur->thread.fsindex = 0; cur->thread.gsindex = 0; \
-	cur->thread.ds = 0; cur->thread.es = 0;		  \
+	elf_common_init(&current->thread, _r, 0);	  \
 	clear_thread_flag(TIF_IA32);			  \
 } while (0)
 
+#define	COMPAT_ELF_PLAT_INIT(regs, load_addr)	\
+	elf_common_init(&current->thread, regs, __USER_DS)
+#define	compat_start_thread(regs, ip, sp)	do {		\
+		start_ia32_thread(regs, ip, sp);		\
+		set_fs(USER_DS);				\
+	} while (0)
+#define COMPAT_SET_PERSONALITY(ex, ibcs2)	do {		\
+		if (test_thread_flag(TIF_IA32))			\
+			clear_thread_flag(TIF_ABI_PENDING);	\
+		else						\
+			set_thread_flag(TIF_ABI_PENDING);	\
+		current->personality |= force_personality32;	\
+	} while (0)
+#define COMPAT_ELF_PLATFORM			("i686")
+
 /* I'm not sure if we can use '-' here */
 #define ELF_PLATFORM       ("x86_64")
 extern void set_personality_64bit(void);
-extern int vdso_enabled;
+extern unsigned int sysctl_vsyscall32;
+extern int force_personality32;
 
 #endif /* !CONFIG_X86_32 */
 
@@ -179,18 +208,20 @@ extern int vdso_enabled;
 
 struct task_struct;
 
-#ifdef CONFIG_X86_32
-
-#define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
-
-/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
-
-#define ARCH_DLINFO \
+#define	ARCH_DLINFO_IA32(vdso_enabled) \
 do if (vdso_enabled) {							\
 		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);			\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);	\
 } while (0)
 
+#ifdef CONFIG_X86_32
+
+#define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
+
+#define ARCH_DLINFO		ARCH_DLINFO_IA32(vdso_enabled)
+
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+
 #else /* CONFIG_X86_32 */
 
 #define VDSO_HIGH_BASE		0xffffe000U /* CONFIG_COMPAT_VDSO address */
@@ -203,6 +234,12 @@ do if (vdso_enabled) {						\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\
 } while (0)
 
+#define AT_SYSINFO		32
+
+#define COMPAT_ARCH_DLINFO	ARCH_DLINFO_IA32(sysctl_vsyscall32)
+
+#define COMPAT_ELF_ET_DYN_BASE	(TASK_UNMAPPED_BASE + 0x1000000)
+
 #endif /* !CONFIG_X86_32 */
 
 #define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
@@ -216,6 +253,9 @@ struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int executable_stack);
 
+extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
+#define compat_arch_setup_additional_pages	syscall32_setup_pages
+
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 

From a06b24e8bf03f8677f81e0f5eb03544b60fe250f Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:56 +0100
Subject: [PATCH 392/890] x86: x86 ia32_binfmt removal

Remove the old ia32_binfmt.c file, which is no longer used.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32_binfmt.c | 284 ------------------------------------
 1 file changed, 284 deletions(-)
 delete mode 100644 arch/x86/ia32/ia32_binfmt.c

diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
deleted file mode 100644
index 806135c3f975..000000000000
--- a/arch/x86/ia32/ia32_binfmt.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/* 
- * Written 2000,2002 by Andi Kleen. 
- * 
- * Loosely based on the sparc64 and IA64 32bit emulation loaders.
- * This tricks binfmt_elf.c into loading 32bit binaries using lots 
- * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
- */ 
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/compat.h>
-#include <linux/string.h>
-#include <linux/binfmts.h>
-#include <linux/mm.h>
-#include <linux/security.h>
-#include <linux/elfcore-compat.h>
-
-#include <asm/segment.h> 
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/user32.h>
-#include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
-#include <asm/i387.h>
-#include <asm/uaccess.h>
-#include <asm/ia32.h>
-#include <asm/vdso.h>
-
-#undef	ELF_ARCH
-#undef	ELF_CLASS
-#define ELF_CLASS	ELFCLASS32
-#define ELF_ARCH	EM_386
-
-#undef	elfhdr
-#undef	elf_phdr
-#undef	elf_note
-#undef	elf_addr_t
-#define elfhdr		elf32_hdr
-#define elf_phdr	elf32_phdr
-#define elf_note	elf32_note
-#define elf_addr_t	Elf32_Off
-
-#define ELF_NAME "elf/i386"
-
-#define AT_SYSINFO 32
-#define AT_SYSINFO_EHDR		33
-
-extern int sysctl_vsyscall32;
-
-#undef ARCH_DLINFO
-#define ARCH_DLINFO do {  \
-	if (sysctl_vsyscall32) { \
-		NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY);		\
-		NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE);	\
-	}	\
-} while(0)
-
-struct file;
-
-#define IA32_EMULATOR 1
-
-#undef ELF_ET_DYN_BASE
-
-#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
-
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
-
-#define _GET_SEG(x) \
-	({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; })
-
-/* Assumes current==process to be dumped */
-#undef	ELF_CORE_COPY_REGS
-#define ELF_CORE_COPY_REGS(pr_reg, regs)       		\
-	pr_reg[0] = regs->bx;				\
-	pr_reg[1] = regs->cx;				\
-	pr_reg[2] = regs->dx;				\
-	pr_reg[3] = regs->si;				\
-	pr_reg[4] = regs->di;				\
-	pr_reg[5] = regs->bp;				\
-	pr_reg[6] = regs->ax;				\
-	pr_reg[7] = _GET_SEG(ds);   			\
-	pr_reg[8] = _GET_SEG(es);			\
-	pr_reg[9] = _GET_SEG(fs);			\
-	pr_reg[10] = _GET_SEG(gs);			\
-	pr_reg[11] = regs->orig_ax;			\
-	pr_reg[12] = regs->ip;				\
-	pr_reg[13] = regs->cs;				\
-	pr_reg[14] = regs->flags;			\
-	pr_reg[15] = regs->sp;				\
-	pr_reg[16] = regs->ss;
-
-
-#define elf_prstatus	compat_elf_prstatus
-#define elf_prpsinfo	compat_elf_prpsinfo
-#define elf_fpregset_t	struct user_i387_ia32_struct
-#define	elf_fpxregset_t	struct user32_fxsr_struct
-#define user		user32
-
-#undef elf_read_implies_exec
-#define elf_read_implies_exec(ex, executable_stack)     (executable_stack != EXSTACK_DISABLE_X)
-
-#define elf_core_copy_regs		elf32_core_copy_regs
-static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs,
-					struct pt_regs *regs)
-{
-	ELF_CORE_COPY_REGS((&elfregs->ebx), regs)
-}
-
-#define elf_core_copy_task_regs		elf32_core_copy_task_regs
-static inline int elf32_core_copy_task_regs(struct task_struct *t,
-					    compat_elf_gregset_t* elfregs)
-{	
-	struct pt_regs *pp = task_pt_regs(t);
-	ELF_CORE_COPY_REGS((&elfregs->ebx), pp);
-	/* fix wrong segments */ 
-	elfregs->ds = t->thread.ds;
-	elfregs->fs = t->thread.fsindex;
-	elfregs->gs = t->thread.gsindex;
-	elfregs->es = t->thread.es;
-	return 1; 
-}
-
-#define elf_core_copy_task_fpregs	elf32_core_copy_task_fpregs
-static inline int 
-elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs,
-			    elf_fpregset_t *fpu)
-{
-	struct _fpstate_ia32 *fpstate = (void*)fpu; 
-	mm_segment_t oldfs = get_fs();
-
-	if (!tsk_used_math(tsk))
-		return 0;
-	if (!regs)
-		regs = task_pt_regs(tsk);
-	if (tsk == current)
-		unlazy_fpu(tsk);
-	set_fs(KERNEL_DS); 
-	save_i387_ia32(tsk, fpstate, regs, 1);
-	/* Correct for i386 bug. It puts the fop into the upper 16bits of 
-	   the tag word (like FXSAVE), not into the fcs*/ 
-	fpstate->cssel |= fpstate->tag & 0xffff0000; 
-	set_fs(oldfs); 
-	return 1; 
-}
-
-#define ELF_CORE_COPY_XFPREGS 1
-#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
-#define elf_core_copy_task_xfpregs	elf32_core_copy_task_xfpregs
-static inline int 
-elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
-{
-	struct pt_regs *regs = task_pt_regs(t);
-	if (!tsk_used_math(t))
-		return 0;
-	if (t == current)
-		unlazy_fpu(t); 
-	memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t));
-	xfpu->fcs = regs->cs; 
-	xfpu->fos = t->thread.ds; /* right? */ 
-	return 1;
-}
-
-#undef elf_check_arch
-#define elf_check_arch(x) \
-	((x)->e_machine == EM_386)
-
-extern int force_personality32;
-
-#undef	ELF_EXEC_PAGESIZE
-#undef	ELF_HWCAP
-#undef	ELF_PLATFORM
-#undef	SET_PERSONALITY
-#define ELF_EXEC_PAGESIZE PAGE_SIZE
-#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
-#define ELF_PLATFORM  ("i686")
-#define SET_PERSONALITY(ex, ibcs2)			\
-do {							\
-	unsigned long new_flags = 0;				\
-	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)		\
-		new_flags = _TIF_IA32;				\
-	if ((current_thread_info()->flags & _TIF_IA32)		\
-	    != new_flags)					\
-		set_thread_flag(TIF_ABI_PENDING);		\
-	else							\
-		clear_thread_flag(TIF_ABI_PENDING);		\
-	/* XXX This overwrites the user set personality */	\
-	current->personality |= force_personality32;		\
-} while (0)
-
-/* Override some function names */
-#define elf_format			elf32_format
-
-#define init_elf_binfmt			init_elf32_binfmt
-#define exit_elf_binfmt			exit_elf32_binfmt
-
-#define load_elf_binary load_elf32_binary
-
-#undef	ELF_PLAT_INIT
-#define ELF_PLAT_INIT(r, load_addr)	elf32_init(r)
-
-#undef start_thread
-#define start_thread(regs,new_rip,new_rsp) do { \
-	asm volatile("movl %0,%%fs" :: "r" (0)); \
-	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
-	load_gs_index(0); \
-	(regs)->ip = (new_rip); \
-	(regs)->sp = (new_rsp); \
-	(regs)->flags = X86_EFLAGS_IF; \
-	(regs)->cs = __USER32_CS; \
-	(regs)->ss = __USER32_DS; \
-	set_fs(USER_DS); \
-} while(0) 
-
-
-#include <linux/module.h>
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); 
-MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-static void elf32_init(struct pt_regs *);
-
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-#define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
-
-#include "../../../fs/binfmt_elf.c" 
-
-static void elf32_init(struct pt_regs *regs)
-{
-	struct task_struct *me = current; 
-	regs->di = 0;
-	regs->si = 0;
-	regs->dx = 0;
-	regs->cx = 0;
-	regs->ax = 0;
-	regs->bx = 0;
-	regs->bp = 0;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0; 
-    me->thread.fs = 0; 
-	me->thread.gs = 0;
-	me->thread.fsindex = 0; 
-	me->thread.gsindex = 0;
-    me->thread.ds = __USER_DS; 
-	me->thread.es = __USER_DS;
-}
-
-#ifdef CONFIG_SYSCTL
-/* Register vsyscall32 into the ABI table */
-#include <linux/sysctl.h>
-
-static ctl_table abi_table2[] = {
-	{
-		.procname	= "vsyscall32",
-		.data		= &sysctl_vsyscall32,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{}
-};
-
-static ctl_table abi_root_table2[] = {
-	{
-		.ctl_name = CTL_ABI,
-		.procname = "abi",
-		.mode = 0555,
-		.child = abi_table2
-	},
-	{}
-};
-
-static __init int ia32_binfmt_init(void)
-{ 
-	register_sysctl_table(abi_root_table2);
-	return 0;
-}
-__initcall(ia32_binfmt_init);
-#endif

From bb61682b3f31dec7d058cae2f6edd2275248a704 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:56 +0100
Subject: [PATCH 393/890] x86: x86 core dump TLS

This makes ELF core dumps of 32-bit processes include a new
note type NT_386_TLS (0x200) giving the contents of the TLS
slots in struct user_desc format.  This lets post mortem
examination figure out what the segment registers mean like
the debugger does with get_thread_area on a live process.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 1 +
 include/linux/elf.h      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f8b89059e6ed..e6a680c7daf7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1313,6 +1313,7 @@ static const struct user_regset x86_32_regsets[] = {
 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
 	},
 	[REGSET_TLS] = {
+		.core_note_type = NT_386_TLS,
 		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
 		.size = sizeof(struct user_desc),
 		.align = sizeof(struct user_desc),
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 576e83bd6d88..7ceb24d87c1a 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -355,6 +355,7 @@ typedef struct elf64_shdr {
 #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 #define NT_PPC_VMX	0x100		/* PowerPC Altivec/VMX registers */
+#define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 
 
 /* Note header in a PT_NOTE section */

From fdadd54db5e9f05e673eb08746bf2bea460d9f0d Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:56 +0100
Subject: [PATCH 394/890] x86: x86 ptrace generic requests

This removes duplicated code by calling the generic ptrace_request and
compat_ptrace_request functions for the things they already handle.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 37 +------------------------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e6a680c7daf7..3e34b14e8846 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -760,12 +760,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	unsigned long __user *datap = (unsigned long __user *)data;
 
 	switch (request) {
-	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		ret = generic_ptrace_peekdata(child, addr, data);
-		break;
-
 	/* read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		unsigned long tmp;
@@ -787,12 +781,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = generic_ptrace_pokedata(child, addr, data);
-		break;
-
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
 		if ((addr & (sizeof(data) - 1)) || addr < 0 ||
@@ -1183,24 +1171,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	childregs = task_pt_regs(child);
 
 	switch (request) {
-	case PTRACE_PEEKDATA:
-	case PTRACE_PEEKTEXT:
-		ret = 0;
-		if (access_process_vm(child, addr, &val, sizeof(u32), 0) !=
-		    sizeof(u32))
-			ret = -EIO;
-		else
-			ret = put_user(val, (unsigned int __user *)datap);
-		break;
-
-	case PTRACE_POKEDATA:
-	case PTRACE_POKETEXT:
-		ret = 0;
-		if (access_process_vm(child, addr, &data, sizeof(u32), 1) !=
-		    sizeof(u32))
-			ret = -EIO;
-		break;
-
 	case PTRACE_PEEKUSR:
 		ret = getreg32(child, addr, &val);
 		if (ret == 0)
@@ -1246,13 +1216,8 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 					     sizeof(struct user32_fxsr_struct),
 					     datap);
 
-	case PTRACE_GETEVENTMSG:
-		ret = put_user(child->ptrace_message,
-			       (unsigned int __user *)compat_ptr(data));
-		break;
-
 	default:
-		BUG();
+		return compat_ptrace_request(child, request, addr, data);
 	}
 
  out:

From 7ba65c7e17a292fe1e6e48fd38d1b1ecb626b586 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:57 +0100
Subject: [PATCH 395/890] x86: remove __init modifier from header declaration

This patch removes the __init modifier from an extern function
declaration in acpi.h.

Besides not being strictly needed, it requires the inclusion of
linux/init.h, which is usually not even included directly, increasing
header mess by a lot.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index c477e5741751..2feb0c494be7 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -158,7 +158,7 @@ extern int acpi_scan_nodes(unsigned long start, unsigned long end);
 #ifdef CONFIG_X86_64
 # define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
 #endif
-extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
+extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
 				   int num_nodes);
 #else
 static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,

From 2f66dcc933f012cd487c5ebf5d400e50b1d5c488 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:31:57 +0100
Subject: [PATCH 396/890] x86: finish processor.h integration

What's left in processor_32.h and processor_64.h cannot be cleanly
integrated. However, it's just a couple of definitions. They are moved
to processor.h around ifdefs, and the original files are deleted. Note that
there's much less headers included in the final version.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor.h | 142 +++++++++++++++++++++++++++++++++++-
 1 file changed, 138 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index ea222cfe7b00..e6fa06fee72a 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -7,12 +7,24 @@
 struct task_struct;
 struct mm_struct;
 
+#include <asm/vm86.h>
+#include <asm/math_emu.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/system.h>
 #include <asm/page.h>
 #include <asm/percpu.h>
-#include <asm/system.h>
-#include <asm/percpu.h>
+#include <asm/msr.h>
+#include <asm/desc_defs.h>
+#include <linux/personality.h>
 #include <linux/cpumask.h>
 #include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/init.h>
 
 /*
  * Default implementation of macro that returns current
@@ -285,9 +297,13 @@ union i387_union {
 };
 
 #ifdef CONFIG_X86_32
-# include "processor_32.h"
+/*
+ * the following now lives in the per cpu area:
+ * extern	int cpu_llc_id[NR_CPUS];
+ */
+DECLARE_PER_CPU(u8, cpu_llc_id);
 #else
-# include "processor_64.h"
+DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -770,6 +786,124 @@ static inline void prefetchw(const void *x)
 }
 
 #define spin_lock_prefetch(x)	prefetchw(x)
+#ifdef CONFIG_X86_32
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE	(PAGE_OFFSET)
+
+#define INIT_THREAD  {							\
+	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
+	.vm86_info = NULL,						\
+	.sysenter_cs = __KERNEL_CS,					\
+	.io_bitmap_ptr = NULL,						\
+	.fs = __KERNEL_PERCPU,						\
+}
+
+/*
+ * Note that the .io_bitmap member must be extra-big. This is because
+ * the CPU will access an additional byte beyond the end of the IO
+ * permission bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+#define INIT_TSS  {							\
+	.x86_tss = {							\
+		.sp0		= sizeof(init_stack) + (long)&init_stack, \
+		.ss0		= __KERNEL_DS,				\
+		.ss1		= __KERNEL_CS,				\
+		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		\
+	 },								\
+	.io_bitmap	= { [0 ... IO_BITMAP_LONGS] = ~0 },		\
+}
+
+#define start_thread(regs, new_eip, new_esp) do {		\
+	__asm__("movl %0,%%gs": :"r" (0));			\
+	regs->fs = 0;						\
+	set_fs(USER_DS);					\
+	regs->ds = __USER_DS;					\
+	regs->es = __USER_DS;					\
+	regs->ss = __USER_DS;					\
+	regs->cs = __USER_CS;					\
+	regs->ip = new_eip;					\
+	regs->sp = new_esp;					\
+} while (0)
+
+
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
+#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
+#define KSTK_TOP(info)                                                 \
+({                                                                     \
+       unsigned long *__ptr = (unsigned long *)(info);                 \
+       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
+})
+
+/*
+ * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * This is necessary to guarantee that the entire "struct pt_regs"
+ * is accessable even if the CPU haven't stored the SS/ESP registers
+ * on the stack (interrupt gate does not save these registers
+ * when switching to the same priv ring).
+ * Therefore beware: accessing the ss/esp fields of the
+ * "struct pt_regs" is possible, but they may contain the
+ * completely wrong values.
+ */
+#define task_pt_regs(task)                                             \
+({                                                                     \
+       struct pt_regs *__regs__;                                       \
+       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
+       __regs__ - 1;                                                   \
+})
+
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
+
+#else
+/*
+ * User space process size. 47bits minus one guard page.
+ */
+#define TASK_SIZE64	(0x800000000000UL - 4096)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+			   0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? \
+				 IA32_PAGE_OFFSET : TASK_SIZE64)
+#define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? \
+				  IA32_PAGE_OFFSET : TASK_SIZE64)
+
+#define INIT_THREAD  { \
+	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
+
+#define INIT_TSS  { \
+	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
+
+#define start_thread(regs, new_rip, new_rsp) do { 			     \
+	asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));  \
+	load_gs_index(0);						     \
+	(regs)->ip = (new_rip);						     \
+	(regs)->sp = (new_rsp);						     \
+	write_pda(oldrsp, (new_rsp));					     \
+	(regs)->cs = __USER_CS;						     \
+	(regs)->ss = __USER_DS;						     \
+	(regs)->flags = 0x200;						     \
+	set_fs(USER_DS);						     \
+} while (0)
+
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
+
+#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+#endif /* CONFIG_X86_64 */
+
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */

From aca46ba29298810b329518b96f97ace985027b59 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:31:58 +0100
Subject: [PATCH 397/890] x86: remove unused include/asm-x86/processor_32/64.h

remove unused include/asm-x86/processor_32/64.h.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/processor_32.h | 97 ----------------------------------
 include/asm-x86/processor_64.h | 66 -----------------------
 2 files changed, 163 deletions(-)
 delete mode 100644 include/asm-x86/processor_32.h
 delete mode 100644 include/asm-x86/processor_64.h

diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
deleted file mode 100644
index 0d6a430b2bc3..000000000000
--- a/include/asm-x86/processor_32.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_I386_PROCESSOR_H
-#define __ASM_I386_PROCESSOR_H
-
-#include <asm/vm86.h>
-#include <asm/math_emu.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/sigcontext.h>
-#include <asm/cpufeature.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <linux/threads.h>
-#include <linux/init.h>
-#include <asm/desc_defs.h>
-
-/*
- * the following now lives in the per cpu area:
- * extern	int cpu_llc_id[NR_CPUS];
- */
-DECLARE_PER_CPU(u8, cpu_llc_id);
-
-/*
- * User space process size: 3GB (default).
- */
-#define TASK_SIZE	(PAGE_OFFSET)
-
-#define INIT_THREAD  {							\
-	.sp0 = sizeof(init_stack) + (long)&init_stack,			\
-	.vm86_info = NULL,						\
-	.sysenter_cs = __KERNEL_CS,					\
-	.io_bitmap_ptr = NULL,						\
-	.fs = __KERNEL_PERCPU,						\
-}
-
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS  {							\
-	.x86_tss = {							\
-		.sp0		= sizeof(init_stack) + (long)&init_stack, \
-		.ss0		= __KERNEL_DS,				\
-		.ss1		= __KERNEL_CS,				\
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		\
-	 },								\
-	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
-}
-
-#define start_thread(regs, new_eip, new_esp) do {		\
-	__asm__("movl %0,%%gs": :"r" (0));			\
-	regs->fs = 0;						\
-	set_fs(USER_DS);					\
-	regs->ds = __USER_DS;					\
-	regs->es = __USER_DS;					\
-	regs->ss = __USER_DS;					\
-	regs->cs = __USER_CS;					\
-	regs->ip = new_eip;					\
-	regs->sp = new_esp;					\
-} while (0)
-
-
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
-#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
-
-/*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessable even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the ss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
-       __regs__ - 1;                                                   \
-})
-
-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-
-#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
deleted file mode 100644
index 04ce823ea7e6..000000000000
--- a/include/asm-x86/processor_64.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_X86_64_PROCESSOR_H
-#define __ASM_X86_64_PROCESSOR_H
-
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/sigcontext.h>
-#include <asm/cpufeature.h>
-#include <linux/threads.h>
-#include <asm/msr.h>
-#include <asm/current.h>
-#include <asm/system.h>
-#include <linux/personality.h>
-#include <asm/desc_defs.h>
-
-/*
- * User space process size. 47bits minus one guard page.
- */
-#define TASK_SIZE64	(0x800000000000UL - 4096)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
-#define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
-
-
-
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-#define INIT_THREAD  { \
-	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS  { \
-	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define start_thread(regs,new_rip,new_rsp) do { \
-	asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));	 \
-	load_gs_index(0);							\
-	(regs)->ip = (new_rip);						 \
-	(regs)->sp = (new_rsp);						 \
-	write_pda(oldrsp, (new_rsp));						 \
-	(regs)->cs = __USER_CS;							 \
-	(regs)->ss = __USER_DS;							 \
-	(regs)->flags = 0x200;							 \
-	set_fs(USER_DS);							 \
-} while(0) 
-
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
-
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
-
-#endif /* __ASM_X86_64_PROCESSOR_H */

From 79da4721117fcf188b4b007b775738a530f574da Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.warudkar@gmail.com>
Date: Wed, 30 Jan 2008 13:31:59 +0100
Subject: [PATCH 398/890] x86: fix DMI out of memory problems

People with HP Desktops (including me) encounter couple of DMI errors
during boot - dmi_save_oem_strings_devices: out of memory and
dmi_string: out of memory.

On some HP desktops the DMI data include OEM strings (type 11) out of
which only few are meaningful and most other are empty. DMI code
religiously creates copies of these 27 strings (65 bytes each in my
case) and goes OOM in dmi_string().

If DMI_MAX_DATA is bumped up a little then it goes and fails in
dmi_save_oem_strings while allocating dmi_devices of sizeof(struct
dmi_device) corresponding to these strings.

On x86_64 since we cannot use alloc_bootmem this early, the code uses a
static array of 2048 bytes (DMI_MAX_DATA) for allocating the memory DMI
needs. It does not survive the creation of empty strings and devices.

Fix this by detecting and not newly allocating empty strings and instead
using a one statically defined dmi_empty_string.

Also do not create a new struct dmi_device for each empty string - use
one statically define dmi_device with .name=dmi_empty_string and add
that to the dmi_devices list.

On x64 this should stop the OOM with same current size of DMI_MAX_DATA
and on x86 this should save a good amount of (27*65 bytes +
27*sizeof(struct dmi_device) bootmem.

Compile and boot tested on both 32-bit and 64-bit x86.

Signed-off-by: Parag Warudkar <parag.warudkar@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/firmware/dmi_scan.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 5e596a7e3601..0b24a1141009 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -8,6 +8,8 @@
 #include <linux/slab.h>
 #include <asm/dmi.h>
 
+static char dmi_empty_string[] = "        ";
+
 static char * __init dmi_string(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
@@ -21,11 +23,16 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s)
 		}
 
 		if (*bp != 0) {
-			str = dmi_alloc(strlen(bp) + 1);
+			size_t len = strlen(bp)+1;
+			size_t cmp_len = len > 8 ? 8 : len;
+
+			if (!memcmp(bp, dmi_empty_string, cmp_len))
+				return dmi_empty_string;
+			str = dmi_alloc(len);
 			if (str != NULL)
 				strcpy(str, bp);
 			else
-				printk(KERN_ERR "dmi_string: out of memory.\n");
+				printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len);
 		}
 	}
 
@@ -175,12 +182,23 @@ static void __init dmi_save_devices(const struct dmi_header *dm)
 	}
 }
 
+static struct dmi_device empty_oem_string_dev = {
+	.name = dmi_empty_string,
+};
+
 static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 {
 	int i, count = *(u8 *)(dm + 1);
 	struct dmi_device *dev;
 
 	for (i = 1; i <= count; i++) {
+		char *devname = dmi_string(dm, i);
+
+		if (!strcmp(devname, dmi_empty_string)) {
+			list_add(&empty_oem_string_dev.list, &dmi_devices);
+			continue;
+		}
+
 		dev = dmi_alloc(sizeof(*dev));
 		if (!dev) {
 			printk(KERN_ERR
@@ -189,7 +207,7 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 		}
 
 		dev->type = DMI_DEV_TYPE_OEM_STRING;
-		dev->name = dmi_string(dm, i);
+		dev->name = devname;
 		dev->device_data = NULL;
 
 		list_add(&dev->list, &dmi_devices);

From bca25bafbb390eeec376ac994954b99489d198ec Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.warudkar@gmail.com>
Date: Wed, 30 Jan 2008 13:31:59 +0100
Subject: [PATCH 399/890] x86: fix dmi_alloc() to not advance alloc index in
 case of

dmi_alloc() for CONFIG_X86_64 is defined to allocate from a static array
and it maintains a allocation index which is advanced each time allocation
is attempted - it gets incremented even if an allocation fails thereby
depriving any future request that may be small enough to be satisfied from
the array.

Fix this by first testing if allocation is going to be possible and
incrementing alloc index only then.

Signed-off-by: Parag Warudkar <parag.warudkar@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/dmi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 8e2b0e6aa8e7..5008c365e6e4 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -22,8 +22,9 @@ extern char dmi_alloc_data[DMI_MAX_DATA];
 static inline void *dmi_alloc(unsigned len)
 {
 	int idx = dmi_alloc_index;
-	if ((dmi_alloc_index += len) > DMI_MAX_DATA)
+	if ((dmi_alloc_index + len) > DMI_MAX_DATA)
 		return NULL;
+	dmi_alloc_index += len;
 	return dmi_alloc_data + idx;
 }
 

From 9f8daccaa05c14e5643bdd4faf5aed9cc8e6f11e Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Wed, 30 Jan 2008 13:31:59 +0100
Subject: [PATCH 400/890] PCI: remove default PCI expansion ROM memory
 allocation

increasing number of PCI slots in large multi-node systems.  The kernel
currently attempts by default to allocate memory for all PCI expansion
ROMs so there has also been an increasing number of PCI memory
allocation failures seen on these systems.  This occurs because the BIOS
either (1) provides insufficient PCI memory resource for all the
expansion ROMs or (2) provides adequate PCI memory resource for
expansion ROMs but provides the space in kernel unexpected BIOS assigned
P2P non-prefetch windows.

The resulting PCI memory allocation failures may be benign when related
to memory requests for expansion ROMs themselves but in some cases they
can occur when attempting to allocate space for more critical BARs.
This can happen when a successful expansion ROM allocation request
consumes memory resource that was intended for a non-ROM BAR.  We have
seen this happen during PCI hotplug of an adapter that contains a P2P
bridge where successful memory allocation for an expansion ROM BAR on
device behind the bridge consumed memory that was intended for a non-ROM
BAR on the P2P bridge.  In all cases the allocation failure messages can
be very confusing for users.

This patch addresses the issue by changing the kernel default behavior
so that expansion ROM memory allocations are no longer attempted by
default when the BIOS has not assigned a specific address range to the
expansion ROM BAR.  This was done by changing the 'pci=rom' boot option
behavior for BIOS unassigned expansion ROMs to actually match it's
current kernel-parameters.txt description which already implies "off" by
default. Behavior for BIOS assigned expansion ROMs implemented in
pcibios_assign_resources() [arch/x86/pci/i386.c] is unchanged.

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Cc: Greg KH <greg@kroah.com>
Cc: Jan Beulich <jbeulich@novell.com>
Acked-by: "Jun'ichi Nomura" <j-nomura@ce.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/common.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 862746390666..52deabc72a6f 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -109,6 +109,19 @@ static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
 	}
 }
 
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+	struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+
+	if (rom_r->parent)
+		return;
+	if (rom_r->start)
+		/* we deal with BIOS assigned ROM later */
+		return;
+	if (!(pci_probe & PCI_ASSIGN_ROMS))
+		rom_r->start = rom_r->end = rom_r->flags = 0;
+}
+
 /*
  *  Called after each bus is probed, but before its children
  *  are examined.
@@ -116,8 +129,12 @@ static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
 
 void __devinit  pcibios_fixup_bus(struct pci_bus *b)
 {
+	struct pci_dev *dev;
+
 	pcibios_fixup_ghosts(b);
 	pci_read_bridge_bases(b);
+	list_for_each_entry(dev, &b->devices, bus_list)
+		pcibios_fixup_device_resources(dev);
 }
 
 /*

From 7d409d6057c7244f8757ce15245f6df27271be0c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:00 +0100
Subject: [PATCH 401/890] x86: add some pirq debugging

we use a few static mapping rules in our pirq routing functions,
and for example regression f3ac84324fd94 was due to the pirq
being out of range of the remapping array. Put in a few
WARN_ON_ONCE() lines so that we get notified about any such
out-of-bound incidents.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/irq.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 88d8f5c0ecb5..ee524ca5f8c0 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -200,6 +200,7 @@ static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
 	static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
 
+	WARN_ON_ONCE(pirq >= 16);
 	return irqmap[read_config_nybble(router, 0x48, pirq-1)];
 }
 
@@ -207,7 +208,8 @@ static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
 {
 	static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
 	unsigned int val = irqmap[irq];
-		
+
+	WARN_ON_ONCE(pirq >= 16);
 	if (val) {
 		write_config_nybble(router, 0x48, pirq-1, val);
 		return 1;
@@ -257,12 +259,16 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
 static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
 	static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+	WARN_ON_ONCE(pirq >= 5);
 	return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
 }
 
 static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
 	static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+	WARN_ON_ONCE(pirq >= 5);
 	write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
 	return 1;
 }
@@ -275,12 +281,16 @@ static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq
 static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
 	static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+	WARN_ON_ONCE(pirq >= 4);
 	return read_config_nybble(router,0x43, pirqmap[pirq-1]);
 }
 
 static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
 	static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+	WARN_ON_ONCE(pirq >= 4);
 	write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
 	return 1;
 }
@@ -419,6 +429,7 @@ static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
 
 static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
+	WARN_ON_ONCE(pirq >= 9);
 	if (pirq > 8) {
 		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
 		return 0;
@@ -428,6 +439,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 
 static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
+	WARN_ON_ONCE(pirq >= 9);
 	if (pirq > 8) {
 		printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
 		return 0;

From bde6f5f59c2b2b48a7a849c129d5b48838fe77ee Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 30 Jan 2008 13:32:01 +0100
Subject: [PATCH 402/890] x86: voluntary leave_mm before entering ACPI C3

Aviod TLB flush IPIs during C3 states by voluntary leave_mm()
before entering C3.

The performance impact of TLB flush on C3 should not be significant with
respect to C3 wakeup latency. Also, CPUs tend to flush TLB in hardware while in
C3 anyways.

On a 8 logical CPU system, running make -j2, the number of tlbflush IPIs goes
down from 40 per second to ~ 0. Total number of interrupts during the run
of this workload was ~1200 per second, which makes it ~3% savings in wakeups.

There was no measurable performance or power impact however.

[ akpm@linux-foundation.org: symbol export fixes. ]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smp_32.c         | 3 ++-
 arch/x86/kernel/smp_64.c         | 3 ++-
 drivers/acpi/processor_idle.c    | 2 ++
 include/asm-ia64/acpi.h          | 2 ++
 include/asm-x86/acpi.h           | 3 +++
 include/asm-x86/mmu.h            | 8 ++++++++
 include/asm-x86/mmu_context_32.h | 2 --
 7 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index 070816ac79e1..dc0cde9d16fb 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 02a6533e8909..2fd74b06db67 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -69,13 +69,14 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
 {
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2235f4e02d26..0721a8183c89 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -534,6 +534,7 @@ static void acpi_processor_idle(void)
 		break;
 
 	case ACPI_STATE_C3:
+		acpi_unlazy_tlb(smp_processor_id());
 		/*
 		 * Must be done before busmaster disable as we might
 		 * need to access HPET !
@@ -1423,6 +1424,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 		return 0;
 	}
 
+	acpi_unlazy_tlb(smp_processor_id());
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index 81bcd5e51789..cd1cc39b5599 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -127,6 +127,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
+#define acpi_unlazy_tlb(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 2feb0c494be7..98a9ca266531 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -27,6 +27,7 @@
 
 #include <asm/numa.h>
 #include <asm/processor.h>
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -167,4 +168,6 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
 }
 #endif
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #endif /*__X86_ASM_ACPI_H*/
diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 3f922c8e1c88..efa962c38897 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -20,4 +20,12 @@ typedef struct {
 	void *vdso;
 } mm_context_t;
 
+#ifdef CONFIG_SMP
+void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
+
 #endif /* _ASM_X86_MMU_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 7eb0b0b1fb3c..8198d1cca1f3 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-void leave_mm(unsigned long cpu);
-
 static inline void switch_mm(struct mm_struct *prev,
 			     struct mm_struct *next,
 			     struct task_struct *tsk)

From ddc66df876fd33d3956f3c3acc1ae334b16eedee Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:32:01 +0100
Subject: [PATCH 403/890] x86: fix kprobe_handler reenable preemption

Fix a preemption bug in kprobe_handler(). It has to call preempt_enable()
before returning.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 521a469acaad..f0f2b98b9e20 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -467,7 +467,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 				arch_disarm_kprobe(p);
 				regs->ip = (unsigned long)p->addr;
 				reset_current_kprobe();
-				return 1;
+				ret = 1;
+				goto no_kprobe;
 #endif
 			}
 			/* We have reentered the kprobe_handler(), since

From 40102d4a41312ad4134c0b802ad074445ce8b17b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:02 +0100
Subject: [PATCH 404/890] x86: add reenter_kprobe helper

[ mhiramat@redhat.com: updated it to latest x86.git ]

Factor common X86_32, X86_64 kprobe reenter logic from deeply
indented section to helper function.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/kprobes.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f0f2b98b9e20..7dd918633c30 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -427,6 +427,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	/* Replace the return addr with trampoline addr */
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
+/*
+ * We have reentered the kprobe_handler(), since another probe was hit while
+ * within the handler. We save the original kprobes variables and just single
+ * step on the instruction of the new probe without calling any user handlers.
+ */
+static void __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+				     struct kprobe_ctlblk *kcb)
+{
+	save_previous_kprobe(kcb);
+	set_current_kprobe(p, regs, kcb);
+	kprobes_inc_nmissed_count(p);
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_REENTER;
+}
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
@@ -471,17 +485,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 				goto no_kprobe;
 #endif
 			}
-			/* We have reentered the kprobe_handler(), since
-			 * another probe was hit while within the handler.
-			 * We here save the original kprobes variables and
-			 * just single step on the instruction of the new probe
-			 * without calling any user handlers.
-			 */
-			save_previous_kprobe(kcb);
-			set_current_kprobe(p, regs, kcb);
-			kprobes_inc_nmissed_count(p);
-			prepare_singlestep(p, regs);
-			kcb->kprobe_status = KPROBE_REENTER;
+			reenter_kprobe(p, regs, kcb);
 			return 1;
 		} else {
 			if (*addr != BREAKPOINT_INSTRUCTION) {

From 59e87cdcd268daa85c0732e147c59e0c1bacd704 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 30 Jan 2008 13:32:02 +0100
Subject: [PATCH 405/890] x86: move deeply indented code to reenter_kprobe

Move some deeply indented code related to re-entrance processing
from kprobe_handler() to reenter_kprobe().

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 46 +++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7dd918633c30..4e33329ce8a3 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -432,14 +432,32 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
  * within the handler. We save the original kprobes variables and just single
  * step on the instruction of the new probe without calling any user handlers.
  */
-static void __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
-				     struct kprobe_ctlblk *kcb)
+static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+				    struct kprobe_ctlblk *kcb)
 {
+	if (kcb->kprobe_status == KPROBE_HIT_SS &&
+	    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+		regs->flags &= ~X86_EFLAGS_TF;
+		regs->flags |= kcb->kprobe_saved_flags;
+		return 0;
+#ifdef CONFIG_X86_64
+	} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
+		/* TODO: Provide re-entrancy from post_kprobes_handler() and
+		 * avoid exception stack corruption while single-stepping on
+		 * the instruction of the new probe.
+		 */
+		arch_disarm_kprobe(p);
+		regs->ip = (unsigned long)p->addr;
+		reset_current_kprobe();
+		return 1;
+#endif
+	}
 	save_previous_kprobe(kcb);
 	set_current_kprobe(p, regs, kcb);
 	kprobes_inc_nmissed_count(p);
 	prepare_singlestep(p, regs);
 	kcb->kprobe_status = KPROBE_REENTER;
+	return 1;
 }
 
 /*
@@ -466,27 +484,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	if (kprobe_running()) {
 		p = get_kprobe(addr);
 		if (p) {
-			if (kcb->kprobe_status == KPROBE_HIT_SS &&
-				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-				regs->flags &= ~X86_EFLAGS_TF;
-				regs->flags |= kcb->kprobe_saved_flags;
-				goto no_kprobe;
-#ifdef CONFIG_X86_64
-			} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
-				/* TODO: Provide re-entrancy from
-				 * post_kprobes_handler() and avoid exception
-				 * stack corruption while single-stepping on
-				 * the instruction of the new probe.
-				 */
-				arch_disarm_kprobe(p);
-				regs->ip = (unsigned long)p->addr;
-				reset_current_kprobe();
-				ret = 1;
-				goto no_kprobe;
-#endif
-			}
-			reenter_kprobe(p, regs, kcb);
-			return 1;
+			ret = reenter_kprobe(p, regs, kcb);
+			if (kcb->kprobe_status == KPROBE_REENTER)
+				return 1;
 		} else {
 			if (*addr != BREAKPOINT_INSTRUCTION) {
 			/* The breakpoint instruction was removed by

From da35c37198132abebf877cca2ad3c6d9bcd84282 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:32:03 +0100
Subject: [PATCH 406/890] x86, ptrace: rlimit BTS buffer allocation

Check the rlimit of the tracing task for total and locked memory when allocating the BTS buffer.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c | 149 +++++++++++++++++++++++++++------------
 1 file changed, 103 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3e34b14e8846..88ed1e74cee9 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -620,12 +620,80 @@ static int ptrace_bts_drain(struct task_struct *child,
 	return i;
 }
 
+static int ptrace_bts_realloc(struct task_struct *child,
+			      int size, int reduce_size)
+{
+	unsigned long rlim, vm;
+	int ret, old_size;
+
+	if (size < 0)
+		return -EINVAL;
+
+	old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+	if (old_size < 0)
+		return old_size;
+
+	ret = ds_free((void **)&child->thread.ds_area_msr);
+	if (ret < 0)
+		goto out;
+
+	size >>= PAGE_SHIFT;
+	old_size >>= PAGE_SHIFT;
+
+	current->mm->total_vm  -= old_size;
+	current->mm->locked_vm -= old_size;
+
+	if (size == 0)
+		goto out;
+
+	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm = current->mm->total_vm  + size;
+	if (rlim < vm) {
+		ret = -ENOMEM;
+
+		if (!reduce_size)
+			goto out;
+
+		size = rlim - current->mm->total_vm;
+		if (size <= 0)
+			goto out;
+	}
+
+	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm = current->mm->locked_vm  + size;
+	if (rlim < vm) {
+		ret = -ENOMEM;
+
+		if (!reduce_size)
+			goto out;
+
+		size = rlim - current->mm->locked_vm;
+		if (size <= 0)
+			goto out;
+	}
+
+	ret = ds_allocate((void **)&child->thread.ds_area_msr,
+			  size << PAGE_SHIFT);
+	if (ret < 0)
+		goto out;
+
+	current->mm->total_vm  += size;
+	current->mm->locked_vm += size;
+
+out:
+	if (child->thread.ds_area_msr)
+		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+	else
+		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+	return ret;
+}
+
 static int ptrace_bts_config(struct task_struct *child,
 			     const struct ptrace_bts_config __user *ucfg)
 {
 	struct ptrace_bts_config cfg;
-	unsigned long debugctl_mask;
-	int bts_size, ret;
+	int bts_size, ret = 0;
 	void *ds;
 
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
@@ -638,59 +706,46 @@ static int ptrace_bts_config(struct task_struct *child,
 		if (bts_size < 0)
 			return bts_size;
 	}
+	cfg.size = PAGE_ALIGN(cfg.size);
 
 	if (bts_size != cfg.size) {
-		ret = ds_free((void **)&child->thread.ds_area_msr);
+		ret = ptrace_bts_realloc(child, cfg.size,
+					 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
 		if (ret < 0)
-			return ret;
+			goto errout;
 
-		if (cfg.size > 0)
-			ret = ds_allocate((void **)&child->thread.ds_area_msr,
-					  cfg.size);
 		ds = (void *)child->thread.ds_area_msr;
-		if (ds)
-			set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-		else
-			clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-		if (ret < 0)
-			return ret;
-
-		bts_size = ds_get_bts_size(ds);
-		if (bts_size <= 0)
-			return bts_size;
 	}
 
-	if (ds) {
-		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
-			ret = ds_set_overflow(ds, DS_O_SIGNAL);
-		} else {
-			ret = ds_set_overflow(ds, DS_O_WRAP);
-		}
-		if (ret < 0)
-			return ret;
-	}
+	if (cfg.flags & PTRACE_BTS_O_SIGNAL)
+		ret = ds_set_overflow(ds, DS_O_SIGNAL);
+	else
+		ret = ds_set_overflow(ds, DS_O_WRAP);
+	if (ret < 0)
+		goto errout;
 
-	debugctl_mask = ds_debugctl_mask();
-	if (ds && (cfg.flags & PTRACE_BTS_O_TRACE)) {
-		child->thread.debugctlmsr |= debugctl_mask;
-		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-	} else {
-		/* there is no way for us to check whether we 'own'
-		 * the respective bits in the DEBUGCTL MSR, we're
-		 * about to clear */
-		child->thread.debugctlmsr &= ~debugctl_mask;
+	if (cfg.flags & PTRACE_BTS_O_TRACE)
+		child->thread.debugctlmsr |= ds_debugctl_mask();
+	else
+		child->thread.debugctlmsr &= ~ds_debugctl_mask();
 
-		if (!child->thread.debugctlmsr)
-			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-	}
-
-	if (ds && (cfg.flags & PTRACE_BTS_O_SCHED))
+	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 
-	return 0;
+out:
+	if (child->thread.debugctlmsr)
+		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+	else
+		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+	return ret;
+
+errout:
+	child->thread.debugctlmsr &= ~ds_debugctl_mask();
+	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+	goto out;
 }
 
 static int ptrace_bts_status(struct task_struct *child,
@@ -726,7 +781,7 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
 {
 	struct bts_struct rec = {
 		.qualifier = qualifier,
-		.variant.jiffies = jiffies
+		.variant.jiffies = jiffies_64
 	};
 
 	ptrace_bts_write_record(tsk, &rec);
@@ -743,10 +798,12 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_config(child, /* options = */ 0);
 	if (child->thread.ds_area_msr) {
-	    ds_free((void **)&child->thread.ds_area_msr);
-	    clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+		ptrace_bts_realloc(child, 0, 0);
+		child->thread.debugctlmsr &= ~ds_debugctl_mask();
+		if (!child->thread.debugctlmsr)
+			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	}
 }
 

From e6ae5d9540727b0e2e5e2fbeb683c84671ed0a31 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:32:03 +0100
Subject: [PATCH 407/890] x86, ptrace: support 32bit-cross-64bit BTS recording

Support BTS recording of 32bit and 64bit tasks from 32bit or 64bit tasks.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ds.c         | 91 ++++++++++++++++++------------------
 arch/x86/kernel/ptrace.c     |  2 +-
 include/asm-x86/ds.h         | 10 ++--
 include/asm-x86/ptrace-abi.h |  4 +-
 4 files changed, 53 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 6eb5d49a36bb..1c5ca4d18787 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -111,53 +111,53 @@ static struct ds_configuration ds_cfg;
  * Accessor functions for some DS and BTS fields using the above
  * global ptrace_bts_cfg.
  */
-static inline void *get_bts_buffer_base(char *base)
+static inline unsigned long get_bts_buffer_base(char *base)
 {
-	return *(void **)(base + ds_cfg.bts_buffer_base.offset);
+	return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
 }
-static inline void set_bts_buffer_base(char *base, void *value)
+static inline void set_bts_buffer_base(char *base, unsigned long value)
 {
-	(*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
 }
-static inline void *get_bts_index(char *base)
+static inline unsigned long get_bts_index(char *base)
 {
-	return *(void **)(base + ds_cfg.bts_index.offset);
+	return *(unsigned long *)(base + ds_cfg.bts_index.offset);
 }
-static inline void set_bts_index(char *base, void *value)
+static inline void set_bts_index(char *base, unsigned long value)
 {
-	(*(void **)(base + ds_cfg.bts_index.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
 }
-static inline void *get_bts_absolute_maximum(char *base)
+static inline unsigned long get_bts_absolute_maximum(char *base)
 {
-	return *(void **)(base + ds_cfg.bts_absolute_maximum.offset);
+	return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
 }
-static inline void set_bts_absolute_maximum(char *base, void *value)
+static inline void set_bts_absolute_maximum(char *base, unsigned long value)
 {
-	(*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
 }
-static inline void *get_bts_interrupt_threshold(char *base)
+static inline unsigned long get_bts_interrupt_threshold(char *base)
 {
-	return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset);
+	return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
 }
-static inline void set_bts_interrupt_threshold(char *base, void *value)
+static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
 {
-	(*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
 }
-static inline long get_from_ip(char *base)
+static inline unsigned long get_from_ip(char *base)
 {
-	return *(long *)(base + ds_cfg.from_ip.offset);
+	return *(unsigned long *)(base + ds_cfg.from_ip.offset);
 }
-static inline void set_from_ip(char *base, long value)
+static inline void set_from_ip(char *base, unsigned long value)
 {
-	(*(long *)(base + ds_cfg.from_ip.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
 }
-static inline long get_to_ip(char *base)
+static inline unsigned long get_to_ip(char *base)
 {
-	return *(long *)(base + ds_cfg.to_ip.offset);
+	return *(unsigned long *)(base + ds_cfg.to_ip.offset);
 }
-static inline void set_to_ip(char *base, long value)
+static inline void set_to_ip(char *base, unsigned long value)
 {
-	(*(long *)(base + ds_cfg.to_ip.offset)) = value;
+	(*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
 }
 static inline unsigned char get_info_type(char *base)
 {
@@ -180,7 +180,7 @@ static inline void set_info_data(char *base, unsigned long value)
 int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 {
 	size_t bts_size_in_records;
-	void *bts;
+	unsigned long bts;
 	void *ds;
 
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
@@ -197,7 +197,7 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 	if (bts_size_in_bytes <= 0)
 		return -EINVAL;
 
-	bts = kzalloc(bts_size_in_bytes, GFP_KERNEL);
+	bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
 
 	if (!bts)
 		return -ENOMEM;
@@ -205,7 +205,7 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 	ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
 
 	if (!ds) {
-		kfree(bts);
+		kfree((void *)bts);
 		return -ENOMEM;
 	}
 
@@ -221,7 +221,7 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 int ds_free(void **dsp)
 {
 	if (*dsp)
-		kfree(get_bts_buffer_base(*dsp));
+		kfree((void *)get_bts_buffer_base(*dsp));
 	kfree(*dsp);
 	*dsp = 0;
 
@@ -230,7 +230,7 @@ int ds_free(void **dsp)
 
 int ds_get_bts_size(void *ds)
 {
-	size_t size_in_bytes;
+	int size_in_bytes;
 
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
 		return -EOPNOTSUPP;
@@ -246,7 +246,7 @@ int ds_get_bts_size(void *ds)
 
 int ds_get_bts_end(void *ds)
 {
-	size_t size_in_bytes = ds_get_bts_size(ds);
+	int size_in_bytes = ds_get_bts_size(ds);
 
 	if (size_in_bytes <= 0)
 		return size_in_bytes;
@@ -256,7 +256,7 @@ int ds_get_bts_end(void *ds)
 
 int ds_get_bts_index(void *ds)
 {
-	size_t index_offset_in_bytes;
+	int index_offset_in_bytes;
 
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
 		return -EOPNOTSUPP;
@@ -288,19 +288,19 @@ int ds_get_overflow(void *ds)
 int ds_clear(void *ds)
 {
 	int bts_size = ds_get_bts_size(ds);
-	void *bts_base;
+	unsigned long bts_base;
 
 	if (bts_size <= 0)
 		return bts_size;
 
 	bts_base = get_bts_buffer_base(ds);
-	memset(bts_base, 0, bts_size);
+	memset((void *)bts_base, 0, bts_size);
 
 	set_bts_index(ds, bts_base);
 	return 0;
 }
 
-int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
+int ds_read_bts(void *ds, int index, struct bts_struct *out)
 {
 	void *bts;
 
@@ -313,8 +313,7 @@ int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
 	if (index >= ds_get_bts_size(ds))
 		return -EINVAL;
 
-	bts = get_bts_buffer_base(ds);
-	bts = (char *)bts + (index * ds_cfg.sizeof_bts);
+	bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
 
 	memset(out, 0, sizeof(*out));
 	if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
@@ -326,12 +325,12 @@ int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
 		out->variant.lbr.to_ip   = get_to_ip(bts);
 	}
 
-	return 0;
+	return sizeof(*out);;
 }
 
 int ds_write_bts(void *ds, const struct bts_struct *in)
 {
-	void *bts;
+	unsigned long bts;
 
 	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
 		return -EOPNOTSUPP;
@@ -341,33 +340,33 @@ int ds_write_bts(void *ds, const struct bts_struct *in)
 
 	bts = get_bts_index(ds);
 
-	memset(bts, 0, ds_cfg.sizeof_bts);
+	memset((void *)bts, 0, ds_cfg.sizeof_bts);
 	switch (in->qualifier) {
 	case BTS_INVALID:
 		break;
 
 	case BTS_BRANCH:
-		set_from_ip(bts, in->variant.lbr.from_ip);
-		set_to_ip(bts, in->variant.lbr.to_ip);
+		set_from_ip((void *)bts, in->variant.lbr.from_ip);
+		set_to_ip((void *)bts, in->variant.lbr.to_ip);
 		break;
 
 	case BTS_TASK_ARRIVES:
 	case BTS_TASK_DEPARTS:
-		set_from_ip(bts, BTS_ESCAPE_ADDRESS);
-		set_info_type(bts, in->qualifier);
-		set_info_data(bts, in->variant.jiffies);
+		set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
+		set_info_type((void *)bts, in->qualifier);
+		set_info_data((void *)bts, in->variant.jiffies);
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
-	bts = (char *)bts + ds_cfg.sizeof_bts;
+	bts = bts + ds_cfg.sizeof_bts;
 	if (bts >= get_bts_absolute_maximum(ds))
 		bts = get_bts_buffer_base(ds);
 	set_bts_index(ds, bts);
 
-	return 0;
+	return ds_cfg.sizeof_bts;
 }
 
 unsigned long ds_debugctl_mask(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 88ed1e74cee9..236528bec6eb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -558,7 +558,7 @@ static int ptrace_bts_read_record(struct task_struct *child,
 
 	retval = ds_read_bts((void *)child->thread.ds_area_msr,
 			     bts_index, &ret);
-	if (retval)
+	if (retval < 0)
 		return retval;
 
 	if (copy_to_user(out, &ret, sizeof(ret)))
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index b84040abee68..7881368142fa 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -39,16 +39,16 @@ enum bts_qualifier {
 };
 
 struct bts_struct {
-	enum bts_qualifier qualifier;
+	u64 qualifier;
 	union {
 		/* BTS_BRANCH */
 		struct {
-			long from_ip;
-			long to_ip;
+			u64 from_ip;
+			u64 to_ip;
 		} lbr;
 		/* BTS_TASK_ARRIVES or
 		   BTS_TASK_DEPARTS */
-		unsigned long jiffies;
+		u64 jiffies;
 	} variant;
 };
 
@@ -64,7 +64,7 @@ extern int ds_get_bts_index(void *);
 extern int ds_set_overflow(void *, int);
 extern int ds_get_overflow(void *);
 extern int ds_clear(void *);
-extern int ds_read_bts(void *, size_t, struct bts_struct *);
+extern int ds_read_bts(void *, int, struct bts_struct *);
 extern int ds_write_bts(void *, const struct bts_struct *);
 extern unsigned long ds_debugctl_mask(void);
 extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index cf2fe4633ee5..32fe137822bf 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -85,9 +85,9 @@
 */
 struct ptrace_bts_config {
 	/* requested or actual size of BTS buffer in bytes */
-	unsigned long size;
+	unsigned int size;
 	/* bitmask of below flags */
-	unsigned long flags;
+	unsigned int flags;
 };
 
 #define PTRACE_BTS_O_TRACE	0x1 /* branch trace */

From cba4b65d359268c40679ca75ac92c0b93cecf6de Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:32:03 +0100
Subject: [PATCH 408/890] x86, ptrace: add buffer size checks

Pass the buffer size for (most) ptrace commands that pass user-allocated buffers and check that size before accessing the buffer. Unfortunately, PTRACE_BTS_GET already uses all 4 parameters.
Commands that access user buffers return the number of bytes or records read or written.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ptrace.c     | 25 +++++++++++++++++++++----
 include/asm-x86/ptrace-abi.h | 14 ++++++++------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 236528bec6eb..e19a91db9b35 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -591,6 +591,7 @@ static int ptrace_bts_clear(struct task_struct *child)
 }
 
 static int ptrace_bts_drain(struct task_struct *child,
+			    long size,
 			    struct bts_struct __user *out)
 {
 	int end, i;
@@ -603,6 +604,9 @@ static int ptrace_bts_drain(struct task_struct *child,
 	if (end <= 0)
 		return end;
 
+	if (size < (end * sizeof(struct bts_struct)))
+		return -EIO;
+
 	for (i = 0; i < end; i++, out++) {
 		struct bts_struct ret;
 		int retval;
@@ -617,7 +621,7 @@ static int ptrace_bts_drain(struct task_struct *child,
 
 	ds_clear(ds);
 
-	return i;
+	return end;
 }
 
 static int ptrace_bts_realloc(struct task_struct *child,
@@ -690,15 +694,22 @@ out:
 }
 
 static int ptrace_bts_config(struct task_struct *child,
+			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
 	struct ptrace_bts_config cfg;
 	int bts_size, ret = 0;
 	void *ds;
 
+	if (cfg_size < sizeof(cfg))
+		return -EIO;
+
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 		return -EFAULT;
 
+	if ((int)cfg.size < 0)
+		return -EINVAL;
+
 	bts_size = 0;
 	ds = (void *)child->thread.ds_area_msr;
 	if (ds) {
@@ -734,6 +745,8 @@ static int ptrace_bts_config(struct task_struct *child,
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 
+	ret = sizeof(cfg);
+
 out:
 	if (child->thread.debugctlmsr)
 		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
@@ -749,11 +762,15 @@ errout:
 }
 
 static int ptrace_bts_status(struct task_struct *child,
+			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
 	void *ds = (void *)child->thread.ds_area_msr;
 	struct ptrace_bts_config cfg;
 
+	if (cfg_size < sizeof(cfg))
+		return -EIO;
+
 	memset(&cfg, 0, sizeof(cfg));
 
 	if (ds) {
@@ -923,12 +940,12 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_BTS_CONFIG:
 		ret = ptrace_bts_config
-			(child, (struct ptrace_bts_config __user *)addr);
+			(child, data, (struct ptrace_bts_config __user *)addr);
 		break;
 
 	case PTRACE_BTS_STATUS:
 		ret = ptrace_bts_status
-			(child, (struct ptrace_bts_config __user *)addr);
+			(child, data, (struct ptrace_bts_config __user *)addr);
 		break;
 
 	case PTRACE_BTS_SIZE:
@@ -946,7 +963,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_BTS_DRAIN:
 		ret = ptrace_bts_drain
-			(child, (struct bts_struct __user *) addr);
+			(child, data, (struct bts_struct __user *) addr);
 		break;
 
 	default:
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 32fe137822bf..bcf67044754c 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -99,13 +99,15 @@ struct ptrace_bts_config {
 
 #define PTRACE_BTS_CONFIG	40
 /* Configure branch trace recording.
-   DATA is ignored, ADDR points to a struct ptrace_bts_config.
+   ADDR points to a struct ptrace_bts_config.
+   DATA gives the size of that buffer.
    A new buffer is allocated, iff the size changes.
+   Returns the number of bytes read.
 */
 #define PTRACE_BTS_STATUS	41
-/* Return the current configuration.
-   DATA is ignored, ADDR points to a struct ptrace_bts_config
-   that will contain the result.
+/* Return the current configuration in a struct ptrace_bts_config
+   pointed to by ADDR; DATA gives the size of that buffer.
+   Returns the number of bytes written.
 */
 #define PTRACE_BTS_SIZE		42
 /* Return the number of available BTS records.
@@ -123,8 +125,8 @@ struct ptrace_bts_config {
 */
 #define PTRACE_BTS_DRAIN	45
 /* Read all available BTS records and clear the buffer.
-   DATA is ignored. ADDR points to an array of struct bts_struct of
-   suitable size.
+   ADDR points to an array of struct bts_struct.
+   DATA gives the size of that buffer.
    BTS records are read from oldest to newest.
    Returns number of BTS records drained.
 */

From c6334593c61c71ab2e666c015eef13995736f49a Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:32:04 +0100
Subject: [PATCH 409/890] x86, ptrace: overflow signal API

Establish the user API for sending a user-defined signal to the traced task on a BTS buffer overflow.

This should complete the user API for the BTS ptrace extension.
The patches so far implement wrap-around overflow handling as is needed for debugging.

The remaining open is another overflow handling mechanism that sends a signal to the traced task on a buffer overflow.
This will take some more time from my side.

Since, from a user perspective, this occurs behind the scenes, the patch set should already be useful. More features may/will be added on top of it (overflow signal, pageable back-up buffers, kernel tracing, core file support, profiling, ...).

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/ptrace-abi.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index bcf67044754c..b3b9e023afce 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -88,11 +88,13 @@ struct ptrace_bts_config {
 	unsigned int size;
 	/* bitmask of below flags */
 	unsigned int flags;
+	/* buffer overflow signal */
+	unsigned int signal;
 };
 
 #define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
 #define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */
-#define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG? on buffer overflow
+#define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG<signal> on buffer overflow
 				       instead of wrapping around */
 #define PTRACE_BTS_O_CUT_SIZE	0x8 /* cut requested size to max available
 				       instead of failing */

From b1df07bd6674a84fbd9248759dc3fa3ff5c78e5b Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:04 +0100
Subject: [PATCH 410/890] x86: change paravirt_32.c name

This patch changes paravirt_32.c to paravirt.c. The goal
is to have paravirt support in x86_64, so we do it in a common file

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32                   | 2 +-
 arch/x86/kernel/{paravirt_32.c => paravirt.c} | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
 rename arch/x86/kernel/{paravirt_32.c => paravirt.c} (99%)

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a67198044b0d..cfb71a5deb19 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -48,7 +48,7 @@ obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt_32.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o
 obj-y				+= pcspeaker.o
 
 obj-$(CONFIG_SCx200)		+= scx200_32.o
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt.c
similarity index 99%
rename from arch/x86/kernel/paravirt_32.c
rename to arch/x86/kernel/paravirt.c
index dd063fba2b21..1a170877f46c 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt.c
@@ -14,7 +14,10 @@
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
 */
+
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/efi.h>

From a4746364da0c7caa155cc945a5c10312e7925b46 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:05 +0100
Subject: [PATCH 411/890] x86: adjust PVOP_CALL/VCALL macros for 64-bit

This patch adjust the PVOP_VCALL and PVOP_CALL macros to
work with x86_64. It has a different calling convention, and
we use auxiliary macros to account for both calling conventions
as cleanly as possible

Comments are adjusted accordingly.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 87 ++++++++++++++++++++++++++++----------
 1 file changed, 65 insertions(+), 22 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 24406703007f..bef932772661 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -320,7 +320,7 @@ int paravirt_disable_iospace(void);
  * runtime.
  *
  * Normally, a call to a pv_op function is a simple indirect call:
- * (paravirt_ops.operations)(args...).
+ * (pv_op_struct.operations)(args...).
  *
  * Unfortunately, this is a relatively slow operation for modern CPUs,
  * because it cannot necessarily determine what the destination
@@ -330,11 +330,17 @@ int paravirt_disable_iospace(void);
  * calls are essentially free, because the call and return addresses
  * are completely predictable.)
  *
- * These macros rely on the standard gcc "regparm(3)" calling
+ * For i386, these macros rely on the standard gcc "regparm(3)" calling
  * convention, in which the first three arguments are placed in %eax,
  * %edx, %ecx (in that order), and the remaining arguments are placed
  * on the stack.  All caller-save registers (eax,edx,ecx) are expected
  * to be modified (either clobbered or used for return values).
+ * X86_64, on the other hand, already specifies a register-based calling
+ * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
+ * special handling for dealing with 4 arguments, unlike i386.
+ * However, x86_64 also have to clobber all caller saved registers, which
+ * unfortunately, are quite a bit (r8 - r11)
  *
  * The call instruction itself is marked by placing its start address
  * and size into the .parainstructions section, so that
@@ -357,10 +363,12 @@ int paravirt_disable_iospace(void);
  * the return type.  The macro then uses sizeof() on that type to
  * determine whether its a 32 or 64 bit value, and places the return
  * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
- * 64-bit).
+ * 64-bit). For x86_64 machines, it just returns at %rax regardless of
+ * the return value size.
  *
  * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
- * in low,high order.
+ * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
+ * in low,high order
  *
  * Small structures are passed and returned in registers.  The macro
  * calling convention can't directly deal with this, so the wrapper
@@ -370,46 +378,67 @@ int paravirt_disable_iospace(void);
  * means that all uses must be wrapped in inline functions.  This also
  * makes sure the incoming and outgoing types are always correct.
  */
+#ifdef CONFIG_X86_32
+#define PVOP_VCALL_ARGS			unsigned long __eax, __edx, __ecx
+#define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
+#define PVOP_VCALL_CLOBBERS		"=a" (__eax), "=d" (__edx),	\
+					"=c" (__ecx)
+#define PVOP_CALL_CLOBBERS		PVOP_VCALL_CLOBBERS
+#define EXTRA_CLOBBERS
+#define VEXTRA_CLOBBERS
+#else
+#define PVOP_VCALL_ARGS		unsigned long __edi, __esi, __edx, __ecx
+#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
+#define PVOP_VCALL_CLOBBERS	"=D" (__edi),				\
+				"=S" (__esi), "=d" (__edx),		\
+				"=c" (__ecx)
+
+#define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS, "=a" (__eax)
+
+#define EXTRA_CLOBBERS	 , "r8", "r9", "r10", "r11"
+#define VEXTRA_CLOBBERS	 , "rax", "r8", "r9", "r10", "r11"
+#endif
+
 #define __PVOP_CALL(rettype, op, pre, post, ...)			\
 	({								\
 		rettype __ret;						\
-		unsigned long __eax, __edx, __ecx;			\
+		PVOP_CALL_ARGS;					\
+		/* This is 32-bit specific, but is okay in 64-bit */	\
+		/* since this condition will never hold */		\
 		if (sizeof(rettype) > sizeof(unsigned long)) {		\
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : "=a" (__eax), "=d" (__edx),	\
-				       "=c" (__ecx)			\
+				     : PVOP_CALL_CLOBBERS		\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(CLBR_ANY),	\
 				       ##__VA_ARGS__			\
-				     : "memory", "cc");			\
+				     : "memory", "cc" EXTRA_CLOBBERS);	\
 			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
 		} else {						\
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : "=a" (__eax), "=d" (__edx),	\
-				       "=c" (__ecx)			\
+				     : PVOP_CALL_CLOBBERS		\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(CLBR_ANY),	\
 				       ##__VA_ARGS__			\
-				     : "memory", "cc");			\
+				     : "memory", "cc" EXTRA_CLOBBERS);	\
 			__ret = (rettype)__eax;				\
 		}							\
 		__ret;							\
 	})
 #define __PVOP_VCALL(op, pre, post, ...)				\
 	({								\
-		unsigned long __eax, __edx, __ecx;			\
+		PVOP_VCALL_ARGS;					\
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
+			     : PVOP_VCALL_CLOBBERS			\
 			     : paravirt_type(op),			\
 			       paravirt_clobber(CLBR_ANY),		\
 			       ##__VA_ARGS__				\
-			     : "memory", "cc");				\
+			     : "memory", "cc" VEXTRA_CLOBBERS);		\
 	})
 
 #define PVOP_CALL0(rettype, op)						\
@@ -418,22 +447,26 @@ int paravirt_disable_iospace(void);
 	__PVOP_VCALL(op, "", "")
 
 #define PVOP_CALL1(rettype, op, arg1)					\
-	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
+	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
 #define PVOP_VCALL1(op, arg1)						\
-	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))
+	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
 
 #define PVOP_CALL2(rettype, op, arg1, arg2)				\
-	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
+	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), 	\
+	"1" ((unsigned long)(arg2)))
 #define PVOP_VCALL2(op, arg1, arg2)					\
-	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
+	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), 		\
+	"1" ((unsigned long)(arg2)))
 
 #define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
-	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)),		\
-		    "1"((u32)(arg2)), "2"((u32)(arg3)))
+	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
+	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
 #define PVOP_VCALL3(op, arg1, arg2, arg3)				\
-	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)),	\
-		     "2"((u32)(arg3)))
+	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
+	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
 
+/* This is the only difference in x86_64. We can make it much simpler */
+#ifdef CONFIG_X86_32
 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
 	__PVOP_CALL(rettype, op,					\
 		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
@@ -444,6 +477,16 @@ int paravirt_disable_iospace(void);
 		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
 		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
 		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+#else
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
+	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
+	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
+	"3"((unsigned long)(arg4)))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
+	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
+	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
+	"3"((unsigned long)(arg4)))
+#endif
 
 static inline int paravirt_enabled(void)
 {

From f72a9ef979c5a828c64deb88ebba743f7d899907 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:05 +0100
Subject: [PATCH 412/890] x86: cleanup write_tsc

write_tsc() does not need to be enclosed in any paravirt closure,
as it uses wrmsr(). So we rip off the duplicate in msr.h
and the definition from paravirt.h

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr.h      | 2 --
 include/asm-x86/paravirt.h | 2 --
 2 files changed, 4 deletions(-)

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 088652a62d58..bca8c3950132 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -157,8 +157,6 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
 #define rdtscll(val)						\
 	((val) = native_read_tsc())
 
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
 #define rdpmc(counter,low,high)					\
 	do {							\
 		u64 _l = native_read_pmc(counter);		\
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index bef932772661..66290f58e939 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -657,8 +657,6 @@ static inline unsigned long long paravirt_sched_clock(void)
 }
 #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
 
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);

From e5aaac443635c7c6f842f0bf8169f71f3236d574 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:05 +0100
Subject: [PATCH 413/890] x86: provide paravirtualized hook for rdtscp

This patch adds a field in pv_cpu_ops for a paravirtualized hook
for rdtscp, needed for x86_64.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c |  1 +
 include/asm-x86/paravirt.h | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1a170877f46c..072c1a08efe6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,6 +374,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
+	.read_tscp = native_read_tscp,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
 	.load_gdt = native_load_gdt,
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 66290f58e939..8f7984319c30 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -120,6 +120,7 @@ struct pv_cpu_ops {
 
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(int counter);
+	unsigned long long (*read_tscp)(unsigned int *aux);
 
 	/* These two are jmp to, not actually called. */
 	void (*irq_enable_syscall_ret)(void);
@@ -668,6 +669,27 @@ static inline unsigned long long paravirt_read_pmc(int counter)
 	high = _l >> 32;			\
 } while(0)
 
+static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
+{
+	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
+}
+
+#define rdtscp(low, high, aux)				\
+do {							\
+	int __aux;					\
+	unsigned long __val = paravirt_rdtscp(&__aux);	\
+	(low) = (u32)__val;				\
+	(high) = (u32)(__val >> 32);			\
+	(aux) = __aux;					\
+} while (0)
+
+#define rdtscpll(val, aux)				\
+do {							\
+	unsigned long __aux; 				\
+	val = paravirt_rdtscp(&__aux);			\
+	(aux) = __aux;					\
+} while (0)
+
 static inline void load_TR_desc(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);

From 658be9d395630d30a501c850bde90ac791a678c9 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:06 +0100
Subject: [PATCH 414/890] x86: change assembly definition of
 paravirt_patch_site

To account for differences in x86_64, we change the macros that
create raw instances of the paravirt_patch_site struct.
We need to align 64-pointers to 64-bit boundaries, so we add an alignment
directive. Also, we need to make room for a word-sized pointer,
instead of a fixed 32-bit one

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 8f7984319c30..56389954d100 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -5,6 +5,7 @@
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/page.h>
+#include <asm/asm.h>
 
 /* Bitmask of what can be clobbered: usually at least eax. */
 #define CLBR_NONE 0x0
@@ -281,7 +282,8 @@ extern struct pv_mmu_ops pv_mmu_ops;
 #define _paravirt_alt(insn_string, type, clobber)	\
 	"771:\n\t" insn_string "\n" "772:\n"		\
 	".pushsection .parainstructions,\"a\"\n"	\
-	"  .long 771b\n"				\
+	_ASM_ALIGN "\n"					\
+	_ASM_PTR " 771b\n"				\
 	"  .byte " type "\n"				\
 	"  .byte 772b-771b\n"				\
 	"  .short " clobber "\n"			\
@@ -1159,17 +1161,25 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define PARA_PATCH(struct, off)	((PARAVIRT_PATCH_##struct + (off)) / 4)
 
-#define PARA_SITE(ptype, clobbers, ops)		\
+#define _PVSITE(ptype, clobbers, ops, word, algn)	\
 771:;						\
 	ops;					\
 772:;						\
 	.pushsection .parainstructions,"a";	\
-	 .long 771b;				\
+	 .align	algn;				\
+	 word 771b;				\
 	 .byte ptype;				\
 	 .byte 772b-771b;			\
 	 .short clobbers;			\
 	.popsection
 
+
+#ifdef CONFIG_X86_64
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#else
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#endif
+
 #define INTERRUPT_RETURN						\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
 		  jmp *%cs:pv_cpu_ops+PV_CPU_iret)

From 6057fc827b3a9f2fe5db18b882ebf96500500b64 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:06 +0100
Subject: [PATCH 415/890] x86: adjust assembly macros on 64-bit as well.

This patch adjust the paravirt macros used in assembly code
to accomodate for x86_64 as well.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 56389954d100..854c19364da3 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1159,8 +1159,6 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #else  /* __ASSEMBLY__ */
 
-#define PARA_PATCH(struct, off)	((PARAVIRT_PATCH_##struct + (off)) / 4)
-
 #define _PVSITE(ptype, clobbers, ops, word, algn)	\
 771:;						\
 	ops;					\
@@ -1175,8 +1173,14 @@ static inline unsigned long __raw_local_irq_save(void)
 
 
 #ifdef CONFIG_X86_64
+#define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
+#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
 #else
+#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
+#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
 #endif
 
@@ -1186,25 +1190,27 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-		  pushl %eax; pushl %ecx; pushl %edx;			\
+		  PV_SAVE_REGS;			\
 		  call *%cs:pv_irq_ops+PV_IRQ_irq_disable;		\
-		  popl %edx; popl %ecx; popl %eax)			\
+		  PV_RESTORE_REGS;)			\
 
 #define ENABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
-		  pushl %eax; pushl %ecx; pushl %edx;			\
+		  PV_SAVE_REGS;			\
 		  call *%cs:pv_irq_ops+PV_IRQ_irq_enable;		\
-		  popl %edx; popl %ecx; popl %eax)
+		  PV_RESTORE_REGS;)
 
 #define ENABLE_INTERRUPTS_SYSCALL_RET					\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
 		  CLBR_NONE,						\
 		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
 
+#ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX			\
 	push %ecx; push %edx;			\
 	call *pv_cpu_ops+PV_CPU_read_cr0;	\
 	pop %edx; pop %ecx
+#endif
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */

From 2e47d3e6c35bb5b78fea2b2584c7eeaf782f138d Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:07 +0100
Subject: [PATCH 416/890] x86: change irq functions to accomodate 64-bit

This patch changes the irq handling function definitions
in paravirt.h (like raw_local_irq_disable) to accomodate for x86_64.
The differences are in the calling convention.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 43 ++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 854c19364da3..0735a90f531b 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1085,52 +1085,68 @@ struct paravirt_patch_site {
 extern struct paravirt_patch_site __parainstructions[],
 	__parainstructions_end[];
 
+#ifdef CONFIG_X86_32
+#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
+#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+#define PV_FLAGS_ARG "0"
+#define PV_EXTRA_CLOBBERS
+#define PV_VEXTRA_CLOBBERS
+#else
+/* We save some registers, but all of them, that's too much. We clobber all
+ * caller saved registers but the argument parameter */
+#define PV_SAVE_REGS "pushq %%rdi;"
+#define PV_RESTORE_REGS "popq %%rdi;"
+#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
+#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+#define PV_FLAGS_ARG "D"
+#endif
+
 static inline unsigned long __raw_local_save_flags(void)
 {
 	unsigned long f;
 
-	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+	asm volatile(paravirt_alt(PV_SAVE_REGS
 				  PARAVIRT_CALL
-				  "popl %%edx; popl %%ecx")
+				  PV_RESTORE_REGS)
 		     : "=a"(f)
 		     : paravirt_type(pv_irq_ops.save_fl),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
+		     : "memory", "cc" PV_VEXTRA_CLOBBERS);
 	return f;
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
-	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+	asm volatile(paravirt_alt(PV_SAVE_REGS
 				  PARAVIRT_CALL
-				  "popl %%edx; popl %%ecx")
+				  PV_RESTORE_REGS)
 		     : "=a"(f)
-		     : "0"(f),
+		     : PV_FLAGS_ARG(f),
 		       paravirt_type(pv_irq_ops.restore_fl),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
+		     : "memory", "cc" PV_EXTRA_CLOBBERS);
 }
 
 static inline void raw_local_irq_disable(void)
 {
-	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+	asm volatile(paravirt_alt(PV_SAVE_REGS
 				  PARAVIRT_CALL
-				  "popl %%edx; popl %%ecx")
+				  PV_RESTORE_REGS)
 		     :
 		     : paravirt_type(pv_irq_ops.irq_disable),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
 }
 
 static inline void raw_local_irq_enable(void)
 {
-	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+	asm volatile(paravirt_alt(PV_SAVE_REGS
 				  PARAVIRT_CALL
-				  "popl %%edx; popl %%ecx")
+				  PV_RESTORE_REGS)
 		     :
 		     : paravirt_type(pv_irq_ops.irq_enable),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
 }
 
 static inline unsigned long __raw_local_irq_save(void)
@@ -1205,6 +1221,7 @@ static inline unsigned long __raw_local_irq_save(void)
 		  CLBR_NONE,						\
 		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
 
+
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX			\
 	push %ecx; push %edx;			\

From 4a8c4c4e10d1bc2c3bd80caabb6a76a66849f7e8 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:07 +0100
Subject: [PATCH 417/890] x86: add macro for privileged 64-bit operation

i386 has a macro GET_CR0_INTO_EAX, used in early trap handling code.
x86_64 has similar needs, only it needs to put cr2 into rcx. We provide
a macro for such task, in the same way

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0735a90f531b..7d1c126e2249 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1227,6 +1227,12 @@ static inline unsigned long __raw_local_irq_save(void)
 	push %ecx; push %edx;			\
 	call *pv_cpu_ops+PV_CPU_read_cr0;	\
 	pop %edx; pop %ecx
+#else
+#define GET_CR2_INTO_RCX			\
+	call *pv_mmu_ops+PV_MMU_read_cr2;	\
+	movq %rax, %rcx;			\
+	xorq %rax, %rax;
+
 #endif
 
 #endif /* __ASSEMBLY__ */

From e801f864ec7e5b149bd05337800e419f408523bb Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:08 +0100
Subject: [PATCH 418/890] x86: adds paravirt hook for swapgs

This patch adds paravirt hook for swapgs operation, which is a privileged
operation in x86_64.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c  | 1 +
 include/asm-x86/paravirt.h  | 9 +++++++++
 include/asm-x86/processor.h | 8 ++++++++
 3 files changed, 18 insertions(+)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 072c1a08efe6..e7c17cc4a99e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -390,6 +390,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 
 	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
 	.iret = native_iret,
+	.swapgs = native_swapgs,
 
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 7d1c126e2249..e8fbf742d425 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -127,6 +127,8 @@ struct pv_cpu_ops {
 	void (*irq_enable_syscall_ret)(void);
 	void (*iret)(void);
 
+	void (*swapgs)(void);
+
 	struct pv_lazy_ops lazy_mode;
 };
 
@@ -1228,6 +1230,13 @@ static inline unsigned long __raw_local_irq_save(void)
 	call *pv_cpu_ops+PV_CPU_read_cr0;	\
 	pop %edx; pop %ecx
 #else
+#define SWAPGS								\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
+		  PV_SAVE_REGS;						\
+		  call *pv_cpu_ops+PV_CPU_swapgs;			\
+		  PV_RESTORE_REGS					\
+		 )
+
 #define GET_CR2_INTO_RCX			\
 	call *pv_mmu_ops+PV_MMU_read_cr2;	\
 	movq %rax, %rcx;			\
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index e6fa06fee72a..72740c6f1109 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -435,6 +435,13 @@ static inline void native_load_sp0(struct tss_struct *tss,
 #endif
 }
 
+static inline void native_swapgs(void)
+{
+#ifdef CONFIG_X86_64
+	asm volatile("swapgs" ::: "memory");
+#endif
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -456,6 +463,7 @@ static inline void load_sp0(struct tss_struct *tss,
 }
 
 #define set_iopl_mask native_set_iopl_mask
+#define SWAPGS	swapgs
 #endif /* CONFIG_PARAVIRT */
 
 /*

From 72fe4858544292ad64600765cb78bc02298c6b1c Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:08 +0100
Subject: [PATCH 419/890] x86: replace privileged instructions with paravirt
 macros

The assembly code in entry_64.S issues a bunch of privileged instructions,
like cli, sti, swapgs, and others. Paravirt guests are forbidden to do so,
and we then replace them with macros that will do the right thing.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_64.S | 101 ++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e70f3881d7e4..bea8474744ff 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -50,6 +50,7 @@
 #include <asm/hw_irq.h>
 #include <asm/page.h>
 #include <asm/irqflags.h>
+#include <asm/paravirt.h>
 
 	.code64
 
@@ -57,6 +58,13 @@
 #define retint_kernel retint_restore_args
 #endif	
 
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_irq_enable_syscall_ret)
+	movq	%gs:pda_oldrsp,%rsp
+	swapgs
+	sysretq
+#endif /* CONFIG_PARAVIRT */
+
 
 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -216,14 +224,21 @@ ENTRY(system_call)
 	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
-	swapgs
+	SWAPGS_UNSAFE_STACK
+	/*
+	 * A hypervisor implementation might want to use a label
+	 * after the swapgs, so that it can do the swapgs
+	 * for the guest and jump here on syscall.
+	 */
+ENTRY(system_call_after_swapgs)
+
 	movq	%rsp,%gs:pda_oldrsp 
 	movq	%gs:pda_kernelstack,%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 */
-	sti					
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_ARGS 8,1
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
@@ -246,7 +261,7 @@ ret_from_sys_call:
 sysret_check:		
 	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
@@ -260,9 +275,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	movq	%gs:pda_oldrsp,%rsp
-	swapgs
-	sysretq
+	ENABLE_INTERRUPTS_SYSCALL_RET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
@@ -271,7 +284,7 @@ sysret_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
 	call schedule
@@ -282,7 +295,7 @@ sysret_careful:
 	/* Handle a signal */ 
 sysret_signal:
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz    1f
 
@@ -295,7 +308,7 @@ sysret_signal:
 1:	movl $_TIF_NEED_RESCHED,%edi
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	
@@ -327,7 +340,7 @@ tracesys:
  */
 	.globl int_ret_from_sys_call
 int_ret_from_sys_call:
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_restore_args
@@ -349,20 +362,20 @@ int_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc  int_very_careful
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
 	call schedule
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
 
 	/* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
 	/* Check for syscall exit trace */	
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
@@ -385,7 +398,7 @@ int_signal:
 1:	movl $_TIF_NEED_RESCHED,%edi	
 int_restore_rest:
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
@@ -506,7 +519,7 @@ END(stub_rt_sigreturn)
 	CFI_DEF_CFA_REGISTER	rbp
 	testl $3,CS(%rdi)
 	je 1f
-	swapgs	
+	SWAPGS
 	/* irqcount is used to check if a CPU is already on an interrupt
 	   stack or not. While this is essentially redundant with preempt_count
 	   it is a little cheaper to use a separate counter in the PDA
@@ -527,7 +540,7 @@ ENTRY(common_interrupt)
 	interrupt do_IRQ
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
-	cli	
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	decl %gs:pda_irqcount
 	leaveq
@@ -556,13 +569,13 @@ retint_swapgs:		/* return to user-space */
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
-	swapgs 
+	SWAPGS
 	jmp restore_args
 
 retint_restore_args:	/* return to kernel space */
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
@@ -570,10 +583,14 @@ retint_restore_args:	/* return to kernel space */
 restore_args:
 	RESTORE_ARGS 0,8,0						
 iret_label:	
+#ifdef CONFIG_PARAVIRT
+	INTERRUPT_RETURN
+#endif
+ENTRY(native_iret)
 	iretq
 
 	.section __ex_table,"a"
-	.quad iret_label,bad_iret	
+	.quad native_iret, bad_iret
 	.previous
 	.section .fixup,"ax"
 	/* force a signal here? this matches i386 behaviour */
@@ -581,24 +598,24 @@ iret_label:
 bad_iret:
 	movq $11,%rdi	/* SIGSEGV */
 	TRACE_IRQS_ON
-	sti
-	jmp do_exit			
-	.previous	
-	
+	ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+	jmp do_exit
+	.previous
+
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
 	bt    $TIF_NEED_RESCHED,%edx
 	jnc   retint_signal
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
 	call  schedule
 	popq %rdi		
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp retint_check
 	
@@ -606,14 +623,14 @@ retint_signal:
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz    retint_swapgs
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp) 			
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
@@ -731,7 +748,7 @@ END(spurious_interrupt)
 	rdmsr
 	testl %edx,%edx
 	js    1f
-	swapgs
+	SWAPGS
 	xorl  %ebx,%ebx
 1:
 	.if \ist
@@ -747,7 +764,7 @@ END(spurious_interrupt)
 	.if \ist
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	.if \irqtrace
 	TRACE_IRQS_OFF
 	.endif
@@ -776,10 +793,10 @@ paranoid_swapgs\trace:
 	.if \trace
 	TRACE_IRQS_IRETQ 0
 	.endif
-	swapgs
+	SWAPGS_UNSAFE_STACK
 paranoid_restore\trace:
 	RESTORE_ALL 8
-	iretq
+	INTERRUPT_RETURN
 paranoid_userspace\trace:
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%ebx
@@ -794,11 +811,11 @@ paranoid_userspace\trace:
 	.if \trace
 	TRACE_IRQS_ON
 	.endif
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %esi,%esi 			/* arg2: oldset */
 	movq %rsp,%rdi 			/* arg1: &pt_regs */
 	call do_notify_resume
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	.if \trace
 	TRACE_IRQS_OFF
 	.endif
@@ -807,9 +824,9 @@ paranoid_schedule\trace:
 	.if \trace
 	TRACE_IRQS_ON
 	.endif
-	sti
+	ENABLE_INTERRUPTS(CLBR_ANY)
 	call schedule
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	.if \trace
 	TRACE_IRQS_OFF
 	.endif
@@ -862,7 +879,7 @@ KPROBE_ENTRY(error_entry)
 	testl $3,CS(%rsp)
 	je  error_kernelspace
 error_swapgs:	
-	swapgs
+	SWAPGS
 error_sti:	
 	movq %rdi,RDI(%rsp) 	
 	CFI_REL_OFFSET	rdi,RDI
@@ -874,7 +891,7 @@ error_sti:
 error_exit:
 	movl %ebx,%eax
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)	
 	testl %eax,%eax
@@ -911,12 +928,12 @@ ENTRY(load_gs_index)
 	CFI_STARTPROC
 	pushf
 	CFI_ADJUST_CFA_OFFSET 8
-	cli
-        swapgs
+	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+        SWAPGS
 gs_change:     
         movl %edi,%gs   
 2:	mfence		/* workaround */
-	swapgs
+	SWAPGS
         popf
 	CFI_ADJUST_CFA_OFFSET -8
         ret
@@ -930,7 +947,7 @@ ENDPROC(load_gs_index)
         .section .fixup,"ax"
 	/* running with kernelgs */
 bad_gs: 
-	swapgs			/* switch back to user gs */
+	SWAPGS			/* switch back to user gs */
 	xorl %eax,%eax
         movl %eax,%gs
         jmp  2b

From 1954448fb0c03dd5e301d7bef2dda71cd9b2fcf2 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:09 +0100
Subject: [PATCH 420/890] x86: cleanup CLI_STRING, STI_STRING and friends

Since the advent of ticket locking, CLI_STRING, STI_STRING, and friends
are not used anymore. They can now be safely deleted.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/spinlock.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 97d52b506af8..2076d5d62d83 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -19,15 +19,6 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define CLI_STRING	"cli"
-#define STI_STRING	"sti"
-#define CLI_STI_CLOBBERS
-#define CLI_STI_INPUT_ARGS
-#endif /* CONFIG_PARAVIRT */
-
 #ifdef CONFIG_X86_32
 typedef char _slock_t;
 # define LOCK_INS_DEC "decb"

From 21438f7c138f0b893a32df3cc77434e39a2145f8 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:09 +0100
Subject: [PATCH 421/890] x86: add CLBR_ defines for 64-bit

x86_64 needs a potentially larger clobber list than i386, due to its calling
convention. So we add more CLBR_ defines for it.
Note that CLBR_ANY is different for each of the architectures, since it comprises
the notion of "All call clobbers in this architecture"

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index e8fbf742d425..5935c273af1f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -8,11 +8,24 @@
 #include <asm/asm.h>
 
 /* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0x0
-#define CLBR_EAX 0x1
-#define CLBR_ECX 0x2
-#define CLBR_EDX 0x4
-#define CLBR_ANY 0x7
+#define CLBR_NONE 0
+#define CLBR_EAX  (1 << 0)
+#define CLBR_ECX  (1 << 1)
+#define CLBR_EDX  (1 << 2)
+
+#ifdef CONFIG_X86_64
+#define CLBR_RSI  (1 << 3)
+#define CLBR_RDI  (1 << 4)
+#define CLBR_R8   (1 << 5)
+#define CLBR_R9   (1 << 6)
+#define CLBR_R10  (1 << 7)
+#define CLBR_R11  (1 << 8)
+#define CLBR_ANY  ((1 << 9) - 1)
+#include <asm/desc_defs.h>
+#else
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY  ((1 << 3) - 1)
+#endif /* X86_64 */
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>

From 2f485ef568372af4680c4e2f8490efb9f2523b05 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:10 +0100
Subject: [PATCH 422/890] x86: move patching code to arch-specific file.

The core patching code for paravirt is sufficiently different
among i386 and x86_64, and we move them to specific files.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_32         |  2 +-
 arch/x86/kernel/paravirt.c          | 50 -----------------------------
 arch/x86/kernel/paravirt_patch_32.c | 49 ++++++++++++++++++++++++++++
 include/asm-x86/paravirt.h          |  8 +++++
 4 files changed, 58 insertions(+), 51 deletions(-)
 create mode 100644 arch/x86/kernel/paravirt_patch_32.c

diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index cfb71a5deb19..86c6327798b2 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -48,7 +48,7 @@ obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_32.o
 obj-y				+= pcspeaker.o
 
 obj-$(CONFIG_SCx200)		+= scx200_32.o
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e7c17cc4a99e..864be0498a32 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -58,59 +58,9 @@ char *memory_setup(void)
 	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
 	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
 
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
-DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");
-DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
-
 /* Undefined instruction for dealing with missing ops pointers. */
 static const unsigned char ud2a[] = { 0x0f, 0x0b };
 
-static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
-			     unsigned long addr, unsigned len)
-{
-	const unsigned char *start, *end;
-	unsigned ret;
-
-	switch(type) {
-#define SITE(ops, x)						\
-	case PARAVIRT_PATCH(ops.x):				\
-		start = start_##ops##_##x;			\
-		end = end_##ops##_##x;				\
-		goto patch_site
-
-	SITE(pv_irq_ops, irq_disable);
-	SITE(pv_irq_ops, irq_enable);
-	SITE(pv_irq_ops, restore_fl);
-	SITE(pv_irq_ops, save_fl);
-	SITE(pv_cpu_ops, iret);
-	SITE(pv_cpu_ops, irq_enable_syscall_ret);
-	SITE(pv_mmu_ops, read_cr2);
-	SITE(pv_mmu_ops, read_cr3);
-	SITE(pv_mmu_ops, write_cr3);
-	SITE(pv_cpu_ops, clts);
-	SITE(pv_cpu_ops, read_tsc);
-#undef SITE
-
-	patch_site:
-		ret = paravirt_patch_insns(ibuf, len, start, end);
-		break;
-
-	default:
-		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
-		break;
-	}
-
-	return ret;
-}
-
 unsigned paravirt_patch_nop(void)
 {
 	return 0;
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
new file mode 100644
index 000000000000..82fc5fcab4f4
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -0,0 +1,49 @@
+#include <asm/paravirt.h>
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
+DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+		      unsigned long addr, unsigned len)
+{
+	const unsigned char *start, *end;
+	unsigned ret;
+
+#define PATCH_SITE(ops, x)					\
+		case PARAVIRT_PATCH(ops.x):			\
+			start = start_##ops##_##x;		\
+			end = end_##ops##_##x;			\
+			goto patch_site
+	switch(type) {
+		PATCH_SITE(pv_irq_ops, irq_disable);
+		PATCH_SITE(pv_irq_ops, irq_enable);
+		PATCH_SITE(pv_irq_ops, restore_fl);
+		PATCH_SITE(pv_irq_ops, save_fl);
+		PATCH_SITE(pv_cpu_ops, iret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_mmu_ops, read_cr2);
+		PATCH_SITE(pv_mmu_ops, read_cr3);
+		PATCH_SITE(pv_mmu_ops, write_cr3);
+		PATCH_SITE(pv_cpu_ops, clts);
+		PATCH_SITE(pv_cpu_ops, read_tsc);
+
+	patch_site:
+		ret = paravirt_patch_insns(ibuf, len, start, end);
+		break;
+
+	default:
+		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+		break;
+	}
+#undef PATCH_SITE
+	return ret;
+}
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 5935c273af1f..b35ed95166e4 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -308,6 +308,11 @@ extern struct pv_mmu_ops pv_mmu_ops;
 #define paravirt_alt(insn_string)					\
 	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 
+/* Simple instruction patching code. */
+#define DEF_NATIVE(ops, name, code) 					\
+	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
+	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+
 unsigned paravirt_patch_nop(void);
 unsigned paravirt_patch_ignore(unsigned len);
 unsigned paravirt_patch_call(void *insnbuf,
@@ -322,6 +327,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 			      const char *start, const char *end);
 
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+		      unsigned long addr, unsigned len);
+
 int paravirt_disable_iospace(void);
 
 /*

From 53fd13cff04ce27ff3e8d3eb7e5ad4f56b580f2f Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:10 +0100
Subject: [PATCH 423/890] x86: patching functions on 64-bit

Like i386, x86_64 also need to include its own patching function.
(Well, if you're not in a hurry, and don't care about speed, you don't
really _need_ ;-))

So here they are. Not much different in essence from i386

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile_64         |  1 +
 arch/x86/kernel/paravirt_patch_64.c | 56 +++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 arch/x86/kernel/paravirt_patch_64.c

diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 7fcf972aa5d6..b8f9d13eb5e3 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -41,6 +41,7 @@ obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_AUDIT)		+= audit_64.o
 obj-$(CONFIG_EFI)		+= efi.o efi_64.o efi_stub_64.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_64.o
 
 obj-$(CONFIG_MODULES)		+= module_64.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
new file mode 100644
index 000000000000..cbfc4f3069e3
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -0,0 +1,56 @@
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
+DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+
+/* the three commands give us more control to how to return from a syscall */
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
+
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+		      unsigned long addr, unsigned len)
+{
+	const unsigned char *start, *end;
+	unsigned ret;
+
+#define PATCH_SITE(ops, x)					\
+		case PARAVIRT_PATCH(ops.x):			\
+			start = start_##ops##_##x;		\
+			end = end_##ops##_##x;			\
+			goto patch_site
+	switch(type) {
+		PATCH_SITE(pv_irq_ops, restore_fl);
+		PATCH_SITE(pv_irq_ops, save_fl);
+		PATCH_SITE(pv_irq_ops, irq_enable);
+		PATCH_SITE(pv_irq_ops, irq_disable);
+		PATCH_SITE(pv_cpu_ops, iret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, swapgs);
+		PATCH_SITE(pv_mmu_ops, read_cr2);
+		PATCH_SITE(pv_mmu_ops, read_cr3);
+		PATCH_SITE(pv_mmu_ops, write_cr3);
+		PATCH_SITE(pv_cpu_ops, clts);
+		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+		PATCH_SITE(pv_cpu_ops, wbinvd);
+
+	patch_site:
+		ret = paravirt_patch_insns(ibuf, len, start, end);
+		break;
+
+	default:
+		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+		break;
+	}
+#undef PATCH_SITE
+	return ret;
+}

From bfd074e05bdb69652d24ebc60b126899174ca788 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:32:11 +0100
Subject: [PATCH 424/890] replace x86_read/write_per_cpu with a common
 function.

x86_read_per_cpu() and its writeish sister are not present in x86_64. So in
this patch, we replace them with __get_cpu_var(), which is present in both

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 864be0498a32..c20b4f8d62f5 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -238,18 +238,18 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 	BUG_ON(preemptible());
 
-	x86_write_percpu(paravirt_lazy_mode, mode);
+	__get_cpu_var(paravirt_lazy_mode) = mode;
 }
 
 void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
+	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
 	BUG_ON(preemptible());
 
-	x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -274,7 +274,7 @@ void paravirt_leave_lazy_cpu(void)
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
-	return x86_read_percpu(paravirt_lazy_mode);
+	return __get_cpu_var(paravirt_lazy_mode);
 }
 
 struct pv_info pv_info = {

From 8b2cb7a8f531d6ca72a8aff873b9bb1c6b3122ba Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:32:11 +0100
Subject: [PATCH 425/890] x86: 32-bit EFI runtime service support: fixes in
 sync with 64-bit support

support according to fixes of x86_64 support.

- Delete efi_rt_lock because it is used during system early boot,
  before SMP is initialized.

- Change local_flush_tlb() to __flush_tlb_all() to flush global page
  mapping.

- Clean up includes.

- Revise Kconfig description.

- Enable noefi kernel parameter on i386.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt   |  2 ++
 Documentation/x86_64/boot-options.txt |  4 ----
 arch/x86/Kconfig                      | 19 ++++++++-----------
 arch/x86/kernel/efi.c                 |  7 +++++++
 arch/x86/kernel/efi_32.c              | 25 +++++--------------------
 arch/x86/kernel/efi_64.c              |  7 -------
 arch/x86/kernel/setup_32.c            |  6 +++---
 7 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b3f20beea411..e7910f8b4fcc 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1169,6 +1169,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
 
+	noefi		[X86-32,X86-64] Disable EFI runtime services support.
+
 	noexec		[IA-64]
 
 	noexec		[X86-32,X86-64]
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 638bf46ca059..34abae4e9442 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -306,8 +306,4 @@ Debugging
 		newfallback: use new unwinder but fall back to old if it gets
 			stuck (default)
 
-EFI
-
-  noefi		Disable EFI support
-
 Miscellaneous
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d89b94528153..fa6fa52248d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -961,21 +961,18 @@ config MTRR
 
 config EFI
 	def_bool n
-	prompt "Boot from EFI support"
+	prompt "EFI runtime service support"
 	depends on ACPI
 	---help---
-	This enables the kernel to boot on EFI platforms using
-	system configuration information passed to it from the firmware.
-	This also enables the kernel to use any EFI runtime services that are
+	This enables the kernel to use EFI runtime services that are
 	available (such as the EFI variable services).
 
-	This option is only useful on systems that have EFI firmware
-	and will result in a kernel image that is ~8k larger.  In addition,
-	you must use the latest ELILO loader available at
-	<http://elilo.sourceforge.net> in order to take advantage of
-	kernel initialization using EFI information (neither GRUB nor LILO know
-	anything about EFI).  However, even with this option, the resultant
-	kernel should continue to boot on existing non-EFI platforms.
+	This option is only useful on systems that have EFI firmware.
+  	In addition, you should use the latest ELILO loader available
+  	at <http://elilo.sourceforge.net> in order to take advantage
+  	of EFI runtime services. However, even with this option, the
+  	resultant kernel should continue to boot on existing non-EFI
+  	platforms.
 
 config IRQBALANCE
 	def_bool y
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 0a61522e85c7..2939b015c2ed 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -55,6 +55,13 @@ struct efi_memory_map memmap;
 struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
+static int __init setup_noefi(char *arg)
+{
+	efi_enabled = 0;
+	return 0;
+}
+early_param("noefi", setup_noefi);
+
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	return efi_call_virt2(get_time, tm, tc);
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 30f937116288..afd2c3b039d6 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -20,27 +20,15 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
 #include <linux/types.h>
-#include <linux/time.h>
-#include <linux/spinlock.h>
-#include <linux/bootmem.h>
 #include <linux/ioport.h>
-#include <linux/module.h>
 #include <linux/efi.h>
-#include <linux/kexec.h>
 
-#include <asm/setup.h>
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
 #include <asm/tlbflush.h>
 
-#define PFX 		"EFI: "
-
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
  * prelog/epilog before/after the invocation to disable interrupt, to
@@ -49,16 +37,14 @@
  */
 
 static unsigned long efi_rt_eflags;
-static DEFINE_SPINLOCK(efi_rt_lock);
 static pgd_t efi_bak_pg_dir_pointer[2];
 
-void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
+void efi_call_phys_prelog(void)
 {
 	unsigned long cr4;
 	unsigned long temp;
 	struct desc_ptr gdt_descr;
 
-	spin_lock(&efi_rt_lock);
 	local_irq_save(efi_rt_eflags);
 
 	/*
@@ -88,14 +74,14 @@ void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	local_flush_tlb();
+	__flush_tlb_all();
 
 	gdt_descr.address = __pa(get_cpu_gdt_table(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
 
-void efi_call_phys_epilog(void) __releases(efi_rt_lock)
+void efi_call_phys_epilog(void)
 {
 	unsigned long cr4;
 	struct desc_ptr gdt_descr;
@@ -119,10 +105,9 @@ void efi_call_phys_epilog(void) __releases(efi_rt_lock)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	local_flush_tlb();
+	__flush_tlb_all();
 
 	local_irq_restore(efi_rt_eflags);
-	spin_unlock(&efi_rt_lock);
 }
 
 /*
@@ -135,7 +120,7 @@ void __init efi_map_memmap(void)
 	memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
 			(memmap.nr_map * memmap.desc_size));
 	if (memmap.map == NULL)
-		printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+		printk(KERN_ERR "Could not remap the EFI memmap!\n");
 
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
 }
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index f2000dbc7195..269de2e049a3 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -40,13 +40,6 @@
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
-static int __init setup_noefi(char *arg)
-{
-	efi_enabled = 0;
-	return 0;
-}
-early_param("noefi", setup_noefi);
-
 static void __init early_mapping_set_exec(unsigned long start,
 					  unsigned long end,
 					  int executable)
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2e805da337a2..704550fdb84c 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,9 +648,6 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
 	print_memory_map(memory_setup());
 
-	if (efi_enabled)
-		efi_init();
-
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)
@@ -677,6 +674,9 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+	if (efi_enabled)
+		efi_init();
+
 	max_low_pfn = setup_memory();
 
 #ifdef CONFIG_VMI

From 9e8b6d90ac973939c4605236d140ae64d459622b Mon Sep 17 00:00:00 2001
From: Min Zhang <mzhang@mvista.com>
Date: Wed, 30 Jan 2008 13:32:11 +0100
Subject: [PATCH 426/890] arch/x86/kernel/cpu/mcheck/p4.c: cleanups

SMP, the machine check exception dispatches all logical processors within a
physical package to the machine-check exception handler, so the printk
within each handler outputs concurrently and makes the output unreadable.
Refer to Intel system programming guide Part 1 Section 7.8.5
http://developer.intel.com/design/processor/manuals/253668.pdf

Signed-off-by: Min Zhang <mzhang@mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/k7.c | 11 +++++++----
 arch/x86/kernel/cpu/mcheck/p4.c | 21 ++++++++++++---------
 arch/x86/kernel/cpu/mcheck/p6.c | 11 +++++++----
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 39fa76fd3851..9e929409fd7a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -33,21 +33,24 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
 	for (i=1; i<nr_mce_banks; i++) {
 		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high&(1<<31)) {
+			char misc[20];
+			char addr[24];
+			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
-			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
 				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				printk ("[%08x%08x]", ahigh, alow);
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
 				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				printk (" at %08x%08x", ahigh, alow);
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk ("\n");
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+				smp_processor_id(), i, high, low, misc, addr);
 			/* Clear it */
 			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
 			/* Serialize */
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 16a6238dbc2a..cb03a1b50504 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -158,32 +158,35 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
 		intel_get_extended_msrs(&dbg);
-		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
-			smp_processor_id(), dbg.eip, dbg.eflags);
-		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
-			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
-		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
+			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
+			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+			smp_processor_id(), dbg.eip, dbg.eflags,
+			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
 			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
 	}
 
 	for (i=0; i<nr_mce_banks; i++) {
 		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high & (1<<31)) {
+			char misc[20];
+			char addr[24];
+			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
-			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
 				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				printk ("[%08x%08x]", ahigh, alow);
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
 				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				printk (" at %08x%08x", ahigh, alow);
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk ("\n");
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+				smp_processor_id(), i, high, low, misc, addr);
 		}
 	}
 
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index cb3829e07987..b61f3038c4c8 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -33,21 +33,24 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	for (i=0; i<nr_mce_banks; i++) {
 		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high & (1<<31)) {
+			char misc[20];
+			char addr[24];
+			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
-			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
 				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				printk ("[%08x%08x]", ahigh, alow);
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
 				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				printk (" at %08x%08x", ahigh, alow);
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk ("\n");
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+				smp_processor_id(), i, high, low, misc, addr);
 		}
 	}
 

From b912a1c73739b6016ced049552392a148c1736c8 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:32:12 +0100
Subject: [PATCH 427/890] x86: arch/x86/kernel/cpu/mcheck/ checkpatch fixes

#40: FILE: arch/x86/kernel/cpu/mcheck/k7.c:46:
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);

WARNING: line over 80 characters
#45: FILE: arch/x86/kernel/cpu/mcheck/k7.c:50:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#45: FILE: arch/x86/kernel/cpu/mcheck/k7.c:50:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#48: FILE: arch/x86/kernel/cpu/mcheck/k7.c:52:
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",

WARNING: no space between function name and open parenthesis '('
#65: FILE: arch/x86/kernel/cpu/mcheck/p4.c:161:
+		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"

WARNING: no space between function name and open parenthesis '('
#88: FILE: arch/x86/kernel/cpu/mcheck/p4.c:182:
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);

WARNING: line over 80 characters
#93: FILE: arch/x86/kernel/cpu/mcheck/p4.c:186:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#93: FILE: arch/x86/kernel/cpu/mcheck/p4.c:186:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#96: FILE: arch/x86/kernel/cpu/mcheck/p4.c:188:
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",

WARNING: no space between function name and open parenthesis '('
#120: FILE: arch/x86/kernel/cpu/mcheck/p6.c:46:
+				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);

WARNING: line over 80 characters
#125: FILE: arch/x86/kernel/cpu/mcheck/p6.c:50:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#125: FILE: arch/x86/kernel/cpu/mcheck/p6.c:50:
+				snprintf (addr, 24, " at %08x%08x", ahigh, alow);

WARNING: no space between function name and open parenthesis '('
#128: FILE: arch/x86/kernel/cpu/mcheck/p6.c:52:
+			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",

total: 0 errors, 13 warnings, 100 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches

Cc: Min Zhang <mzhang@mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/k7.c | 18 +++++++++---------
 arch/x86/kernel/cpu/mcheck/p4.c | 18 +++++++++---------
 arch/x86/kernel/cpu/mcheck/p6.c | 16 ++++++++--------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 9e929409fd7a..55dfd4f9446a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -27,11 +27,11 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
 	if (mcgstl & (1<<0))	/* Recoverable ? */
 		recover=0;
 
-	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 		smp_processor_id(), mcgsth, mcgstl);
 
-	for (i=1; i<nr_mce_banks; i++) {
-		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+	for (i = 1; i < nr_mce_banks; i++) {
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high&(1<<31)) {
 			char misc[20];
 			char addr[24];
@@ -42,17 +42,17 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
 				recover |= 2;
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
-				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
-				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 			/* Clear it */
-			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
 			/* Serialize */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03a1b50504..77463a3e9adf 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -152,13 +152,13 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	if (mcgstl & (1<<0))	/* Recoverable ? */
 		recover=0;
 
-	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 		smp_processor_id(), mcgsth, mcgstl);
 
 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
 		intel_get_extended_msrs(&dbg);
-		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
+		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
 			smp_processor_id(), dbg.eip, dbg.eflags,
@@ -166,8 +166,8 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
 	}
 
-	for (i=0; i<nr_mce_banks; i++) {
-		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+	for (i = 0; i < nr_mce_banks; i++) {
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
@@ -178,14 +178,14 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 				recover |= 2;
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
-				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
-				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index b61f3038c4c8..20376a2c0cf8 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -27,11 +27,11 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	if (mcgstl & (1<<0))	/* Recoverable ? */
 		recover=0;
 
-	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 		smp_processor_id(), mcgsth, mcgstl);
 
-	for (i=0; i<nr_mce_banks; i++) {
-		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+	for (i = 0; i < nr_mce_banks; i++) {
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
@@ -42,14 +42,14 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 				recover |= 2;
 			high &= ~(1<<31);
 			if (high & (1<<27)) {
-				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf (misc, 20, "[%08x%08x]", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
 			}
 			if (high & (1<<26)) {
-				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf (addr, 24, " at %08x%08x", ahigh, alow);
+				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
-			printk (KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
 	}

From 7271339347653f6d652bb47aaee1f57d936002f6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:32:13 +0100
Subject: [PATCH 428/890] x86: arch/x86/kernel/cpu/mcheck/k7.c checkpatch fixes

#88: FILE: arch/x86/kernel/cpu/mcheck/k7.c:34:
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		                             ^

ERROR: need space after that ',' (ctx:VxV)
#142: FILE: arch/x86/kernel/cpu/mcheck/p4.c:170:
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		                             ^

ERROR: need space after that ',' (ctx:VxV)
#180: FILE: arch/x86/kernel/cpu/mcheck/p6.c:34:
+		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
 		                             ^

total: 3 errors, 0 warnings, 114 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches

Cc: Min Zhang <mzhang@mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/k7.c | 2 +-
 arch/x86/kernel/cpu/mcheck/p4.c | 2 +-
 arch/x86/kernel/cpu/mcheck/p6.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 55dfd4f9446a..e633c9c2b764 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -31,7 +31,7 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
 		smp_processor_id(), mcgsth, mcgstl);
 
 	for (i = 1; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
+		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 		if (high&(1<<31)) {
 			char misc[20];
 			char addr[24];
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 77463a3e9adf..cb03345554a5 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -167,7 +167,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	}
 
 	for (i = 0; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
+		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 20376a2c0cf8..74342604d30e 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -31,7 +31,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 		smp_processor_id(), mcgsth, mcgstl);
 
 	for (i = 0; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
+		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];

From e419190683cdeeda9be69af1dbc77031478559af Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:32:13 +0100
Subject: [PATCH 429/890] x86: arch/x86/math-emu/errors.c: fix printk warnings

arch/x86/math-emu/errors.c:163: warning: format '%ld' expects type 'long int', but argument 3 has type 'u32'
arch/x86/math-emu/errors.c:175: warning: format '%ld' expects type 'long int', but argument 3 has type 'u32'
arch/x86/math-emu/errors.c:175: warning: format '%ld' expects type 'long int', but argument 4 has type 'u32'
arch/x86/math-emu/errors.c:175: warning: format '%ld' expects type 'long int', but argument 5 has type 'u32'
arch/x86/math-emu/errors.c:175: warning: format '%ld' expects type 'long int', but argument 6 has type 'u32'

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/math-emu/errors.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 145b68a99516..59d353d2c599 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -149,7 +149,7 @@ void FPU_printall(void)
 		printk("SW: invalid operation\n");
 #endif /* DEBUGGING */
 
-	printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", partial_status & 0x8000 ? 1 : 0,	/* busy */
+	printk(" SW: b=%d st=%d es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", partial_status & 0x8000 ? 1 : 0,	/* busy */
 	       (partial_status & 0x3800) >> 11,	/* stack top pointer */
 	       partial_status & 0x80 ? 1 : 0,	/* Error summary status */
 	       partial_status & 0x40 ? 1 : 0,	/* Stack flag */
@@ -162,7 +162,7 @@ void FPU_printall(void)
 	       partial_status & SW_Denorm_Op ? 1 : 0,
 	       partial_status & SW_Invalid ? 1 : 0);
 
-	printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d     ef=%d%d%d%d%d%d\n",
+	printk(" CW: ic=%d rc=%d%d pc=%d%d iem=%d     ef=%d%d%d%d%d%d\n",
 	       control_word & 0x1000 ? 1 : 0,
 	       (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
 	       (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,

From 9930927f36ac3e39ffa674dc23ef06f13c39cef7 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:14 +0100
Subject: [PATCH 430/890] x86: introduce REX prefix helper for kprobes

Fold some small ifdefs into a helper function.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 4e33329ce8a3..b1804e40235d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -170,6 +170,19 @@ static void __kprobes set_jmp_op(void *from, void *to)
 	jop->op = RELATIVEJUMP_INSTRUCTION;
 }
 
+/*
+ * Check for the REX prefix which can only exist on X86_64
+ * X86_32 always returns 0
+ */
+static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+{
+#ifdef CONFIG_X86_64
+	if ((*insn & 0xf0) == 0x40)
+		return 1;
+#endif
+	return 0;
+}
+
 /*
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
@@ -239,14 +252,14 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 	case 0x9d:		/* popf/popfd */
 		return 1;
 	}
-#ifdef CONFIG_X86_64
+
 	/*
-	 * on 64 bit x86, 0x40-0x4f are prefixes so we need to look
+	 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
 	 * at the next byte instead.. but of course not recurse infinitely
 	 */
-	if (*insn  >= 0x40 && *insn <= 0x4f)
+	if (is_REX_prefix(insn))
 		return is_IF_modifier(++insn);
-#endif
+
 	return 0;
 }
 
@@ -284,7 +297,7 @@ static void __kprobes fix_riprel(struct kprobe *p)
 	}
 
 	/* Skip REX instruction prefix.  */
-	if ((*insn & 0xf0) == 0x40)
+	if (is_REX_prefix(insn))
 		++insn;
 
 	if (*insn == 0x0f) {
@@ -748,11 +761,9 @@ static void __kprobes resume_execution(struct kprobe *p,
 	unsigned long orig_ip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 
-#ifdef CONFIG_X86_64
 	/*skip the REX prefix*/
-	if (*insn >= 0x40 && *insn <= 0x4f)
+	if (is_REX_prefix(insn))
 		insn++;
-#endif
 
 	regs->flags &= ~X86_EFLAGS_TF;
 	switch (*insn) {

From 31f80e45ea26008939790b4363a4fdcff240e0d6 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:16 +0100
Subject: [PATCH 431/890] x86: kprobes remove fix_riprel #ifdef

Move #ifdef around function definiton into the function and
unconditionally return on X86_32.  Saves an ifdef from the
one callsite.

[ mingo@elte.hu: minor cleanup. ]

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b1804e40235d..80bcb7635465 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -263,15 +263,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
 /*
  * Adjust the displacement if the instruction uses the %rip-relative
  * addressing mode.
  * If it does, Return the address of the 32-bit displacement word.
  * If not, return null.
+ * Only applicable to 64-bit x86.
  */
 static void __kprobes fix_riprel(struct kprobe *p)
 {
+#ifdef CONFIG_X86_64
 	u8 *insn = p->ainsn.insn;
 	s64 disp;
 	int need_modrm;
@@ -335,15 +336,15 @@ static void __kprobes fix_riprel(struct kprobe *p)
 			*(s32 *)insn = (s32) disp;
 		}
 	}
-}
 #endif
+}
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-#ifdef CONFIG_X86_64
+
 	fix_riprel(p);
-#endif
+
 	if (can_boost(p->addr))
 		p->ainsn.boostable = 0;
 	else

From e5fc3161969b3c82c8c61af8c8d360e39977ae2e Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 30 Jan 2008 13:32:17 +0100
Subject: [PATCH 432/890] arch/x86/ia32: use time_before, time_before_eq, etc.

The functions time_before, time_before_eq, time_after, and time_after_eq
are more robust for comparing jiffies against other values.

A simplified version of the semantic patch making this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@ change_compare_np @
expression E;
@@

(
- jiffies <= E
+ time_before_eq(jiffies,E)
|
- jiffies >= E
+ time_after_eq(jiffies,E)
|
- jiffies < E
+ time_before(jiffies,E)
|
- jiffies > E
+ time_after(jiffies,E)
)

@ include depends on change_compare_np @
@@

#include <linux/jiffies.h>

@ no_include depends on !include && change_compare_np @
@@

  #include <linux/...>
+ #include <linux/jiffies.h>
// </smpl>

[ mingo@elte.hu: merge to x86.git ]

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/ia32_aout.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f1a0f83676dc..e4c12079171b 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -25,6 +25,7 @@
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -359,13 +360,14 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 #ifdef WARN_OLD
 		static unsigned long error_time, error_time2;
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
-		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) {
+		    (N_MAGIC(ex) != NMAGIC) &&
+				time_after(jiffies, error_time2 + 5*HZ)) {
 			printk(KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
-		    (jiffies - error_time) > 5*HZ) {
+			    time_after(jiffies, error_time + 5*HZ)) {
 			printk(KERN_WARNING
 			       "fd_offset is not page aligned. Please convert "
 			       "program: %s\n",
@@ -484,7 +486,7 @@ static int load_aout_library(struct file *file)
 
 #ifdef WARN_OLD
 		static unsigned long error_time;
-		if ((jiffies-error_time) > 5*HZ) {
+		if (time_after(jiffies, error_time + 5*HZ)) {
 			printk(KERN_WARNING
 			       "N_TXTOFF is not page aligned. Please convert "
 			       "library: %s\n",

From 4f2479f03c06e29d7d9e1e02191f8b5ba8ad4eef Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 30 Jan 2008 13:32:18 +0100
Subject: [PATCH 433/890] arch/x86/kernel/apm_32.c: use time_before,
 time_before_eq,

The functions time_before, time_before_eq, time_after, and time_after_eq
are more robust for comparing jiffies against other values.

A simplified version of the semantic patch making this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@ change_compare_np @
expression E;
@@

(
- jiffies <= E
+ time_before_eq(jiffies,E)
|
- jiffies >= E
+ time_after_eq(jiffies,E)
|
- jiffies < E
+ time_before(jiffies,E)
|
- jiffies > E
+ time_after(jiffies,E)
)

@ include depends on change_compare_np @
@@

#include <linux/jiffies.h>

@ no_include depends on !include && change_compare_np @
@@

  #include <linux/...>
+ #include <linux/jiffies.h>
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apm_32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index e32f6c37db9b..2467df7eca0b 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
 #include <linux/dmi.h>
 #include <linux/suspend.h>
 #include <linux/kthread.h>
+#include <linux/jiffies.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -1287,7 +1288,7 @@ static void check_events(void)
 				       "event 0x%02x\n", event);
 		}
 		if (ignore_bounce
-		    && ((jiffies - last_resume) > bounce_interval))
+		    && (time_after(jiffies, last_resume + bounce_interval)))
 			ignore_bounce = 0;
 
 		switch (event) {

From 1d16b53e387b255d8e30f00594220b23b1290e6b Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 30 Jan 2008 13:32:19 +0100
Subject: [PATCH 434/890] arch/x86/kernel/io_apic_{64,32}.c: use time_before

The functions time_before, time_before_eq, time_after, and time_after_eq
are more robust for comparing jiffies against other values.

A simplified version of the semantic patch making this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@ change_compare_np @
expression E;
@@

(
- jiffies <= E
+ time_before_eq(jiffies,E)
|
- jiffies >= E
+ time_after_eq(jiffies,E)
|
- jiffies < E
+ time_before(jiffies,E)
|
- jiffies > E
+ time_after(jiffies,E)
)

@ include depends on change_compare_np @
@@

#include <linux/jiffies.h>

@ no_include depends on !include && change_compare_np @
@@

  #include <linux/...>
+ #include <linux/jiffies.h>
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic_32.c | 5 +++--
 arch/x86/kernel/io_apic_64.c | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 0d204237489e..4ca548632c8d 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -35,6 +35,7 @@
 #include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -349,7 +350,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 # include <asm/processor.h>	/* kernel_thread() */
 # include <linux/kernel_stat.h>	/* kstat */
 # include <linux/slab.h>		/* kmalloc() */
-# include <linux/timer.h>	/* time_after() */
+# include <linux/timer.h>
  
 #define IRQBALANCE_CHECK_ARCH -999
 #define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
@@ -1898,7 +1899,7 @@ static int __init timer_irq_works(void)
 	 * might have cached one ExtINT interrupt.  Finally, at
 	 * least one tick may be lost due to delays.
 	 */
-	if (jiffies - t1 > 4)
+	if (time_after(jiffies, t1 + 4))
 		return 1;
 
 	return 0;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index f914d84a21da..1627c0d53e0b 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -32,6 +32,7 @@
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/dmar.h>
+#include <linux/jiffies.h>
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -1299,7 +1300,7 @@ static int __init timer_irq_works(void)
 	 */
 
 	/* jiffies wrap? */
-	if (jiffies - t1 > 4)
+	if (time_after(jiffies, t1 + 4))
 		return 1;
 	return 0;
 }

From 33cb52438341d6e2d7e06e3b64ed776bc54a2ca4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:19 +0100
Subject: [PATCH 435/890] x86: cosmetic fixes fault_{32|64}.c

First step towards unifying these files.
- Checkpatch trailing whitespace fixes
- Checkpatch indentation of switch statement fixes
- Checkpatch single statement ifs need no braces fixes
- Checkpatch consistent spacing after comma fixes
- Introduce defines for pagefault error bits from X86_64 and add useful
  comment from X86_32.  Use these defines in X86_32 where obvious.
- Unify comments between 32|64 bit
- Small ifdef movement for CONFIG_KPROBES in notify_page_fault()
- Introduce X86_64 only case statement

No Functional Changes.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 148 ++++++++++++++++++++++------------------
 arch/x86/mm/fault_64.c | 151 +++++++++++++++++++++--------------------
 2 files changed, 160 insertions(+), 139 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index db8d748814e4..bfb0917d699d 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/i386/mm/fault.c
- *
  *  Copyright (C) 1995  Linus Torvalds
  */
 
@@ -30,11 +28,25 @@
 #include <asm/desc.h>
 #include <asm/segment.h>
 
-extern void die(const char *,struct pt_regs *,long);
+/*
+ * Page fault error code bits
+ *	bit 0 == 0 means no page found, 1 means protection fault
+ *	bit 1 == 0 means read, 1 means write
+ *	bit 2 == 0 means kernel, 1 means user-mode
+ *	bit 3 == 1 means use of reserved bit detected
+ *	bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT	(1<<0)
+#define PF_WRITE	(1<<1)
+#define PF_USER	(1<<2)
+#define PF_RSVD	(1<<3)
+#define PF_INSTR	(1<<4)
+
+extern void die(const char *, struct pt_regs *, long);
 
-#ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
 {
+#ifdef CONFIG_KPROBES
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
@@ -46,13 +58,10 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	}
 
 	return ret;
-}
 #else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
 	return 0;
-}
 #endif
+}
 
 /*
  * Return EIP plus the CS segment base.  The segment limit is also
@@ -65,7 +74,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
  * If CS is no longer a valid code segment, or if EIP is beyond the
  * limit, or if it is a kernel address when CS is not a kernel segment,
  * then the returned value will be greater than *eip_limit.
- * 
+ *
  * This is slow, but is very rarely executed.
  */
 static inline unsigned long get_segment_eip(struct pt_regs *regs,
@@ -84,7 +93,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 
 	/* The standard kernel/user address space limit. */
 	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-	
+
 	/* By far the most common cases. */
 	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
 		return ip;
@@ -99,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		return 1;	 /* So that returned ip > *eip_limit. */
 	}
 
-	/* Get the GDT/LDT descriptor base. 
+	/* Get the GDT/LDT descriptor base.
 	   When you look for races in this code remember that
 	   LDT and other horrors are only used in user space. */
 	if (seg & (1<<2)) {
@@ -109,16 +118,16 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		desc = (void *)desc + (seg & ~7);
 	} else {
 		/* Must disable preemption while reading the GDT. */
- 		desc = (u32 *)get_cpu_gdt_table(get_cpu());
+		desc = (u32 *)get_cpu_gdt_table(get_cpu());
 		desc = (void *)desc + (seg & ~7);
 	}
 
 	/* Decode the code segment base from the descriptor */
 	base = get_desc_base((struct desc_struct *)desc);
 
-	if (seg & (1<<2)) { 
+	if (seg & (1<<2))
 		mutex_unlock(&current->mm->context.lock);
-	} else
+	else
 		put_cpu();
 
 	/* Adjust EIP and segment limit, and clamp at the kernel limit.
@@ -129,19 +138,19 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	return ip + base;
 }
 
-/* 
+/*
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
  * Check that here and ignore it.
  */
 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
-{ 
+{
 	unsigned long limit;
-	unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
+	unsigned char *instr = (unsigned char *)get_segment_eip(regs, &limit);
 	int scan_more = 1;
-	int prefetch = 0; 
+	int prefetch = 0;
 	int i;
 
-	for (i = 0; scan_more && i < 15; i++) { 
+	for (i = 0; scan_more && i < 15; i++) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
@@ -149,27 +158,43 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 		if (instr > (unsigned char *)limit)
 			break;
 		if (probe_kernel_address(instr, opcode))
-			break; 
+			break;
 
-		instr_hi = opcode & 0xf0; 
-		instr_lo = opcode & 0x0f; 
+		instr_hi = opcode & 0xf0;
+		instr_lo = opcode & 0x0f;
 		instr++;
 
-		switch (instr_hi) { 
+		switch (instr_hi) {
 		case 0x20:
 		case 0x30:
-			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
+			/*
+			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+			 * In X86_64 long mode, the CPU will signal invalid
+			 * opcode if some of these prefixes are present so
+			 * X86_64 will never get here anyway
+			 */
 			scan_more = ((instr_lo & 7) == 0x6);
 			break;
-			
+#ifdef CONFIG_X86_64
+		case 0x40:
+			/*
+			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+			 * Need to figure out under what instruction mode the
+			 * instruction was issued. Could check the LDT for lm,
+			 * but for now it's good enough to assume that long
+			 * mode only uses well known segments or kernel.
+			 */
+			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
+			break;
+#endif
 		case 0x60:
 			/* 0x64 thru 0x67 are valid prefixes in all modes. */
 			scan_more = (instr_lo & 0xC) == 0x4;
-			break;		
+			break;
 		case 0xF0:
-			/* 0xF0, 0xF2, and 0xF3 are valid prefixes */
+			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
-			break;			
+			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
@@ -179,11 +204,11 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
-			break;			
+			break;
 		default:
 			scan_more = 0;
 			break;
-		} 
+		}
 	}
 	return prefetch;
 }
@@ -199,7 +224,7 @@ static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		return __is_prefetch(regs, addr);
 	}
 	return 0;
-} 
+}
 
 static noinline void force_sig_info_fault(int si_signo, int si_code,
 	unsigned long address, struct task_struct *tsk)
@@ -284,19 +309,12 @@ int show_unhandled_signals = 1;
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
- *
- * error_code:
- *	bit 0 == 0 means no page found, 1 means protection fault
- *	bit 1 == 0 means read, 1 means write
- *	bit 2 == 0 means kernel, 1 means user-mode
- *	bit 3 == 1 means use of reserved bit detected
- *	bit 4 == 1 means fault was an instruction fetch
  */
 void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 	unsigned long address;
 	int write, si_code;
 	int fault;
@@ -307,7 +325,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	trace_hardirqs_fixup();
 
 	/* get the address */
-        address = read_cr2();
+	address = read_cr2();
 
 	tsk = current;
 
@@ -350,7 +368,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault..
+	 * atomic region then we must not take the fault.
 	 */
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
@@ -371,7 +389,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * thus avoiding the deadlock.
 	 */
 	if (!down_read_trylock(&mm->mmap_sem)) {
-		if ((error_code & 4) == 0 &&
+		if ((error_code & PF_USER) == 0 &&
 		    !search_exception_tables(regs->ip))
 			goto bad_area_nosemaphore;
 		down_read(&mm->mmap_sem);
@@ -384,7 +402,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/*
 		 * Accessing the stack below %sp is always a bug.
 		 * The large cushion allows instructions like enter
@@ -403,19 +421,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 good_area:
 	si_code = SEGV_ACCERR;
 	write = 0;
-	switch (error_code & 3) {
-		default:	/* 3: write, present */
-				/* fall through */
-		case 2:		/* write, not present */
-			if (!(vma->vm_flags & VM_WRITE))
-				goto bad_area;
-			write++;
-			break;
-		case 1:		/* read, present */
+	switch (error_code & (PF_PROT|PF_WRITE)) {
+	default:	/* 3: write, present */
+		/* fall through */
+	case PF_WRITE:		/* write, not present */
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		write++;
+		break;
+	case PF_PROT:		/* read, present */
+		goto bad_area;
+	case 0:			/* read, not present */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
-		case 0:		/* read, not present */
-			if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-				goto bad_area;
 	}
 
  survive:
@@ -457,14 +475,14 @@ bad_area:
 
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/*
 		 * It's possible to have interrupts off here.
 		 */
 		local_irq_enable();
 
-		/* 
-		 * Valid to do another page fault here because this one came 
+		/*
+		 * Valid to do another page fault here because this one came
 		 * from user space.
 		 */
 		if (is_prefetch(regs, address, error_code))
@@ -492,7 +510,7 @@ bad_area_nosemaphore:
 	 */
 	if (boot_cpu_data.f00f_bug) {
 		unsigned long nr;
-		
+
 		nr = (address - idt_descr.address) >> 3;
 
 		if (nr == 6) {
@@ -507,13 +525,13 @@ no_context:
 	if (fixup_exception(regs))
 		return;
 
-	/* 
+	/*
 	 * Valid to do another page fault here, because if this fault
-	 * had been triggered by is_prefetch fixup_exception would have 
+	 * had been triggered by is_prefetch fixup_exception would have
 	 * handled it.
 	 */
- 	if (is_prefetch(regs, address, error_code))
- 		return;
+	if (is_prefetch(regs, address, error_code))
+		return;
 
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
@@ -541,7 +559,7 @@ no_context:
 		else
 			printk(KERN_ALERT "BUG: unable to handle kernel paging"
 					" request");
-		printk(" at virtual address %08lx\n",address);
+		printk(" at virtual address %08lx\n", address);
 		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
 
 		page = read_cr3();
@@ -605,7 +623,7 @@ do_sigbus:
 	up_read(&mm->mmap_sem);
 
 	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & 4))
+	if (!(error_code & PF_USER))
 		goto no_context;
 
 	/* User space => ok to do another page fault */
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 3a94941578fa..7e98a7691283 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/x86-64/mm/fault.c
- *
  *  Copyright (C) 1995  Linus Torvalds
  *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
  */
@@ -33,16 +31,23 @@
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
 
-/* Page fault error code bits */
-#define PF_PROT	(1<<0)		/* or no page found */
+/*
+ * Page fault error code bits
+ *	bit 0 == 0 means no page found, 1 means protection fault
+ *	bit 1 == 0 means read, 1 means write
+ *	bit 2 == 0 means kernel, 1 means user-mode
+ *	bit 3 == 1 means use of reserved bit detected
+ *	bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT	(1<<0)
 #define PF_WRITE	(1<<1)
 #define PF_USER	(1<<2)
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-#ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
 {
+#ifdef CONFIG_KPROBES
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
@@ -54,75 +59,75 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	}
 
 	return ret;
-}
 #else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
 	return 0;
-}
 #endif
+}
 
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				unsigned long error_code)
-{ 
+{
 	unsigned char *instr;
 	int scan_more = 1;
-	int prefetch = 0; 
+	int prefetch = 0;
 	unsigned char *max_instr;
 
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-	
+
 	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
 	max_instr = instr + 15;
 
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
 
-	while (scan_more && instr < max_instr) { 
+	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
 		if (probe_kernel_address(instr, opcode))
-			break; 
+			break;
 
-		instr_hi = opcode & 0xf0; 
-		instr_lo = opcode & 0x0f; 
+		instr_hi = opcode & 0xf0;
+		instr_lo = opcode & 0x0f;
 		instr++;
 
-		switch (instr_hi) { 
+		switch (instr_hi) {
 		case 0x20:
 		case 0x30:
-			/* Values 0x26,0x2E,0x36,0x3E are valid x86
-			   prefixes.  In long mode, the CPU will signal
-			   invalid opcode if some of these prefixes are
-			   present so we will never get here anyway */
+			/*
+			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+			 * In X86_64 long mode, the CPU will signal invalid
+			 * opcode if some of these prefixes are present so
+			 * X86_64 will never get here anyway
+			 */
 			scan_more = ((instr_lo & 7) == 0x6);
 			break;
-			
+#ifdef CONFIG_X86_64
 		case 0x40:
-			/* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
-			   Need to figure out under what instruction mode the
-			   instruction was issued ... */
-			/* Could check the LDT for lm, but for now it's good
-			   enough to assume that long mode only uses well known
-			   segments or kernel. */
+			/*
+			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+			 * Need to figure out under what instruction mode the
+			 * instruction was issued. Could check the LDT for lm,
+			 * but for now it's good enough to assume that long
+			 * mode only uses well known segments or kernel.
+			 */
 			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
 			break;
-			
+#endif
 		case 0x60:
 			/* 0x64 thru 0x67 are valid prefixes in all modes. */
 			scan_more = (instr_lo & 0xC) == 0x4;
-			break;		
+			break;
 		case 0xF0:
 			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
-			break;			
+			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
@@ -130,20 +135,20 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
-			break;			
+			break;
 		default:
 			scan_more = 0;
 			break;
-		} 
+		}
 	}
 	return prefetch;
 }
 
-static int bad_address(void *p) 
-{ 
+static int bad_address(void *p)
+{
 	unsigned long dummy;
 	return probe_kernel_address((unsigned long *)p, dummy);
-} 
+}
 
 void dump_pagetable(unsigned long address)
 {
@@ -154,11 +159,11 @@ void dump_pagetable(unsigned long address)
 
 	pgd = (pgd_t *)read_cr3();
 
-	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); 
+	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
 	pgd += pgd_index(address);
 	if (bad_address(pgd)) goto bad;
 	printk("PGD %lx ", pgd_val(*pgd));
-	if (!pgd_present(*pgd)) goto ret; 
+	if (!pgd_present(*pgd)) goto ret;
 
 	pud = pud_offset(pgd, address);
 	if (bad_address(pud)) goto bad;
@@ -172,7 +177,7 @@ void dump_pagetable(unsigned long address)
 
 	pte = pte_offset_kernel(pmd, address);
 	if (bad_address(pte)) goto bad;
-	printk("PTE %lx", pte_val(*pte)); 
+	printk("PTE %lx", pte_val(*pte));
 ret:
 	printk("\n");
 	return;
@@ -180,7 +185,7 @@ bad:
 	printk("BAD\n");
 }
 
-static const char errata93_warning[] = 
+static const char errata93_warning[] =
 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
 KERN_ERR "******* Please consider a BIOS update.\n"
@@ -188,31 +193,31 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
 
 /* Workaround for K8 erratum #93 & buggy BIOS.
    BIOS SMM functions are required to use a specific workaround
-   to avoid corruption of the 64bit RIP register on C stepping K8. 
-   A lot of BIOS that didn't get tested properly miss this. 
+   to avoid corruption of the 64bit RIP register on C stepping K8.
+   A lot of BIOS that didn't get tested properly miss this.
    The OS sees this as a page fault with the upper 32bits of RIP cleared.
    Try to work around it here.
    Note we only handle faults in kernel here. */
 
-static int is_errata93(struct pt_regs *regs, unsigned long address) 
+static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 	static int warned;
 	if (address != regs->ip)
 		return 0;
-	if ((address >> 32) != 0) 
+	if ((address >> 32) != 0)
 		return 0;
 	address |= 0xffffffffUL << 32;
-	if ((address >= (u64)_stext && address <= (u64)_etext) || 
-	    (address >= MODULES_VADDR && address <= MODULES_END)) { 
+	if ((address >= (u64)_stext && address <= (u64)_etext) ||
+	    (address >= MODULES_VADDR && address <= MODULES_END)) {
 		if (!warned) {
-			printk(errata93_warning); 		
+			printk(errata93_warning);
 			warned = 1;
 		}
 		regs->ip = address;
 		return 1;
 	}
 	return 0;
-} 
+}
 
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
@@ -296,7 +301,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 	unsigned long address;
 	int write, fault;
 	unsigned long flags;
@@ -360,8 +365,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		pgtable_bad(address, regs, error_code);
 
 	/*
-	 * If we're in an interrupt or have no user
-	 * context, we must not take the fault..
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault.
 	 */
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
@@ -403,7 +408,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/* Allow userspace just enough access below the stack pointer
 		 * to let the 'enter' instruction work.
 		 */
@@ -420,18 +425,18 @@ good_area:
 	info.si_code = SEGV_ACCERR;
 	write = 0;
 	switch (error_code & (PF_PROT|PF_WRITE)) {
-		default:	/* 3: write, present */
-			/* fall through */
-		case PF_WRITE:		/* write, not present */
-			if (!(vma->vm_flags & VM_WRITE))
-				goto bad_area;
-			write++;
-			break;
-		case PF_PROT:		/* read, present */
+	default:	/* 3: write, present */
+		/* fall through */
+	case PF_WRITE:		/* write, not present */
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		write++;
+		break;
+	case PF_PROT:		/* read, present */
+		goto bad_area;
+	case 0:			/* read, not present */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
-		case 0:			/* read, not present */
-			if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-				goto bad_area;
 	}
 
 	/*
@@ -491,7 +496,7 @@ bad_area_nosemaphore:
 					tsk->comm, tsk->pid, address, regs->ip,
 					regs->sp, error_code);
 		}
-       
+
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
 		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -505,21 +510,19 @@ bad_area_nosemaphore:
 	}
 
 no_context:
-	
 	/* Are we prepared to handle this kernel fault?  */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs))
 		return;
-	}
 
-	/* 
+	/*
 	 * Hall of shame of CPU/BIOS bugs.
 	 */
 
- 	if (is_prefetch(regs, address, error_code))
- 		return;
+	if (is_prefetch(regs, address, error_code))
+		return;
 
 	if (is_errata93(regs, address))
-		return; 
+		return;
 
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
@@ -532,7 +535,7 @@ no_context:
 		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at %016lx RIP: \n" KERN_ALERT,address);
+	printk(" at %016lx RIP: \n" KERN_ALERT, address);
 	printk_address(regs->ip);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
@@ -582,7 +585,7 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
-	/* Note that races in the updates of insync and start aren't 
+	/* Note that races in the updates of insync and start aren't
 	   problematic:
 	   insync can only get set bits added, and updates to start are only
 	   improving performance (without affecting correctness if undone). */
@@ -614,6 +617,6 @@ void vmalloc_sync_all(void)
 	}
 	/* Check that there is no need to do the same for the modules area. */
 	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 
+	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 				(__START_KERNEL & PGDIR_MASK)));
 }

From b97601563704751162b122c652d7f390b8f480d2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:19 +0100
Subject: [PATCH 436/890] x86: kprobes change kprobe_handler flow

Make the control flow of kprobe_handler more obvious.

Collapse the separate if blocks/gotos with if/else blocks
this unifies the duplication of the check for a breakpoint
instruction race with another cpu.

Create two jump targets:
	preempt_out: re-enables preemption before returning ret
	out: only returns ret

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 62 ++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 80bcb7635465..a72e02bf1135 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -494,32 +494,28 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	preempt_disable();
 	kcb = get_kprobe_ctlblk();
 
-	/* Check we're not actually recursing */
-	if (kprobe_running()) {
-		p = get_kprobe(addr);
-		if (p) {
+	p = get_kprobe(addr);
+	if (p) {
+		/* Check we're not actually recursing */
+		if (kprobe_running()) {
 			ret = reenter_kprobe(p, regs, kcb);
 			if (kcb->kprobe_status == KPROBE_REENTER)
-				return 1;
-		} else {
-			if (*addr != BREAKPOINT_INSTRUCTION) {
-			/* The breakpoint instruction was removed by
-			 * another cpu right after we hit, no further
-			 * handling of this interrupt is appropriate
-			 */
-				regs->ip = (unsigned long)addr;
+			{
 				ret = 1;
-				goto no_kprobe;
+				goto out;
+			}
+			goto preempt_out;
+		} else {
+			set_current_kprobe(p, regs, kcb);
+			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+			if (p->pre_handler && p->pre_handler(p, regs))
+			{
+				/* handler set things up, skip ss setup */
+				ret = 1;
+				goto out;
 			}
-			p = __get_cpu_var(current_kprobe);
-			if (p->break_handler && p->break_handler(p, regs))
-				goto ss_probe;
 		}
-		goto no_kprobe;
-	}
-
-	p = get_kprobe(addr);
-	if (!p) {
+	} else {
 		if (*addr != BREAKPOINT_INSTRUCTION) {
 			/*
 			 * The breakpoint instruction was removed right
@@ -532,34 +528,34 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 */
 			regs->ip = (unsigned long)addr;
 			ret = 1;
+			goto preempt_out;
+		}
+		if (kprobe_running()) {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs))
+				goto ss_probe;
 		}
 		/* Not one of ours: let kernel handle it */
-		goto no_kprobe;
+		goto preempt_out;
 	}
 
-	set_current_kprobe(p, regs, kcb);
-	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
-	if (p->pre_handler && p->pre_handler(p, regs))
-		/* handler has already set things up, so skip ss setup */
-		return 1;
-
 ss_probe:
+	ret = 1;
 #if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
 	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
 		regs->ip = (unsigned long)p->ainsn.insn;
-		preempt_enable_no_resched();
-		return 1;
+		goto preempt_out;
 	}
 #endif
 	prepare_singlestep(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
-	return 1;
+	goto out;
 
-no_kprobe:
+preempt_out:
 	preempt_enable_no_resched();
+out:
 	return ret;
 }
 

From 0a6ef376d4baf298aaa2db71794df821a0d7795c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:20 +0100
Subject: [PATCH 437/890] x86: unification of arch/x86/Makefiles

Unify the 32 and 64 bit specific Makefiles.
The unification was simplest to do in one step although the
readability of the patch suffers a bit from this.

Noteworthy remarks on the unification:
- The 64 bit cpu stuff should be moved to Makefile_32.cpu
  but I did not feel confident doing it due to subtle differences
- The use of cflags-y were abandoned since we have seen one bug where
  we did wrong due to missing assignment to KBUILD_CFLAGS.
  The cc-option marcro uses KBUILD_CFLAGS.
- The "No need to remake" line are deleted. It caused "make -B" to fail
- For 64 bit the sub architecture stuff is not used.
- The way head64.o is specified could be nicer - but it awaits the
  introduction of head32.o (which seems like a win to introduce for readability)
- Patch is checkpatch clean

Patch is tested by doing a defconfig build for i386 and x86_64 and in both
cases monitoring that only relevant files were recompiled when applying
the patch.

[ mingo@elte.hu: build fix ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile    | 237 ++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/Makefile_32 | 177 --------------------------------
 arch/x86/Makefile_64 | 142 --------------------------
 3 files changed, 233 insertions(+), 323 deletions(-)
 delete mode 100644 arch/x86/Makefile_32
 delete mode 100644 arch/x86/Makefile_64

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7aa1dc6d67c8..d3debde0ce0b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,13 +7,242 @@ else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
-# No need to remake these files
-$(srctree)/arch/x86/Makefile%: ;
+# BITS is used as extension for files which are available in a 32 bit
+# and a 64 bit version to simplify shared Makefiles.
+# e.g.: obj-y += foo_$(BITS).o
+export BITS
 
 ifeq ($(CONFIG_X86_32),y)
+        BITS := 32
         UTS_MACHINE := i386
-        include $(srctree)/arch/x86/Makefile_32
+
+        HAS_BIARCH := $(call cc-option-yn, -m32)
+        ifeq ($(HAS_BIARCH),y)
+                AS := $(AS) --32
+                LD := $(LD) -m elf_i386
+                CC := $(CC) -m32
+        endif
+
+        ifdef CONFIG_RELOCATABLE
+                LDFLAGS_vmlinux := --emit-relocs
+        endif
+        CHECKFLAGS += -D__i386__
+
+        KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
+
+        # prevent gcc from keeping the stack 16 byte aligned
+        KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+
+        # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+        # a lot more stack due to the lack of sharing of stacklots:
+        KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+                echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+
+        # CPU-specific tuning. Anything which can be shared with UML should go here.
+        include $(srctree)/arch/x86/Makefile_32.cpu
+        KBUILD_CFLAGS += $(cflags-y)
+
+        # temporary until string.h is fixed
+        KBUILD_CFLAGS += -ffreestanding
 else
+        BITS := 64
         UTS_MACHINE := x86_64
-        include $(srctree)/arch/x86/Makefile_64
+        CHECKFLAGS += -D__x86_64__ -m64
+
+        KBUILD_AFLAGS += -m64
+        KBUILD_CFLAGS += -m64
+
+        # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+        cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+        cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+
+        cflags-$(CONFIG_MCORE2) += \
+                $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+        cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+        KBUILD_CFLAGS += $(cflags-y)
+
+        KBUILD_CFLAGS += -mno-red-zone
+        KBUILD_CFLAGS += -mcmodel=kernel
+
+        KBUILD_CFLAGS += -Wno-sign-compare
+        KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+        # -funit-at-a-time shrinks the kernel .text considerably
+        # unfortunately it makes reading oopses harder.
+        KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+
+        # prevent gcc from generating any FP code by mistake
+        KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+        # this works around some issues with generating unwind tables in older gccs
+        # newer gccs do it by default
+        KBUILD_CFLAGS += -maccumulate-outgoing-args
+
+        stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
+        stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
+                "$(CC)" -fstack-protector )
+        stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
+                "$(CC)" -fstack-protector-all )
+
+        KBUILD_CFLAGS += $(stackp-y)
 endif
+
+# Stackpointer is addressed different for 32 bit and 64 bit x86
+sp-$(CONFIG_X86_32) := esp
+sp-$(CONFIG_X86_64) := rsp
+
+# do binutils support CFI?
+cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
+# is .cfi_signal_frame supported too?
+cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
+
+LDFLAGS := -m elf_$(UTS_MACHINE)
+OBJCOPYFLAGS := -O binary -R .note -R .comment -S
+KBUILD_CFLAGS += -pipe
+
+###
+# Sub architecture support
+# fcore-y is linked before mcore-y files.
+
+# Default subarch .c files
+mcore-y  := arch/x86/mach-default/
+
+# Voyager subarch support
+mflags-$(CONFIG_X86_VOYAGER)	:= -Iinclude/asm-x86/mach-voyager
+mcore-$(CONFIG_X86_VOYAGER)	:= arch/x86/mach-voyager/
+
+# VISWS subarch support
+mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-x86/mach-visws
+mcore-$(CONFIG_X86_VISWS)	:= arch/x86/mach-visws/
+
+# NUMAQ subarch support
+mflags-$(CONFIG_X86_NUMAQ)	:= -Iinclude/asm-x86/mach-numaq
+mcore-$(CONFIG_X86_NUMAQ)	:= arch/x86/mach-default/
+
+# BIGSMP subarch support
+mflags-$(CONFIG_X86_BIGSMP)	:= -Iinclude/asm-x86/mach-bigsmp
+mcore-$(CONFIG_X86_BIGSMP)	:= arch/x86/mach-default/
+
+#Summit subarch support
+mflags-$(CONFIG_X86_SUMMIT)	:= -Iinclude/asm-x86/mach-summit
+mcore-$(CONFIG_X86_SUMMIT)	:= arch/x86/mach-default/
+
+# generic subarchitecture
+mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
+fcore-$(CONFIG_X86_GENERICARCH)	+= arch/x86/mach-generic/
+mcore-$(CONFIG_X86_GENERICARCH)	:= arch/x86/mach-default/
+
+
+# ES7000 subarch support
+mflags-$(CONFIG_X86_ES7000)	:= -Iinclude/asm-x86/mach-es7000
+fcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-es7000/
+mcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-default/
+
+# default subarch .h files
+mflags-y += -Iinclude/asm-x86/mach-default
+
+# 64 bit does not support subarch support - clear sub arch variables
+fcore-$(CONFIG_X86_64)  :=
+mcore-$(CONFIG_X86_64)  :=
+mflags-$(CONFIG_X86_64) :=
+
+KBUILD_CFLAGS += $(mflags-y)
+KBUILD_AFLAGS += $(mflags-y)
+
+###
+# Kernel objects
+
+head-y                := arch/x86/kernel/head_$(BITS).o
+head-$(CONFIG_X86_64) += arch/x86/kernel/head64.o
+head-y                += arch/x86/kernel/init_task.o
+
+libs-y  += arch/x86/lib/
+
+# Sub architecture files that needs linking first
+core-y += $(fcore-y)
+
+# Xen paravirtualization support
+core-$(CONFIG_XEN) += arch/x86/xen/
+
+# lguest paravirtualization support
+core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
+
+core-y += arch/x86/kernel/
+core-y += arch/x86/mm/
+
+# Remaining sub architecture files
+core-y += $(mcore-y)
+
+core-y += arch/x86/crypto/
+core-y += arch/x86/vdso/
+core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+
+# drivers-y are linked after core-y
+drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
+drivers-$(CONFIG_PCI)            += arch/x86/pci/
+
+# must be linked after kernel/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
+
+ifeq ($(CONFIG_X86_32),y)
+drivers-$(CONFIG_PM) += arch/x86/power/
+drivers-$(CONFIG_FB) += arch/x86/video/
+endif
+
+####
+# boot loader support. Several targets are kept for legacy purposes
+
+boot := arch/x86/boot
+
+PHONY += zImage bzImage compressed zlilo bzlilo \
+         zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
+
+# Default kernel to build
+all: bzImage
+
+# KBUILD_IMAGE specify target image being built
+                    KBUILD_IMAGE := $(boot)/bzImage
+zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
+
+zImage bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage
+
+compressed: zImage
+
+zlilo bzlilo: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
+
+zdisk bzdisk: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
+
+fdimage fdimage144 fdimage288 isoimage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+install: vdso_install
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
+
+archclean:
+	$(Q)rm -rf $(objtree)/arch/i386
+	$(Q)rm -rf $(objtree)/arch/x86_64
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+  echo  '* bzImage	- Compressed kernel image (arch/x86/boot/bzImage)'
+  echo  '  install	- Install kernel using'
+  echo  '		   (your) ~/bin/installkernel or'
+  echo  '		   (distribution) /sbin/installkernel or'
+  echo  '		   install to $$(INSTALL_PATH) and run lilo'
+  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
+  echo  '  fdimage      - Create a boot floppy image'
+  echo  '  isoimage     - Create a boot CD-ROM image'
+endef
+
+CLEAN_FILES += arch/x86/boot/fdimage \
+	       arch/x86/boot/image.iso \
+	       arch/x86/boot/mtools.conf
diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
deleted file mode 100644
index 5c6724de904e..000000000000
--- a/arch/x86/Makefile_32
+++ /dev/null
@@ -1,177 +0,0 @@
-#
-# i386 Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" cleaning up for this architecture.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Linus Torvalds
-#
-# 19990713  Artur Skawina <skawina@geocities.com>
-#           Added '-march' and '-mpreferred-stack-boundary' support
-#
-# 20050320  Kianusch Sayah Karadji <kianusch@sk-tech.net>
-#           Added support for GEODE CPU
-
-# BITS is used as extension for files which are available in a 32 bit
-# and a 64 bit version to simplify shared Makefiles.
-# e.g.: obj-y += foo_$(BITS).o
-BITS := 32
-export BITS
-
-HAS_BIARCH      := $(call cc-option-yn, -m32)
-ifeq ($(HAS_BIARCH),y)
-AS              := $(AS) --32
-LD              := $(LD) -m elf_i386
-CC              := $(CC) -m32
-endif
-
-LDFLAGS		:= -m elf_i386
-OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
-ifdef CONFIG_RELOCATABLE
-LDFLAGS_vmlinux := --emit-relocs
-endif
-CHECKFLAGS	+= -D__i386__
-
-KBUILD_CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
-
-# prevent gcc from keeping the stack 16 byte aligned
-KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
-
-# CPU-specific tuning. Anything which can be shared with UML should go here.
-include $(srctree)/arch/x86/Makefile_32.cpu
-
-# temporary until string.h is fixed
-cflags-y += -ffreestanding
-
-# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
-# a lot more stack due to the lack of sharing of stacklots:
-KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
-
-# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-
-# is .cfi_signal_frame supported too?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-
-KBUILD_CFLAGS += $(cflags-y)
-
-# Default subarch .c files
-mcore-y  := arch/x86/mach-default
-
-# Voyager subarch support
-mflags-$(CONFIG_X86_VOYAGER)	:= -Iinclude/asm-x86/mach-voyager
-mcore-$(CONFIG_X86_VOYAGER)	:= arch/x86/mach-voyager
-
-# VISWS subarch support
-mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-x86/mach-visws
-mcore-$(CONFIG_X86_VISWS)	:= arch/x86/mach-visws
-
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ)	:= -Iinclude/asm-x86/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ)	:= arch/x86/mach-default
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP)	:= -Iinclude/asm-x86/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP)	:= arch/x86/mach-default
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
-mcore-$(CONFIG_X86_SUMMIT)  := arch/x86/mach-default
-
-# generic subarchitecture
-mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-x86/mach-generic
-mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default
-core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000)	:= -Iinclude/asm-x86/mach-es7000
-mcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-default
-core-$(CONFIG_X86_ES7000)	:= arch/x86/mach-es7000/
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN)		+= arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST)	+= arch/x86/lguest/
-
-# default subarch .h files
-mflags-y += -Iinclude/asm-x86/mach-default
-
-head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o
-
-libs-y 					+= arch/x86/lib/
-core-y					+= arch/x86/kernel/ \
-					   arch/x86/mm/ \
-					   $(mcore-y)/ \
-					   arch/x86/crypto/ \
-					   arch/x86/vdso/
-drivers-$(CONFIG_MATH_EMULATION)	+= arch/x86/math-emu/
-drivers-$(CONFIG_PCI)			+= arch/x86/pci/
-# must be linked after kernel/
-drivers-$(CONFIG_OPROFILE)		+= arch/x86/oprofile/
-drivers-$(CONFIG_PM)			+= arch/x86/power/
-drivers-$(CONFIG_FB)                    += arch/x86/video/
-
-KBUILD_CFLAGS += $(mflags-y)
-KBUILD_AFLAGS += $(mflags-y)
-
-boot := arch/x86/boot
-
-PHONY += zImage bzImage compressed zlilo bzlilo \
-         zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
-
-all: bzImage
-
-# KBUILD_IMAGE specify target image being built
-                    KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
-
-zImage bzImage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
-	$(Q)mkdir -p $(objtree)/arch/i386/boot
-	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
-
-compressed: zImage
-
-zlilo bzlilo: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
-
-zdisk bzdisk: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-
-install: vdso_install
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
-
-PHONY += vdso_install
-vdso_install:
-	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
-
-archclean:
-	$(Q)rm -rf $(objtree)/arch/i386
-	$(Q)rm -rf $(objtree)/arch/x86_64
-	$(Q)$(MAKE) $(clean)=arch/x86/boot
-
-define archhelp
-  echo  '* bzImage	- Compressed kernel image (arch/x86/boot/bzImage)'
-  echo  '  install	- Install kernel using'
-  echo  '		   (your) ~/bin/installkernel or'
-  echo  '		   (distribution) /sbin/installkernel or'
-  echo  '		   install to $$(INSTALL_PATH) and run lilo'
-  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
-  echo  '  fdimage      - Create a boot floppy image'
-  echo  '  isoimage     - Create a boot CD-ROM image'
-endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
-	       arch/x86/boot/image.iso \
-	       arch/x86/boot/mtools.conf
diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64
deleted file mode 100644
index d35a0c50b67b..000000000000
--- a/arch/x86/Makefile_64
+++ /dev/null
@@ -1,142 +0,0 @@
-#
-# x86_64 Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Linus Torvalds
-#
-# 19990713  Artur Skawina <skawina@geocities.com>
-#           Added '-march' and '-mpreferred-stack-boundary' support
-# 20000913  Pavel Machek <pavel@suse.cz>
-#	    Converted for x86_64 architecture
-# 20010105  Andi Kleen, add IA32 compiler.
-#           ....and later removed it again....
-#
-# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
-
-# BITS is used as extension for files which are available in a 32 bit
-# and a 64 bit version to simplify shared Makefiles.
-# e.g.: obj-y += foo_$(BITS).o
-BITS := 64
-export BITS
-
-LDFLAGS		:= -m elf_x86_64
-OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
-LDFLAGS_vmlinux :=
-CHECKFLAGS      += -D__x86_64__ -m64
-
-cflags-y	:=
-cflags-kernel-y	:=
-cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
-cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
-# will eventually. Use -mtune=generic as fallback
-cflags-$(CONFIG_MCORE2) += \
-	$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
-
-cflags-y += -m64
-cflags-y += -mno-red-zone
-cflags-y += -mcmodel=kernel
-cflags-y += -pipe
-cflags-y += -Wno-sign-compare
-cflags-y += -fno-asynchronous-unwind-tables
-ifneq ($(CONFIG_DEBUG_INFO),y)
-# -fweb shrinks the kernel a bit, but the difference is very small
-# it also messes up debugging, so don't use it for now.
-#cflags-y += $(call cc-option,-fweb)
-endif
-# -funit-at-a-time shrinks the kernel .text considerably
-# unfortunately it makes reading oopses harder.
-cflags-y += $(call cc-option,-funit-at-a-time)
-# prevent gcc from generating any FP code by mistake
-cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
-# this works around some issues with generating unwind tables in older gccs
-# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
-
-# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-
-# is .cfi_signal_frame supported too?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-
-cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
-cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
-
-KBUILD_CFLAGS += $(cflags-y)
-CFLAGS_KERNEL += $(cflags-kernel-y)
-KBUILD_AFLAGS += -m64
-
-head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o
-
-libs-y 					+= arch/x86/lib/
-core-y					+= arch/x86/kernel/ \
-					   arch/x86/mm/ \
-					   arch/x86/crypto/ \
-					   arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION)		+= arch/x86/ia32/
-drivers-$(CONFIG_PCI)			+= arch/x86/pci/
-drivers-$(CONFIG_OPROFILE)		+= arch/x86/oprofile/
-
-boot := arch/x86/boot
-
-PHONY += bzImage bzlilo install archmrproper \
-	 fdimage fdimage144 fdimage288 isoimage archclean
-
-#Default target when executing "make"
-all: bzImage
-
-BOOTIMAGE                     := arch/x86/boot/bzImage
-KBUILD_IMAGE                  := $(BOOTIMAGE)
-
-bzImage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE)
-	$(Q)mkdir -p $(objtree)/arch/x86_64/boot
-	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
-
-bzlilo: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo
-
-bzdisk: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
-
-install: vdso_install
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ 
-
-vdso_install:
-	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
-
-archclean:
-	$(Q)rm -rf $(objtree)/arch/i386
-	$(Q)rm -rf $(objtree)/arch/x86_64
-	$(Q)$(MAKE) $(clean)=$(boot)
-
-define archhelp
-  echo  '* bzImage	- Compressed kernel image (arch/x86/boot/bzImage)'
-  echo  '  install	- Install kernel using'
-  echo  '		   (your) ~/bin/installkernel or'
-  echo  '		   (distribution) /sbin/installkernel or'
-  echo  '		   install to $$(INSTALL_PATH) and run lilo'
-  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
-  echo  '  fdimage      - Create a boot floppy image'
-  echo  '  isoimage     - Create a boot CD-ROM image'
-endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
-	       arch/x86/boot/image.iso \
-	       arch/x86/boot/mtools.conf
-
-

From 25dfeeb757bdf06521d65973676b21dac1d26d20 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:21 +0100
Subject: [PATCH 438/890] x86: share more options between 32 and 64 bit build

On recommendation from Andi Kleen share a few more options
between 32 and 64 bit builds.
A defconfig build for i386 did not show any difference in
size of text and data.

The additional shared options are:
-Wno-sign-compare
-fno-asynchronous-unwind-tables
-mno-sse
-mno-mmx
-mno-sse2
-mno-3dnow

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d3debde0ce0b..0c5b4072d134 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -64,14 +64,10 @@ else
         KBUILD_CFLAGS += -mno-red-zone
         KBUILD_CFLAGS += -mcmodel=kernel
 
-        KBUILD_CFLAGS += -Wno-sign-compare
-        KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
         # -funit-at-a-time shrinks the kernel .text considerably
         # unfortunately it makes reading oopses harder.
         KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
 
-        # prevent gcc from generating any FP code by mistake
-        KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
         # this works around some issues with generating unwind tables in older gccs
         # newer gccs do it by default
         KBUILD_CFLAGS += -maccumulate-outgoing-args
@@ -98,7 +94,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 OBJCOPYFLAGS := -O binary -R .note -R .comment -S
+
+# Speed up the build
 KBUILD_CFLAGS += -pipe
+# Workaround for a gcc prelease that unfortunately was shipped in a suse release
+KBUILD_CFLAGS += -Wno-sign-compare
+#
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# prevent gcc from generating any FP code by mistake
+KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 
 ###
 # Sub architecture support

From 4ba7e5cd499a72983e6d011e76c1d8fad896d310 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:23 +0100
Subject: [PATCH 439/890] x86: biarch support for 32 bit builds beautified

There were no reason to mess around with CC, AS and LD.
Fixing this up avoided duplicated option for ld.

A small fixlet were needed in boot/Makefile which assumed
that CC were modified.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile      | 11 ++++-------
 arch/x86/boot/Makefile |  4 +---
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 0c5b4072d134..bfe061de8ed6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -15,18 +15,15 @@ export BITS
 ifeq ($(CONFIG_X86_32),y)
         BITS := 32
         UTS_MACHINE := i386
+        CHECKFLAGS += -D__i386__
 
-        HAS_BIARCH := $(call cc-option-yn, -m32)
-        ifeq ($(HAS_BIARCH),y)
-                AS := $(AS) --32
-                LD := $(LD) -m elf_i386
-                CC := $(CC) -m32
-        endif
+        biarch := $(call cc-option,-m32)
+        KBUILD_AFLAGS += $(biarch)
+        KBUILD_CFLAGS += $(biarch)
 
         ifdef CONFIG_RELOCATABLE
                 LDFLAGS_vmlinux := --emit-relocs
         endif
-        CHECKFLAGS += -D__i386__
 
         KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
 
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 7a3116ccf387..dcad6507f196 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -49,10 +49,7 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE)
 
 # How to compile the 16-bit code.  Note we always compile for -march=i386,
 # that way we can complain to the user if the CPU is insufficient.
-cflags-$(CONFIG_X86_32) :=
-cflags-$(CONFIG_X86_64) := -m32
 KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
-		   $(cflags-y) \
 		   -Wall -Wstrict-prototypes \
 		   -march=i386 -mregparm=3 \
 		   -include $(srctree)/$(src)/code16gcc.h \
@@ -62,6 +59,7 @@ KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 			$(call cc-option, -fno-unit-at-a-time)) \
 		   $(call cc-option, -fno-stack-protector) \
 		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS +=   $(call cc-option,-m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
 $(obj)/zImage:  IMAGE_OFFSET := 0x1000

From d531b4fd2fa3c619d973ea639476f172d5a3732d Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:27 +0100
Subject: [PATCH 440/890] x86: teach vdso to clean

A few files remained after 'make clean' in arch/x86/vdso/.
Teach vdso to clean up those files in a bit brutal fashion.
The filenames are just hardcoded in the Makefile.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 58708f6c1329..d28dda574700 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -130,3 +130,5 @@ $(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
 
 PHONY += vdso_install $(vdso-install-y)
 vdso_install: $(vdso-install-y)
+
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*

From 6b0c3d44d33e9429dbc568dc7fd3aee3774c5707 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:27 +0100
Subject: [PATCH 441/890] x86: unify arch/x86/kernel/Makefile(s)

Combine the 32 and 64 bit specific Makefiles in one file.
While doing so link order was (almost) preserved on 32 bit
but on 64 bit link order changed a lot.

Patch was checked with defconfig + allyesconfig builds.
The same .o files were linked in these configurations.

To keep readability of the Makefiles a few Kconfig
symbols was added/modified and it was checked that
they were not used anywhere else.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig            | 10 +++-
 arch/x86/Kconfig.debug      |  3 +-
 arch/x86/kernel/Makefile    | 93 +++++++++++++++++++++++++++++++++----
 arch/x86/kernel/Makefile_32 | 54 ---------------------
 arch/x86/kernel/Makefile_64 | 52 ---------------------
 5 files changed, 95 insertions(+), 117 deletions(-)
 delete mode 100644 arch/x86/kernel/Makefile_32
 delete mode 100644 arch/x86/kernel/Makefile_64

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fa6fa52248d9..3d2c8dfba915 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -129,9 +129,17 @@ config GENERIC_PENDING_IRQ
 
 config X86_SMP
 	bool
-	depends on X86_32 && SMP && !X86_VOYAGER
+	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
 	default y
 
+config X86_32_SMP
+	def_bool y
+	depends on X86_32 && SMP
+
+config X86_64_SMP
+	def_bool y
+	depends on X86_64 && SMP
+
 config X86_HT
 	bool
 	depends on SMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b747ab38814d..660200915baa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -75,8 +75,7 @@ config X86_FIND_SMP_CONFIG
 
 config X86_MPPARSE
 	def_bool y
-	depends on X86_LOCAL_APIC && !X86_VISWS
-	depends on X86_32
+	depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
 
 config DOUBLEFAULT
 	default y
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 38573340b143..0903bbf0ca4d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,9 +1,86 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/kernel/Makefile_32
-else
-include ${srctree}/arch/x86/kernel/Makefile_64
-endif
+#
+# Makefile for the linux kernel.
+#
 
-# Workaround to delete .lds files with make clean
-# The problem is that we do not enter Makefile_32 with make clean.
-clean-files := vsyscall*.lds vsyscall*.so
+extra-y                := head_$(BITS).o init_task.o vmlinux.lds
+extra-$(CONFIG_X86_64) += head64.o
+
+CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
+CFLAGS_vsyscall_64.o := $(PROFILING) -g0
+
+obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
+obj-y			+= traps_$(BITS).o irq_$(BITS).o
+obj-y			+= time_$(BITS).o ioport_$(BITS).o ldt.o
+obj-y			+= setup_$(BITS).o i8259_$(BITS).o
+obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
+obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
+obj-y			+= pci-dma_$(BITS).o  bootflag.o e820_$(BITS).o
+obj-y			+= quirks.o i8237.o topology.o
+obj-y			+= alternative.o i8253.o
+obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
+obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
+
+obj-y				+= i387.o
+obj-y				+= ptrace.o
+obj-y				+= ds.o
+obj-$(CONFIG_X86_32)		+= tls.o
+obj-$(CONFIG_IA32_EMULATION)	+= tls.o
+obj-y				+= step.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-y				+= cpu/
+obj-y				+= acpi/
+obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot_32.o
+obj-$(CONFIG_X86_64)            += reboot_64.o
+obj-$(CONFIG_MCA)		+= mca_32.o
+obj-$(CONFIG_X86_MSR)		+= msr.o
+obj-$(CONFIG_X86_CPUID)		+= cpuid.o
+obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_PCI)		+= early-quirks.o
+obj-$(CONFIG_APM)		+= apm_32.o
+obj-$(CONFIG_X86_SMP)		+= smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
+obj-$(CONFIG_X86_32_SMP)	+= smpcommon_32.o
+obj-$(CONFIG_X86_64_SMP)	+= smp_64.o smpboot_64.o tsc_sync.o
+obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
+obj-$(CONFIG_X86_MPPARSE)	+= mpparse_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi_$(BITS).o
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
+obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
+obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
+obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
+obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_MODULES)		+= module_$(BITS).o
+obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
+obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
+obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
+obj-$(CONFIG_VM86)		+= vm86_32.o
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+
+obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
+
+obj-$(CONFIG_K8_NB)		+= k8.o
+obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
+
+obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-y				+= pcspeaker.o
+
+obj-$(CONFIG_SCx200)		+= scx200_32.o
+
+###
+# 64 bit specific files
+ifeq ($(CONFIG_X86_64),y)
+        obj-y				+= genapic_64.o genapic_flat_64.o
+        obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
+        obj-$(CONFIG_AUDIT)		+= audit_64.o
+        obj-$(CONFIG_PM)		+= suspend_64.o
+        obj-$(CONFIG_HIBERNATION)	+= suspend_asm_64.o
+
+        obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
+        obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
+        obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
+endif
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
deleted file mode 100644
index 86c6327798b2..000000000000
--- a/arch/x86/kernel/Makefile_32
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head_32.o init_task.o vmlinux.lds
-CPPFLAGS_vmlinux.lds += -Ui386
-
-obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
-		time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \
-		pci-dma_32.o i386_ksyms_32.o bootflag.o e820_32.o\
-		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
-
-obj-y				+= i387.o
-obj-y				+= ptrace.o
-obj-y				+= ds.o
-obj-y				+= tls.o
-obj-y				+= step.o
-obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-y				+= cpu/
-obj-y				+= acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot_32.o
-obj-$(CONFIG_MCA)		+= mca_32.o
-obj-$(CONFIG_X86_MSR)		+= msr.o
-obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= microcode.o
-obj-$(CONFIG_PCI)		+= early-quirks.o
-obj-$(CONFIG_APM)		+= apm_32.o
-obj-$(CONFIG_X86_SMP)		+= smp_32.o smpboot_32.o tsc_sync.o
-obj-$(CONFIG_SMP)		+= smpcommon_32.o
-obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_32.o
-obj-$(CONFIG_X86_MPPARSE)	+= mpparse_32.o
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_32.o nmi_32.o
-obj-$(CONFIG_X86_IO_APIC)	+= io_apic_32.o
-obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
-obj-$(CONFIG_KEXEC)		+= machine_kexec_32.o relocate_kernel_32.o crash.o
-obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_32.o
-obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
-obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
-obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_MODULES)		+= module_32.o
-obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
-obj-$(CONFIG_EFI) 		+= efi.o efi_32.o efi_stub_32.o
-obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
-obj-$(CONFIG_VM86)		+= vm86_32.o
-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
-obj-$(CONFIG_K8_NB)		+= k8.o
-obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
-
-obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_32.o
-obj-y				+= pcspeaker.o
-
-obj-$(CONFIG_SCx200)		+= scx200_32.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
deleted file mode 100644
index b8f9d13eb5e3..000000000000
--- a/arch/x86/kernel/Makefile_64
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y 	:= head_64.o head64.o init_task.o vmlinux.lds
-CPPFLAGS_vmlinux.lds += -Ux86_64
-
-obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
-		time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \
-		x8664_ksyms_64.o syscall_64.o vsyscall_64.o \
-		setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
-		pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
-		i8253.o io_delay.o rtc.o
-
-obj-y				+= ptrace.o
-obj-y				+= ds.o
-obj-y				+= step.o
-
-obj-y				+= i387.o
-obj-$(CONFIG_IA32_EMULATION)	+= tls.o
-obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-y				+= cpu/
-obj-y				+= acpi/
-obj-$(CONFIG_X86_MSR)		+= msr.o
-obj-$(CONFIG_MICROCODE)		+= microcode.o
-obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_SMP)		+= smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o
-obj-y				+= apic_64.o  nmi_64.o
-obj-y				+= io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o
-obj-$(CONFIG_KEXEC)		+= machine_kexec_64.o relocate_kernel_64.o crash.o
-obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_64.o
-obj-$(CONFIG_PM)		+= suspend_64.o
-obj-$(CONFIG_HIBERNATION)	+= suspend_asm_64.o
-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
-obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
-obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
-obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
-obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
-obj-$(CONFIG_K8_NB)		+= k8.o
-obj-$(CONFIG_AUDIT)		+= audit_64.o
-obj-$(CONFIG_EFI)		+= efi.o efi_64.o efi_stub_64.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_64.o
-
-obj-$(CONFIG_MODULES)		+= module_64.o
-obj-$(CONFIG_PCI)		+= early-quirks.o
-
-obj-y				+= topology.o
-obj-y				+= pcspeaker.o
-
-CFLAGS_vsyscall_64.o		:= $(PROFILING) -g0

From db569afa4e69ceaf89b3723a8f9cd2846ed09c76 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:30 +0100
Subject: [PATCH 442/890] x86: unify arch/x86/boot/compressed/Makefile(s)

Trivial unification of the two Makefiles.
Tested doing a defconfig build for both 32 and 64 bit and
no build changes occured.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/Makefile    | 62 +++++++++++++++++++++++++++-
 arch/x86/boot/compressed/Makefile_32 | 50 ----------------------
 arch/x86/boot/compressed/Makefile_64 | 30 --------------
 3 files changed, 60 insertions(+), 82 deletions(-)
 delete mode 100644 arch/x86/boot/compressed/Makefile_32
 delete mode 100644 arch/x86/boot/compressed/Makefile_64

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 52c1db854520..d6ae347d11f1 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -1,5 +1,63 @@
+#
+# linux/arch/x86/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc_$(BITS).o piggy.o
+
+KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
+cflags-$(CONFIG_X86_64) := -mcmodel=small
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
+
+KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+
+LDFLAGS := -m elf_$(UTS_MACHINE)
+LDFLAGS_vmlinux := -T
+
+$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/piggy.o FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+
 ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/boot/compressed/Makefile_32
+targets += vmlinux.bin.all vmlinux.relocs
+hostprogs-y := relocs
+
+quiet_cmd_relocs = RELOCS  $@
+      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+	$(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
+quiet_cmd_relocbin = BUILD   $@
+      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
+$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,relocbin)
+
+ifdef CONFIG_RELOCATABLE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
+	$(call if_changed,gzip)
 else
-include ${srctree}/arch/x86/boot/compressed/Makefile_64
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
 endif
+LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+
+else
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
+endif
+
+
+$(obj)/piggy.o: $(obj)/vmlinux_$(BITS).scr $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile_32 b/arch/x86/boot/compressed/Makefile_32
deleted file mode 100644
index e43ff7c56e6e..000000000000
--- a/arch/x86/boot/compressed/Makefile_32
+++ /dev/null
@@ -1,50 +0,0 @@
-#
-# linux/arch/x86/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head_32.o misc_32.o piggy.o \
-			vmlinux.bin.all vmlinux.relocs
-EXTRA_AFLAGS	:= -traditional
-
-LDFLAGS_vmlinux := -T
-hostprogs-y	:= relocs
-
-KBUILD_CFLAGS  := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
-	   -fno-strict-aliasing -fPIC \
-	   $(call cc-option,-ffreestanding) \
-	   $(call cc-option,-fno-stack-protector)
-LDFLAGS := -m elf_i386
-
-$(obj)/vmlinux: $(src)/vmlinux_32.lds $(obj)/head_32.o $(obj)/misc_32.o $(obj)/piggy.o FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-quiet_cmd_relocs = RELOCS  $@
-      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
-	$(call if_changed,relocs)
-
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD   $@
-      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
-	$(call if_changed,relocbin)
-
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,gzip)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-endif
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-
-$(obj)/piggy.o: $(src)/vmlinux_32.scr $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile_64 b/arch/x86/boot/compressed/Makefile_64
deleted file mode 100644
index 7801e8dd90b2..000000000000
--- a/arch/x86/boot/compressed/Makefile_64
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# linux/arch/x86/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head_64.o misc_64.o piggy.o
-
-KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUXINCLUDE) -O2  \
-	  -fno-strict-aliasing -fPIC -mcmodel=small \
-	   $(call cc-option, -ffreestanding) \
-	   $(call cc-option, -fno-stack-protector)
-KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-LDFLAGS := -m elf_x86_64
-
-LDFLAGS_vmlinux := -T
-$(obj)/vmlinux: $(src)/vmlinux_64.lds $(obj)/head_64.o $(obj)/misc_64.o $(obj)/piggy.o FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-
-$(obj)/piggy.o: $(obj)/vmlinux_64.scr $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,ld)

From 583d0e90ea52f02ae4d103359ee59e8218799e27 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:31 +0100
Subject: [PATCH 443/890] x86: unify arch/x86/lib/Makefile(s)

Trivial unification of Makefiles for the
x86 specific library part.
Linking order is slightly modified but should be harmless.

Tested doing a defconfig build before and after and saw
no build changes.

It adds almost as many lines as it deletes - bacause
I broke a few lines up fo readability in the Makefile.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/lib/Makefile    | 26 ++++++++++++++++++++++++--
 arch/x86/lib/Makefile_32 | 11 -----------
 arch/x86/lib/Makefile_64 | 13 -------------
 3 files changed, 24 insertions(+), 26 deletions(-)
 delete mode 100644 arch/x86/lib/Makefile_32
 delete mode 100644 arch/x86/lib/Makefile_64

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 329da276c6f1..4876182daf8a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -1,5 +1,27 @@
+#
+# Makefile for x86 specific library files.
+#
+
+obj-$(CONFIG_SMP) := msr-on-cpu.o
+
+lib-y := delay_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
+lib-y += memcpy_$(BITS).o
+
 ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/lib/Makefile_32
+        lib-y += checksum_32.o
+        lib-y += strstr_32.o
+        lib-y += bitops_32.o semaphore_32.o string_32.o
+
+        lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
-include ${srctree}/arch/x86/lib/Makefile_64
+        obj-y += io_64.o iomap_copy_64.o
+
+        CFLAGS_csum-partial_64.o := -funroll-loops
+
+        lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+        lib-y += thunk_64.o clear_page_64.o copy_page_64.o
+        lib-y += bitstr_64.o bitops_64.o
+        lib-y += memmove_64.o memset_64.o
+        lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 endif
diff --git a/arch/x86/lib/Makefile_32 b/arch/x86/lib/Makefile_32
deleted file mode 100644
index 98d1f1e2e2ef..000000000000
--- a/arch/x86/lib/Makefile_32
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for i386-specific library files..
-#
-
-
-lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \
-	bitops_32.o semaphore_32.o string_32.o
-
-lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
-
-obj-$(CONFIG_SMP)	+= msr-on-cpu.o
diff --git a/arch/x86/lib/Makefile_64 b/arch/x86/lib/Makefile_64
deleted file mode 100644
index bbabad3c9335..000000000000
--- a/arch/x86/lib/Makefile_64
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# Makefile for x86_64-specific library files.
-#
-
-CFLAGS_csum-partial_64.o := -funroll-loops
-
-obj-y := io_64.o iomap_copy_64.o
-obj-$(CONFIG_SMP)	+= msr-on-cpu.o
-
-lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \
-	usercopy_64.o getuser_64.o putuser_64.o  \
-	thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o
-lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o

From fb0328e2e6407d6f48a608aefa62b2be91989e7e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jan 2008 13:32:31 +0100
Subject: [PATCH 444/890] x86: reduce CONFIG_X86_PPRO_FENCE bloat

CONFIG_X86_PPRO_FENCE bloats text:

i386 allmodconf: size mm/built-in.o
           text    data     bss     dec     hex         text ratio
vanilla: 163082   20372   40120  223574   36956          100.00%
bugfix : 163509   20372   40120  224001   36b01            0.26%
noppro : 162191   20372   40120  222683   365db         -  0.55%
both   : 162267   20372   40120  222759   36627         -  0.50% (+0.05% vs noppro)

So with the ppro memory ordering bug out of the way, the PG_uptodate fix
only adds 76 bytes of text.

allow this config to be specified by distros.

[ mingo@elte.hu: x86.git merge ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.cpu | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 018d68e3184e..e09a6b73a1aa 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -321,8 +321,17 @@ config X86_XADD
 	depends on X86_32 && !M386
 
 config X86_PPRO_FENCE
-	def_bool y
+	bool "PentiumPro memory ordering errata workaround"
 	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
+	help
+	  Old PentiumPro multiprocessor systems had errata that could cause memory
+	  operations to violate the x86 ordering standard in rare cases. Enabling this
+	  option will attempt to work around some (but not all) occurances of
+	  this problem, at the cost of much heavier spinlock and memory barrier
+	  operations.
+
+	  If unsure, say n here. Even distro kernels should think twice before enabling
+	  this: there are few systems, and an unlikely bug.
 
 config X86_F00F_BUG
 	def_bool y

From 39657b6546558469734eb960f7c0c9492111096f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:32:31 +0100
Subject: [PATCH 445/890] git-x86: drivers/pnp/pnpbios/bioscalls.c build fix

drivers/pnp/pnpbios/bioscalls.c:64: warning: (near initialization for 'bad_bios_desc.<anonymous>')

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pnp/pnpbios/bioscalls.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 5dba68fe33f5..a8364d815222 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -61,7 +61,7 @@ set_base(gdt[(selname) >> 3], (u32)(address)); \
 set_limit(gdt[(selname) >> 3], size); \
 } while(0)
 
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
+static struct desc_struct bad_bios_desc;
 
 /*
  * At some point we want to use this stack frame pointer to unwind
@@ -477,6 +477,9 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.offset = header->fields.pm16offset;
 	pnp_bios_callpoint.segment = PNP_CS16;
 
+	bad_bios_desc.a = 0;
+	bad_bios_desc.b = 0x00409200;
+
 	set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
 	_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
 	for (i = 0; i < NR_CPUS; i++) {

From ff3cf856120743c7386e8f6ab9f08e068886ce5c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:31 +0100
Subject: [PATCH 446/890] x86: hlt on early crash

H. Peter Anvin <hpa@zytor.com> wrote:

> It probably should actually HLT, to avoid sucking power, and stressing
> the thermal system.  We're dead at this point, and the early 486's
> which had problems with HLT will lock up - we don't care.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c | 3 ++-
 arch/x86/boot/compressed/misc_64.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index 288e16283ef9..9103652058c4 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -339,7 +339,8 @@ static void error(char *x)
 	putstr(x);
 	putstr("\n\n -- System halted");
 
-	while(1);	/* Halt */
+	while (1)
+		asm("hlt");
 }
 
 asmlinkage void decompress_kernel(void *rmode, unsigned long end,
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 7d8338e21b00..8494f0dcff21 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -338,7 +338,8 @@ static void error(char *x)
 	putstr(x);
 	putstr("\n\n -- System halted");
 
-	while(1);	/* Halt */
+	while (1)
+		asm("hlt");
 }
 
 asmlinkage void decompress_kernel(void *rmode, unsigned long heap,

From 8eed92605334f1cd8fd025631f6a61522746b124 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:32:31 +0100
Subject: [PATCH 447/890] x86: coding style cleanup for kernel/bootflag.c

This patch eliminates checkpatch.pl complaints on bootflag.c

No code changed:

   text    data     bss     dec     hex filename
    321       8       0     329     149 bootflag.o.before
    321       8       0     329     149 bootflag.o.after

   md5:
      9c1b474bcf25ddc1724a29c19880043f  bootflag.o.before.asm
      9c1b474bcf25ddc1724a29c19880043f  bootflag.o.after.asm

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/bootflag.c | 52 ++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 0b9860530a6b..30f25a75fe28 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -1,8 +1,6 @@
 /*
  *	Implement 'Simple Boot Flag Specification 2.0'
  */
-
-
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -14,40 +12,38 @@
 
 #include <linux/mc146818rtc.h>
 
-
 #define SBF_RESERVED (0x78)
 #define SBF_PNPOS    (1<<0)
 #define SBF_BOOTING  (1<<1)
 #define SBF_DIAG     (1<<2)
 #define SBF_PARITY   (1<<7)
 
-
 int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
 
-
 static int __init parity(u8 v)
 {
 	int x = 0;
 	int i;
-	
-	for(i=0;i<8;i++)
-	{
-		x^=(v&1);
-		v>>=1;
+
+	for (i = 0; i < 8; i++) {
+		x ^= (v & 1);
+		v >>= 1;
 	}
+
 	return x;
 }
 
 static void __init sbf_write(u8 v)
 {
 	unsigned long flags;
-	if(sbf_port != -1)
-	{
-		v &= ~SBF_PARITY;
-		if(!parity(v))
-			v|=SBF_PARITY;
 
-		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v);
+	if (sbf_port != -1) {
+		v &= ~SBF_PARITY;
+		if (!parity(v))
+			v |= SBF_PARITY;
+
+		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
+			sbf_port, v);
 
 		spin_lock_irqsave(&rtc_lock, flags);
 		CMOS_WRITE(v, sbf_port);
@@ -57,33 +53,41 @@ static void __init sbf_write(u8 v)
 
 static u8 __init sbf_read(void)
 {
-	u8 v;
 	unsigned long flags;
-	if(sbf_port == -1)
+	u8 v;
+
+	if (sbf_port == -1)
 		return 0;
+
 	spin_lock_irqsave(&rtc_lock, flags);
 	v = CMOS_READ(sbf_port);
 	spin_unlock_irqrestore(&rtc_lock, flags);
+
 	return v;
 }
 
 static int __init sbf_value_valid(u8 v)
 {
-	if(v&SBF_RESERVED)		/* Reserved bits */
+	if (v & SBF_RESERVED)		/* Reserved bits */
 		return 0;
-	if(!parity(v))
+	if (!parity(v))
 		return 0;
+
 	return 1;
 }
 
 static int __init sbf_init(void)
 {
 	u8 v;
-	if(sbf_port == -1)
+
+	if (sbf_port == -1)
 		return 0;
+
 	v = sbf_read();
-	if(!sbf_value_valid(v))
-		printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v);
+	if (!sbf_value_valid(v)) {
+		printk(KERN_WARNING "Simple Boot Flag value 0x%x read from "
+			"CMOS RAM was invalid\n", v);
+	}
 
 	v &= ~SBF_RESERVED;
 	v &= ~SBF_BOOTING;
@@ -92,7 +96,7 @@ static int __init sbf_init(void)
 	v |= SBF_PNPOS;
 #endif
 	sbf_write(v);
+
 	return 0;
 }
-
 module_init(sbf_init);

From 037f52aca499d9d4e18d03824ce1cd481330c778 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:32:32 +0100
Subject: [PATCH 448/890] x86: gitignore arch/x86/vdso files

Teach git to ignore generated files in
arch/x86/vdso/*

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/.gitignore        | 4 ++++
 arch/x86/vdso/vdso32/.gitignore | 1 +
 2 files changed, 5 insertions(+)
 create mode 100644 arch/x86/vdso/vdso32/.gitignore

diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index f8b69d84238e..b9030e533418 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -1 +1,5 @@
 vdso.lds
+vdso-syms.lds
+vdso32-syms.lds
+vdso32-syscall-syms.lds
+vdso32-sysenter-syms.lds
diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/vdso/vdso32/.gitignore
new file mode 100644
index 000000000000..e45fba9d0ced
--- /dev/null
+++ b/arch/x86/vdso/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds

From 1c858087cd49d36b2f25b4fc2a47e39ac6cf09df Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 30 Jan 2008 13:32:32 +0100
Subject: [PATCH 449/890] x86: default to PCI=y

PCI is one of the few hardware stuff where defaulting to y makes sense.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3d2c8dfba915..5271665a22e0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1339,7 +1339,7 @@ menu "Bus options (PCI etc.)"
 config PCI
 	bool "PCI support" if !X86_VISWS
 	depends on !X86_VOYAGER
-	default y if X86_VISWS
+	default y
 	select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
 	help
 	  Find out whether you have a PCI motherboard. PCI is the name of a

From 3f4380a1e0ea44bc1062ca55e8e479ddcda369fc Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:32:32 +0100
Subject: [PATCH 450/890] x86: get rid of checkpatch.pl complains on apm_32.c

This patch eliminates most of code-style errors
discovered by checkpatch.pl on arch/x86/kernel/apm_32.c

no code changed:

      text    data     bss     dec     hex filename
     12142    1837      84   14063    36ef apm_32.o.before
     12142    1837      84   14063    36ef apm_32.o.after

   md5:
       2676b881ad55e387da4a995e8b9ee372  apm_32.o.before.asm
       2676b881ad55e387da4a995e8b9ee372  apm_32.o.after.asm

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apm_32.c | 364 +++++++++++++++++++++------------------
 1 file changed, 195 insertions(+), 169 deletions(-)

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 2467df7eca0b..955dd43b1676 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -323,7 +323,7 @@ extern int (*console_blank_hook)(int);
 /*
  * Ignore suspend events for this amount of time after a resume
  */
-#define DEFAULT_BOUNCE_INTERVAL		(3 * HZ)
+#define DEFAULT_BOUNCE_INTERVAL	(3 * HZ)
 
 /*
  * Maximum number of events stored
@@ -335,7 +335,7 @@ extern int (*console_blank_hook)(int);
  */
 struct apm_user {
 	int		magic;
-	struct apm_user *	next;
+	struct apm_user *next;
 	unsigned int	suser: 1;
 	unsigned int	writer: 1;
 	unsigned int	reader: 1;
@@ -371,44 +371,44 @@ struct apm_user {
 static struct {
 	unsigned long	offset;
 	unsigned short	segment;
-}				apm_bios_entry;
-static int			clock_slowed;
-static int			idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
-static int			idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int			set_pm_idle;
-static int			suspends_pending;
-static int			standbys_pending;
-static int			ignore_sys_suspend;
-static int			ignore_normal_resume;
-static int			bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
+} apm_bios_entry;
+static int clock_slowed;
+static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
+static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
+static int set_pm_idle;
+static int suspends_pending;
+static int standbys_pending;
+static int ignore_sys_suspend;
+static int ignore_normal_resume;
+static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
 
-static int			debug __read_mostly;
-static int			smp __read_mostly;
-static int			apm_disabled = -1;
+static int debug __read_mostly;
+static int smp __read_mostly;
+static int apm_disabled = -1;
 #ifdef CONFIG_SMP
-static int			power_off;
+static int power_off;
 #else
-static int			power_off = 1;
+static int power_off = 1;
 #endif
 #ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int			realmode_power_off = 1;
+static int realmode_power_off = 1;
 #else
-static int			realmode_power_off;
+static int realmode_power_off;
 #endif
 #ifdef CONFIG_APM_ALLOW_INTS
-static int			allow_ints = 1;
+static int allow_ints = 1;
 #else
-static int			allow_ints;
+static int allow_ints;
 #endif
-static int			broken_psr;
+static int broken_psr;
 
 static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user *	user_list;
+static struct apm_user *user_list;
 static DEFINE_SPINLOCK(user_list_lock);
 static const struct desc_struct	bad_bios_desc = { { { 0, 0x00409200 } } };
 
-static const char		driver_version[] = "1.16ac";	/* no spaces */
+static const char driver_version[] = "1.16ac";	/* no spaces */
 
 static struct task_struct *kapmd_task;
 
@@ -416,7 +416,7 @@ static struct task_struct *kapmd_task;
  *	APM event names taken from the APM 1.2 specification. These are
  *	the message codes that the BIOS uses to tell us about events
  */
-static const char *	const apm_event_name[] = {
+static const char * const apm_event_name[] = {
 	"system standby",
 	"system suspend",
 	"normal resume",
@@ -434,14 +434,14 @@ static const char *	const apm_event_name[] = {
 
 typedef struct lookup_t {
 	int	key;
-	char *	msg;
+	char 	*msg;
 } lookup_t;
 
 /*
  *	The BIOS returns a set of standard error codes in AX when the
  *	carry flag is set.
  */
- 
+
 static const lookup_t error_table[] = {
 /* N/A	{ APM_SUCCESS,		"Operation succeeded" }, */
 	{ APM_DISABLED,		"Power management disabled" },
@@ -471,24 +471,25 @@ static const lookup_t error_table[] = {
  *	Write a meaningful log entry to the kernel log in the event of
  *	an APM error.
  */
- 
+
 static void apm_error(char *str, int err)
 {
-	int	i;
+	int i;
 
 	for (i = 0; i < ERROR_COUNT; i++)
-		if (error_table[i].key == err) break;
+		if (error_table[i].key == err)
+			break;
 	if (i < ERROR_COUNT)
 		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
 	else
 		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
-			str, err);
+		       str, err);
 }
 
 /*
  * Lock APM functionality to physical CPU 0
  */
- 
+
 #ifdef CONFIG_SMP
 
 static cpumask_t apm_save_cpus(void)
@@ -510,7 +511,7 @@ static inline void apm_restore_cpus(cpumask_t mask)
 /*
  *	No CPU lockdown needed on a uniprocessor
  */
- 
+
 #define apm_save_cpus()		(current->cpus_allowed)
 #define apm_restore_cpus(x)	(void)(x)
 
@@ -589,7 +590,7 @@ static inline void apm_irq_restore(unsigned long flags)
  *	code is returned in AH (bits 8-15 of eax) and this function
  *	returns non-zero.
  */
- 
+
 static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
 	u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
 {
@@ -601,7 +602,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
 	struct desc_struct	*gdt;
 
 	cpus = apm_save_cpus();
-	
+
 	cpu = get_cpu();
 	gdt = get_cpu_gdt_table(cpu);
 	save_desc_40 = gdt[0x40 / 8];
@@ -615,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
 	gdt[0x40 / 8] = save_desc_40;
 	put_cpu();
 	apm_restore_cpus(cpus);
-	
+
 	return *eax & 0xff;
 }
 
@@ -644,7 +645,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
 	struct desc_struct	*gdt;
 
 	cpus = apm_save_cpus();
-	
+
 	cpu = get_cpu();
 	gdt = get_cpu_gdt_table(cpu);
 	save_desc_40 = gdt[0x40 / 8];
@@ -679,7 +680,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
 
 static int apm_driver_version(u_short *val)
 {
-	u32	eax;
+	u32 eax;
 
 	if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
 		return (eax >> 8) & 0xff;
@@ -703,16 +704,16 @@ static int apm_driver_version(u_short *val)
  *	that APM 1.2 is in use. If no messges are pending the value 0x80
  *	is returned (No power management events pending).
  */
- 
+
 static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
 {
-	u32	eax;
-	u32	ebx;
-	u32	ecx;
-	u32	dummy;
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 dummy;
 
 	if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
-			&dummy, &dummy))
+			  &dummy, &dummy))
 		return (eax >> 8) & 0xff;
 	*event = ebx;
 	if (apm_info.connection_version < 0x0102)
@@ -735,10 +736,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
  *	The state holds the state to transition to, which may in fact
  *	be an acceptance of a BIOS requested state change.
  */
- 
+
 static int set_power_state(u_short what, u_short state)
 {
-	u32	eax;
+	u32 eax;
 
 	if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
 		return (eax >> 8) & 0xff;
@@ -751,7 +752,7 @@ static int set_power_state(u_short what, u_short state)
  *
  *	Transition the entire system into a new APM power state.
  */
- 
+
 static int set_system_power_state(u_short state)
 {
 	return set_power_state(APM_DEVICE_ALL, state);
@@ -765,13 +766,13 @@ static int set_system_power_state(u_short state)
  *	to handle the idle request. On a success the function returns 1
  *	if the BIOS did clock slowing or 0 otherwise.
  */
- 
+
 static int apm_do_idle(void)
 {
-	u32	eax;
-	u8	ret = 0;
-	int	idled = 0;
-	int	polling;
+	u32 eax;
+	u8 ret = 0;
+	int idled = 0;
+	int polling;
 
 	polling = !!(current_thread_info()->status & TS_POLLING);
 	if (polling) {
@@ -798,10 +799,9 @@ static int apm_do_idle(void)
 		/* This always fails on some SMP boards running UP kernels.
 		 * Only report the failure the first 5 times.
 		 */
-		if (++t < 5)
-		{
+		if (++t < 5) {
 			printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
-					(eax >> 8) & 0xff);
+			       (eax >> 8) & 0xff);
 			t = jiffies;
 		}
 		return -1;
@@ -813,15 +813,15 @@ static int apm_do_idle(void)
 /**
  *	apm_do_busy	-	inform the BIOS the CPU is busy
  *
- *	Request that the BIOS brings the CPU back to full performance. 
+ *	Request that the BIOS brings the CPU back to full performance.
  */
- 
+
 static void apm_do_busy(void)
 {
-	u32	dummy;
+	u32 dummy;
 
 	if (clock_slowed || ALWAYS_CALL_BUSY) {
-		(void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
+		(void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
 		clock_slowed = 0;
 	}
 }
@@ -832,15 +832,15 @@ static void apm_do_busy(void)
  * power management - we probably want
  * to conserve power.
  */
-#define IDLE_CALC_LIMIT   (HZ * 100)
-#define IDLE_LEAKY_MAX    16
+#define IDLE_CALC_LIMIT	(HZ * 100)
+#define IDLE_LEAKY_MAX	16
 
 static void (*original_pm_idle)(void) __read_mostly;
 
 /**
  * apm_cpu_idle		-	cpu idling for APM capable Linux
  *
- * This is the idling function the kernel executes when APM is available. It 
+ * This is the idling function the kernel executes when APM is available. It
  * tries to do BIOS powermanagement based on the average system idle time.
  * Furthermore it calls the system default idle routine.
  */
@@ -881,7 +881,8 @@ recalc:
 
 			t = jiffies;
 			switch (apm_do_idle()) {
-			case 0: apm_idle_done = 1;
+			case 0:
+				apm_idle_done = 1;
 				if (t != jiffies) {
 					if (bucket) {
 						bucket = IDLE_LEAKY_MAX;
@@ -892,7 +893,8 @@ recalc:
 					continue;
 				}
 				break;
-			case 1: apm_idle_done = 1;
+			case 1:
+				apm_idle_done = 1;
 				break;
 			default: /* BIOS refused */
 				break;
@@ -920,10 +922,10 @@ recalc:
  *	the SMP call on CPU0 as some systems will only honour this call
  *	on their first cpu.
  */
- 
+
 static void apm_power_off(void)
 {
-	unsigned char	po_bios_call[] = {
+	unsigned char po_bios_call[] = {
 		0xb8, 0x00, 0x10,	/* movw  $0x1000,ax  */
 		0x8e, 0xd0,		/* movw  ax,ss       */
 		0xbc, 0x00, 0xf0,	/* movw  $0xf000,sp  */
@@ -934,13 +936,12 @@ static void apm_power_off(void)
 	};
 
 	/* Some bioses don't like being called from CPU != 0 */
-	if (apm_info.realmode_power_off)
-	{
+	if (apm_info.realmode_power_off) {
 		(void)apm_save_cpus();
 		machine_real_restart(po_bios_call, sizeof(po_bios_call));
+	} else {
+		(void)set_system_power_state(APM_STATE_OFF);
 	}
-	else
-		(void) set_system_power_state(APM_STATE_OFF);
 }
 
 #ifdef CONFIG_APM_DO_ENABLE
@@ -949,17 +950,17 @@ static void apm_power_off(void)
  *	apm_enable_power_management - enable BIOS APM power management
  *	@enable: enable yes/no
  *
- *	Enable or disable the APM BIOS power services. 
+ *	Enable or disable the APM BIOS power services.
  */
- 
+
 static int apm_enable_power_management(int enable)
 {
-	u32	eax;
+	u32 eax;
 
 	if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
 		return APM_NOT_ENGAGED;
 	if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
-			enable, &eax))
+				 enable, &eax))
 		return (eax >> 8) & 0xff;
 	if (enable)
 		apm_info.bios.flags &= ~APM_BIOS_DISABLED;
@@ -982,19 +983,19 @@ static int apm_enable_power_management(int enable)
  *	if reported is a lifetime in secodnds/minutes at current powwer
  *	consumption.
  */
- 
+
 static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
 {
-	u32	eax;
-	u32	ebx;
-	u32	ecx;
-	u32	edx;
-	u32	dummy;
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u32 dummy;
 
 	if (apm_info.get_power_status_broken)
 		return APM_32_UNSUPPORTED;
 	if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
-			&eax, &ebx, &ecx, &edx, &dummy))
+			  &eax, &ebx, &ecx, &edx, &dummy))
 		return (eax >> 8) & 0xff;
 	*status = ebx;
 	*bat = ecx;
@@ -1010,11 +1011,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
 static int apm_get_battery_status(u_short which, u_short *status,
 				  u_short *bat, u_short *life, u_short *nbat)
 {
-	u32	eax;
-	u32	ebx;
-	u32	ecx;
-	u32	edx;
-	u32	esi;
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u32 esi;
 
 	if (apm_info.connection_version < 0x0102) {
 		/* pretend we only have one battery. */
@@ -1025,7 +1026,7 @@ static int apm_get_battery_status(u_short which, u_short *status,
 	}
 
 	if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
-			&ebx, &ecx, &edx, &esi))
+			  &ebx, &ecx, &edx, &esi))
 		return (eax >> 8) & 0xff;
 	*status = ebx;
 	*bat = ecx;
@@ -1043,10 +1044,10 @@ static int apm_get_battery_status(u_short which, u_short *status,
  *	Activate or deactive power management on either a specific device
  *	or the entire system (%APM_DEVICE_ALL).
  */
- 
+
 static int apm_engage_power_management(u_short device, int enable)
 {
-	u32	eax;
+	u32 eax;
 
 	if ((enable == 0) && (device == APM_DEVICE_ALL)
 	    && (apm_info.bios.flags & APM_BIOS_DISABLED))
@@ -1073,7 +1074,7 @@ static int apm_engage_power_management(u_short device, int enable)
  *	all video devices. Typically the BIOS will do laptop backlight and
  *	monitor powerdown for us.
  */
- 
+
 static int apm_console_blank(int blank)
 {
 	int error = APM_NOT_ENGAGED; /* silence gcc */
@@ -1125,7 +1126,7 @@ static apm_event_t get_queued_event(struct apm_user *as)
 
 static void queue_event(apm_event_t event, struct apm_user *sender)
 {
-	struct apm_user *	as;
+	struct apm_user *as;
 
 	spin_lock(&user_list_lock);
 	if (user_list == NULL)
@@ -1185,7 +1186,7 @@ static void reinit_timer(void)
 
 static int suspend(int vetoable)
 {
-	int		err;
+	int err;
 	struct apm_user	*as;
 
 	if (pm_send_all(PM_SUSPEND, (void *)3)) {
@@ -1238,7 +1239,7 @@ static int suspend(int vetoable)
 
 static void standby(void)
 {
-	int	err;
+	int err;
 
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
@@ -1255,8 +1256,8 @@ static void standby(void)
 
 static apm_event_t get_event(void)
 {
-	int		error;
-	apm_event_t	event = APM_NO_EVENTS; /* silence gcc */
+	int error;
+	apm_event_t event = APM_NO_EVENTS; /* silence gcc */
 	apm_eventinfo_t	info;
 
 	static int notified;
@@ -1274,9 +1275,9 @@ static apm_event_t get_event(void)
 
 static void check_events(void)
 {
-	apm_event_t		event;
-	static unsigned long	last_resume;
-	static int		ignore_bounce;
+	apm_event_t event;
+	static unsigned long last_resume;
+	static int ignore_bounce;
 
 	while ((event = get_event()) != 0) {
 		if (debug) {
@@ -1356,7 +1357,7 @@ static void check_events(void)
 			/*
 			 * We are not allowed to reject a critical suspend.
 			 */
-			(void) suspend(0);
+			(void)suspend(0);
 			break;
 		}
 	}
@@ -1364,12 +1365,12 @@ static void check_events(void)
 
 static void apm_event_handler(void)
 {
-	static int	pending_count = 4;
-	int		err;
+	static int pending_count = 4;
+	int err;
 
 	if ((standbys_pending > 0) || (suspends_pending > 0)) {
 		if ((apm_info.connection_version > 0x100) &&
-				(pending_count-- <= 0)) {
+		    (pending_count-- <= 0)) {
 			pending_count = 4;
 			if (debug)
 				printk(KERN_DEBUG "apm: setting state busy\n");
@@ -1417,9 +1418,9 @@ static int check_apm_user(struct apm_user *as, const char *func)
 
 static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct apm_user *	as;
-	int			i;
-	apm_event_t		event;
+	struct apm_user *as;
+	int i;
+	apm_event_t event;
 
 	as = fp->private_data;
 	if (check_apm_user(as, "read"))
@@ -1458,9 +1459,9 @@ static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *
 	return 0;
 }
 
-static unsigned int do_poll(struct file *fp, poll_table * wait)
+static unsigned int do_poll(struct file *fp, poll_table *wait)
 {
-	struct apm_user * as;
+	struct apm_user *as;
 
 	as = fp->private_data;
 	if (check_apm_user(as, "poll"))
@@ -1471,10 +1472,10 @@ static unsigned int do_poll(struct file *fp, poll_table * wait)
 	return 0;
 }
 
-static int do_ioctl(struct inode * inode, struct file *filp,
+static int do_ioctl(struct inode *inode, struct file *filp,
 		    u_int cmd, u_long arg)
 {
-	struct apm_user *	as;
+	struct apm_user *as;
 
 	as = filp->private_data;
 	if (check_apm_user(as, "ioctl"))
@@ -1514,9 +1515,9 @@ static int do_ioctl(struct inode * inode, struct file *filp,
 	return 0;
 }
 
-static int do_release(struct inode * inode, struct file * filp)
+static int do_release(struct inode *inode, struct file *filp)
 {
-	struct apm_user *	as;
+	struct apm_user *as;
 
 	as = filp->private_data;
 	if (check_apm_user(as, "release"))
@@ -1532,11 +1533,11 @@ static int do_release(struct inode * inode, struct file * filp)
 		if (suspends_pending <= 0)
 			(void) suspend(1);
 	}
-  	spin_lock(&user_list_lock);
+	spin_lock(&user_list_lock);
 	if (user_list == as)
 		user_list = as->next;
 	else {
-		struct apm_user *	as1;
+		struct apm_user *as1;
 
 		for (as1 = user_list;
 		     (as1 != NULL) && (as1->next != as);
@@ -1552,9 +1553,9 @@ static int do_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static int do_open(struct inode * inode, struct file * filp)
+static int do_open(struct inode *inode, struct file *filp)
 {
-	struct apm_user *	as;
+	struct apm_user *as;
 
 	as = kmalloc(sizeof(*as), GFP_KERNEL);
 	if (as == NULL) {
@@ -1568,7 +1569,7 @@ static int do_open(struct inode * inode, struct file * filp)
 	as->suspends_read = as->standbys_read = 0;
 	/*
 	 * XXX - this is a tiny bit broken, when we consider BSD
-         * process accounting. If the device is opened by root, we
+	 * process accounting. If the device is opened by root, we
 	 * instantly flag that we used superuser privs. Who knows,
 	 * we might close the device immediately without doing a
 	 * privileged operation -- cevans
@@ -1651,16 +1652,16 @@ static int proc_apm_show(struct seq_file *m, void *v)
 	   8) min = minutes; sec = seconds */
 
 	seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
-		     driver_version,
-		     (apm_info.bios.version >> 8) & 0xff,
-		     apm_info.bios.version & 0xff,
-		     apm_info.bios.flags,
-		     ac_line_status,
-		     battery_status,
-		     battery_flag,
-		     percentage,
-		     time_units,
-		     units);
+		   driver_version,
+		   (apm_info.bios.version >> 8) & 0xff,
+		   apm_info.bios.version & 0xff,
+		   apm_info.bios.flags,
+		   ac_line_status,
+		   battery_status,
+		   battery_flag,
+		   percentage,
+		   time_units,
+		   units);
 	return 0;
 }
 
@@ -1683,8 +1684,8 @@ static int apm(void *unused)
 	unsigned short	cx;
 	unsigned short	dx;
 	int		error;
-	char *		power_stat;
-	char *		bat_stat;
+	char 		*power_stat;
+	char 		*bat_stat;
 
 #ifdef CONFIG_SMP
 	/* 2002/08/01 - WT
@@ -1743,23 +1744,41 @@ static int apm(void *unused)
 		}
 	}
 
-	if (debug && (num_online_cpus() == 1 || smp )) {
+	if (debug && (num_online_cpus() == 1 || smp)) {
 		error = apm_get_power_status(&bx, &cx, &dx);
 		if (error)
 			printk(KERN_INFO "apm: power status not available\n");
 		else {
 			switch ((bx >> 8) & 0xff) {
-			case 0: power_stat = "off line"; break;
-			case 1: power_stat = "on line"; break;
-			case 2: power_stat = "on backup power"; break;
-			default: power_stat = "unknown"; break;
+			case 0:
+				power_stat = "off line";
+				break;
+			case 1:
+				power_stat = "on line";
+				break;
+			case 2:
+				power_stat = "on backup power";
+				break;
+			default:
+				power_stat = "unknown";
+				break;
 			}
 			switch (bx & 0xff) {
-			case 0: bat_stat = "high"; break;
-			case 1: bat_stat = "low"; break;
-			case 2: bat_stat = "critical"; break;
-			case 3: bat_stat = "charging"; break;
-			default: bat_stat = "unknown"; break;
+			case 0:
+				bat_stat = "high";
+				break;
+			case 1:
+				bat_stat = "low";
+				break;
+			case 2:
+				bat_stat = "critical";
+				break;
+			case 3:
+				bat_stat = "charging";
+				break;
+			default:
+				bat_stat = "unknown";
+				break;
 			}
 			printk(KERN_INFO
 			       "apm: AC %s, battery status %s, battery life ",
@@ -1776,8 +1795,8 @@ static int apm(void *unused)
 					printk("unknown\n");
 				else
 					printk("%d %s\n", dx & 0x7fff,
-						(dx & 0x8000) ?
-						"minutes" : "seconds");
+					       (dx & 0x8000) ?
+					       "minutes" : "seconds");
 			}
 		}
 	}
@@ -1802,7 +1821,7 @@ static int apm(void *unused)
 #ifndef MODULE
 static int __init apm_setup(char *str)
 {
-	int	invert;
+	int invert;
 
 	while ((str != NULL) && (*str != '\0')) {
 		if (strncmp(str, "off", 3) == 0)
@@ -1827,14 +1846,13 @@ static int __init apm_setup(char *str)
 		if ((strncmp(str, "power-off", 9) == 0) ||
 		    (strncmp(str, "power_off", 9) == 0))
 			power_off = !invert;
-		if (strncmp(str, "smp", 3) == 0)
-		{
+		if (strncmp(str, "smp", 3) == 0) {
 			smp = !invert;
 			idle_threshold = 100;
 		}
 		if ((strncmp(str, "allow-ints", 10) == 0) ||
 		    (strncmp(str, "allow_ints", 10) == 0))
- 			apm_info.allow_ints = !invert;
+			apm_info.allow_ints = !invert;
 		if ((strncmp(str, "broken-psr", 10) == 0) ||
 		    (strncmp(str, "broken_psr", 10) == 0))
 			apm_info.get_power_status_broken = !invert;
@@ -1880,7 +1898,8 @@ static int __init print_if_true(const struct dmi_system_id *d)
  */
 static int __init broken_ps2_resume(const struct dmi_system_id *d)
 {
-	printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
+	printk(KERN_INFO "%s machine detected. Mousepad Resume Bug "
+	       "workaround hopefully not needed.\n", d->ident);
 	return 0;
 }
 
@@ -1889,7 +1908,8 @@ static int __init set_realmode_power_off(const struct dmi_system_id *d)
 {
 	if (apm_info.realmode_power_off == 0) {
 		apm_info.realmode_power_off = 1;
-		printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
+		printk(KERN_INFO "%s bios detected. "
+		       "Using realmode poweroff only.\n", d->ident);
 	}
 	return 0;
 }
@@ -1899,7 +1919,8 @@ static int __init set_apm_ints(const struct dmi_system_id *d)
 {
 	if (apm_info.allow_ints == 0) {
 		apm_info.allow_ints = 1;
-		printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
+		printk(KERN_INFO "%s machine detected. "
+		       "Enabling interrupts during APM calls.\n", d->ident);
 	}
 	return 0;
 }
@@ -1909,7 +1930,8 @@ static int __init apm_is_horked(const struct dmi_system_id *d)
 {
 	if (apm_info.disabled == 0) {
 		apm_info.disabled = 1;
-		printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+		printk(KERN_INFO "%s machine detected. "
+		       "Disabling APM.\n", d->ident);
 	}
 	return 0;
 }
@@ -1918,7 +1940,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d)
 {
 	if (apm_info.disabled == 0) {
 		apm_info.disabled = 1;
-		printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+		printk(KERN_INFO "%s machine detected. "
+		       "Disabling APM.\n", d->ident);
 		printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
 		printk(KERN_INFO "download from support.intel.com \n");
 	}
@@ -1930,7 +1953,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d)
 {
 	if (apm_info.forbid_idle == 0) {
 		apm_info.forbid_idle = 1;
-		printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
+		printk(KERN_INFO "%s machine detected. "
+		       "Disabling APM idle calls.\n", d->ident);
 	}
 	return 0;
 }
@@ -1953,7 +1977,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d)
 static int __init broken_apm_power(const struct dmi_system_id *d)
 {
 	apm_info.get_power_status_broken = 1;
-	printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
+	printk(KERN_WARNING "BIOS strings suggest APM bugs, "
+	       "disabling power status reporting.\n");
 	return 0;
 }
 
@@ -1964,7 +1989,8 @@ static int __init broken_apm_power(const struct dmi_system_id *d)
 static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d)
 {
 	apm_info.get_power_status_swabinminutes = 1;
-	printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
+	printk(KERN_WARNING "BIOS strings suggest APM reports battery life "
+	       "in minutes and wrong byte order.\n");
 	return 0;
 }
 
@@ -1989,8 +2015,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
 		apm_is_horked, "Dell Inspiron 2500",
 		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-			DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
 	},
 	{	/* Allow interrupts during suspend on Dell Inspiron laptops*/
 		set_apm_ints, "Dell Inspiron", {
@@ -2013,15 +2039,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
 		apm_is_horked, "Dell Dimension 4100",
 		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
-			DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
-			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+			DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
 	},
 	{	/* Allow interrupts during suspend on Compaq Laptops*/
 		set_apm_ints, "Compaq 12XL125",
 		{	DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-			DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
+			DMI_MATCH(DMI_BIOS_VERSION, "4.06"), },
 	},
 	{	/* Allow interrupts during APM or the clock goes slow */
 		set_apm_ints, "ASUSTeK",
@@ -2063,15 +2089,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
 		apm_is_horked, "Sharp PC-PJ/AX",
 		{	DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
-			DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
-			DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
+			DMI_MATCH(DMI_BIOS_VENDOR, "SystemSoft"),
+			DMI_MATCH(DMI_BIOS_VERSION, "Version R2.08"), },
 	},
 	{	/* APM crashes */
 		apm_is_horked, "Dell Inspiron 2500",
 		{	DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-			DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-			DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
 	},
 	{	/* APM idle hangs */
 		apm_likes_to_melt, "Jabil AMD",
@@ -2202,11 +2228,11 @@ static int __init apm_init(void)
 		return -ENODEV;
 	}
 	printk(KERN_INFO
-		"apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
-		((apm_info.bios.version >> 8) & 0xff),
-		(apm_info.bios.version & 0xff),
-		apm_info.bios.flags,
-		driver_version);
+	       "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
+	       ((apm_info.bios.version >> 8) & 0xff),
+	       (apm_info.bios.version & 0xff),
+	       apm_info.bios.flags,
+	       driver_version);
 	if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
 		printk(KERN_INFO "apm: no 32 bit BIOS support\n");
 		return -ENODEV;
@@ -2311,9 +2337,9 @@ static int __init apm_init(void)
 	}
 	wake_up_process(kapmd_task);
 
-	if (num_online_cpus() > 1 && !smp ) {
+	if (num_online_cpus() > 1 && !smp) {
 		printk(KERN_NOTICE
-		   "apm: disabled - APM is not SMP safe (power off active).\n");
+		       "apm: disabled - APM is not SMP safe (power off active).\n");
 		return 0;
 	}
 
@@ -2338,7 +2364,7 @@ static int __init apm_init(void)
 
 static void __exit apm_exit(void)
 {
-	int	error;
+	int error;
 
 	if (set_pm_idle) {
 		pm_idle = original_pm_idle;

From b506a9d08bae9336ff9223c8a46a37bf27bd924a Mon Sep 17 00:00:00 2001
From: Quentin Barnes <qbarnes@gmail.com>
Date: Wed, 30 Jan 2008 13:32:32 +0100
Subject: [PATCH 451/890] x86: code clarification patch to Kprobes arch code

When developing the Kprobes arch code for ARM, I ran across some code
found in x86 and s390 Kprobes arch code which I didn't consider as
good as it could be.

Once I figured out what the code was doing, I changed the code
for ARM Kprobes to work the way I felt was more appropriate.
I've tested the code this way in ARM for about a year and would
like to push the same change to the other affected architectures.

The code in question is in kprobe_exceptions_notify() which
does:
====
          /* kprobe_running() needs smp_processor_id() */
          preempt_disable();
          if (kprobe_running() &&
              kprobe_fault_handler(args->regs, args->trapnr))
                  ret = NOTIFY_STOP;
          preempt_enable();
====

For the moment, ignore the code having the preempt_disable()/
preempt_enable() pair in it.

The problem is that kprobe_running() needs to call smp_processor_id()
which will assert if preemption is enabled.  That sanity check by
smp_processor_id() makes perfect sense since calling it with preemption
enabled would return an unreliable result.

But the function kprobe_exceptions_notify() can be called from a
context where preemption could be enabled.  If that happens, the
assertion in smp_processor_id() happens and we're dead.  So what
the original author did (speculation on my part!) is put in the
preempt_disable()/preempt_enable() pair to simply defeat the check.

Once I figured out what was going on, I considered this an
inappropriate approach.  If kprobe_exceptions_notify() is called
from a preemptible context, we can't be in a kprobe processing
context at that time anyways since kprobes requires preemption to
already be disabled, so just check for preemption enabled, and if
so, blow out before ever calling kprobe_running().  I wrote the ARM
kprobe code like this:
====
          /* To be potentially processing a kprobe fault and to
           * trust the result from kprobe_running(), we have
           * be non-preemptible. */
          if (!preemptible() && kprobe_running() &&
              kprobe_fault_handler(args->regs, args->trapnr))
                  ret = NOTIFY_STOP;
====

The above code has been working fine for ARM Kprobes for a year.
So I changed the x86 code (2.6.24-rc6) to be the same way and ran
the Systemtap tests on that kernel.  As on ARM, Systemtap on x86
comes up with the same test results either way, so it's a neutral
external functional change (as expected).

This issue has been discussed previously on linux-arm-kernel and the
Systemtap mailing lists.  Pointers to the by base for the two
discussions:
http://lists.arm.linux.org.uk/lurker/message/20071219.223225.1f5c2a5e.en.html
http://sourceware.org/ml/systemtap/2007-q1/msg00251.html

Signed-off-by: Quentin Barnes <qbarnes@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Ananth N Mavinakayahanalli <ananth@in.ibm.com>
Acked-by: Ananth N Mavinakayahanalli <ananth@in.ibm.com>
---
 arch/x86/kernel/kprobes.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index a72e02bf1135..711fec8f6379 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -44,6 +44,7 @@
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/hardirq.h>
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
@@ -951,12 +952,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 			ret = NOTIFY_STOP;
 		break;
 	case DIE_GPF:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
+		/*
+		 * To be potentially processing a kprobe fault and to
+		 * trust the result from kprobe_running(), we have
+		 * be non-preemptible.
+		 */
+		if (!preemptible() && kprobe_running() &&
 		    kprobe_fault_handler(args->regs, args->trapnr))
 			ret = NOTIFY_STOP;
-		preempt_enable();
 		break;
 	default:
 		break;

From b75f53dba8a4a61fda1ff7e0fb0fe3b0d80e0c64 Mon Sep 17 00:00:00 2001
From: "Carlos R. Mafra" <crmafra@gmail.com>
Date: Wed, 30 Jan 2008 13:32:33 +0100
Subject: [PATCH 452/890] x86: fix style errors in nmi_int.c

This patch fixes most errors detected by checkpatch.pl.

                                     errors   lines of code   errors/KLOC
arch/x86/oprofile/nmi_int.c (after)       1             461           2.1
arch/x86/oprofile/nmi_int.c (before)     60             477         125.7

No code changed.

size:
   text    data     bss     dec     hex filename
   2675     264     472    3411     d53 nmi_int.o.after
   2675     264     472    3411     d53 nmi_int.o.before

md5sum:
  847aea0cc68fe1a2b5e7019439f3b4dd  nmi_int.o.after
  847aea0cc68fe1a2b5e7019439f3b4dd  nmi_int.o.before

Signed-off-by: Carlos R. Mafra <crmafra@gmail.com>
Reviewed-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/oprofile/nmi_int.c | 208 +++++++++++++++++-------------------
 1 file changed, 96 insertions(+), 112 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index c8ab79ef4276..1f11cf0a307f 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -18,11 +18,11 @@
 #include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
- 
+
 #include "op_counter.h"
 #include "op_x86_model.h"
 
-static struct op_x86_model_spec const * model;
+static struct op_x86_model_spec const *model;
 static struct op_msrs cpu_msrs[NR_CPUS];
 static unsigned long saved_lvtpc[NR_CPUS];
 
@@ -41,7 +41,6 @@ static int nmi_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-
 static int nmi_resume(struct sys_device *dev)
 {
 	if (nmi_enabled == 1)
@@ -49,29 +48,27 @@ static int nmi_resume(struct sys_device *dev)
 	return 0;
 }
 
-
 static struct sysdev_class oprofile_sysclass = {
 	.name		= "oprofile",
 	.resume		= nmi_resume,
 	.suspend	= nmi_suspend,
 };
 
-
 static struct sys_device device_oprofile = {
 	.id	= 0,
 	.cls	= &oprofile_sysclass,
 };
 
-
 static int __init init_sysfs(void)
 {
 	int error;
-	if (!(error = sysdev_class_register(&oprofile_sysclass)))
+
+	error = sysdev_class_register(&oprofile_sysclass);
+	if (!error)
 		error = sysdev_register(&device_oprofile);
 	return error;
 }
 
-
 static void exit_sysfs(void)
 {
 	sysdev_unregister(&device_oprofile);
@@ -90,7 +87,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 	int cpu = smp_processor_id();
 
-	switch(val) {
+	switch (val) {
 	case DIE_NMI:
 		if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
 			ret = NOTIFY_STOP;
@@ -101,24 +98,24 @@ static int profile_exceptions_notify(struct notifier_block *self,
 	return ret;
 }
 
-static void nmi_cpu_save_registers(struct op_msrs * msrs)
+static void nmi_cpu_save_registers(struct op_msrs *msrs)
 {
 	unsigned int const nr_ctrs = model->num_counters;
-	unsigned int const nr_ctrls = model->num_controls; 
-	struct op_msr * counters = msrs->counters;
-	struct op_msr * controls = msrs->controls;
+	unsigned int const nr_ctrls = model->num_controls;
+	struct op_msr *counters = msrs->counters;
+	struct op_msr *controls = msrs->controls;
 	unsigned int i;
 
 	for (i = 0; i < nr_ctrs; ++i) {
-		if (counters[i].addr){
+		if (counters[i].addr) {
 			rdmsr(counters[i].addr,
 				counters[i].saved.low,
 				counters[i].saved.high);
 		}
 	}
- 
+
 	for (i = 0; i < nr_ctrls; ++i) {
-		if (controls[i].addr){
+		if (controls[i].addr) {
 			rdmsr(controls[i].addr,
 				controls[i].saved.low,
 				controls[i].saved.high);
@@ -126,15 +123,13 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
 	}
 }
 
-
-static void nmi_save_registers(void * dummy)
+static void nmi_save_registers(void *dummy)
 {
 	int cpu = smp_processor_id();
-	struct op_msrs * msrs = &cpu_msrs[cpu];
+	struct op_msrs *msrs = &cpu_msrs[cpu];
 	nmi_cpu_save_registers(msrs);
 }
 
-
 static void free_msrs(void)
 {
 	int i;
@@ -146,7 +141,6 @@ static void free_msrs(void)
 	}
 }
 
-
 static int allocate_msrs(void)
 {
 	int success = 1;
@@ -173,11 +167,10 @@ static int allocate_msrs(void)
 	return success;
 }
 
-
-static void nmi_cpu_setup(void * dummy)
+static void nmi_cpu_setup(void *dummy)
 {
 	int cpu = smp_processor_id();
-	struct op_msrs * msrs = &cpu_msrs[cpu];
+	struct op_msrs *msrs = &cpu_msrs[cpu];
 	spin_lock(&oprofilefs_lock);
 	model->setup_ctrs(msrs);
 	spin_unlock(&oprofilefs_lock);
@@ -193,13 +186,14 @@ static struct notifier_block profile_exceptions_nb = {
 
 static int nmi_setup(void)
 {
-	int err=0;
+	int err = 0;
 	int cpu;
 
 	if (!allocate_msrs())
 		return -ENOMEM;
 
-	if ((err = register_die_notifier(&profile_exceptions_nb))){
+	err = register_die_notifier(&profile_exceptions_nb);
+	if (err) {
 		free_msrs();
 		return err;
 	}
@@ -210,7 +204,7 @@ static int nmi_setup(void)
 
 	/* Assume saved/restored counters are the same on all CPUs */
 	model->fill_in_addresses(&cpu_msrs[0]);
-	for_each_possible_cpu (cpu) {
+	for_each_possible_cpu(cpu) {
 		if (cpu != 0) {
 			memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
 				sizeof(struct op_msr) * model->num_counters);
@@ -226,39 +220,37 @@ static int nmi_setup(void)
 	return 0;
 }
 
-
-static void nmi_restore_registers(struct op_msrs * msrs)
+static void nmi_restore_registers(struct op_msrs *msrs)
 {
 	unsigned int const nr_ctrs = model->num_counters;
-	unsigned int const nr_ctrls = model->num_controls; 
-	struct op_msr * counters = msrs->counters;
-	struct op_msr * controls = msrs->controls;
+	unsigned int const nr_ctrls = model->num_controls;
+	struct op_msr *counters = msrs->counters;
+	struct op_msr *controls = msrs->controls;
 	unsigned int i;
 
 	for (i = 0; i < nr_ctrls; ++i) {
-		if (controls[i].addr){
+		if (controls[i].addr) {
 			wrmsr(controls[i].addr,
 				controls[i].saved.low,
 				controls[i].saved.high);
 		}
 	}
- 
+
 	for (i = 0; i < nr_ctrs; ++i) {
-		if (counters[i].addr){
+		if (counters[i].addr) {
 			wrmsr(counters[i].addr,
 				counters[i].saved.low,
 				counters[i].saved.high);
 		}
 	}
 }
- 
 
-static void nmi_cpu_shutdown(void * dummy)
+static void nmi_cpu_shutdown(void *dummy)
 {
 	unsigned int v;
 	int cpu = smp_processor_id();
-	struct op_msrs * msrs = &cpu_msrs[cpu];
- 
+	struct op_msrs *msrs = &cpu_msrs[cpu];
+
 	/* restoring APIC_LVTPC can trigger an apic error because the delivery
 	 * mode and vector nr combination can be illegal. That's by design: on
 	 * power on apic lvt contain a zero vector nr which are legal only for
@@ -271,7 +263,6 @@ static void nmi_cpu_shutdown(void * dummy)
 	nmi_restore_registers(msrs);
 }
 
- 
 static void nmi_shutdown(void)
 {
 	nmi_enabled = 0;
@@ -281,45 +272,40 @@ static void nmi_shutdown(void)
 	free_msrs();
 }
 
- 
-static void nmi_cpu_start(void * dummy)
+static void nmi_cpu_start(void *dummy)
 {
-	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
 	model->start(msrs);
 }
- 
 
 static int nmi_start(void)
 {
 	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
 	return 0;
 }
- 
- 
-static void nmi_cpu_stop(void * dummy)
+
+static void nmi_cpu_stop(void *dummy)
 {
-	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
 	model->stop(msrs);
 }
- 
- 
+
 static void nmi_stop(void)
 {
 	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
 }
 
-
 struct op_counter_config counter_config[OP_MAX_COUNTER];
 
-static int nmi_create_files(struct super_block * sb, struct dentry * root)
+static int nmi_create_files(struct super_block *sb, struct dentry *root)
 {
 	unsigned int i;
 
 	for (i = 0; i < model->num_counters; ++i) {
-		struct dentry * dir;
+		struct dentry *dir;
 		char buf[4];
- 
- 		/* quick little hack to _not_ expose a counter if it is not
+
+		/* quick little hack to _not_ expose a counter if it is not
 		 * available for use.  This should protect userspace app.
 		 * NOTE:  assumes 1:1 mapping here (that counters are organized
 		 *        sequentially in their struct assignment).
@@ -329,21 +315,21 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
 
 		snprintf(buf,  sizeof(buf), "%d", i);
 		dir = oprofilefs_mkdir(sb, root, buf);
-		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 
-		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 
-		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); 
-		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 
-		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 
-		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
 	}
 
 	return 0;
 }
- 
+
 static int p4force;
 module_param(p4force, int, 0);
- 
-static int __init p4_init(char ** cpu_type)
+
+static int __init p4_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
@@ -356,15 +342,15 @@ static int __init p4_init(char ** cpu_type)
 	return 1;
 #else
 	switch (smp_num_siblings) {
-		case 1:
-			*cpu_type = "i386/p4";
-			model = &op_p4_spec;
-			return 1;
+	case 1:
+		*cpu_type = "i386/p4";
+		model = &op_p4_spec;
+		return 1;
 
-		case 2:
-			*cpu_type = "i386/p4-ht";
-			model = &op_p4_ht2_spec;
-			return 1;
+	case 2:
+		*cpu_type = "i386/p4-ht";
+		model = &op_p4_ht2_spec;
+		return 1;
 	}
 #endif
 
@@ -373,8 +359,7 @@ static int __init p4_init(char ** cpu_type)
 	return 0;
 }
 
-
-static int __init ppro_init(char ** cpu_type)
+static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
@@ -409,52 +394,52 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 
 	if (!cpu_has_apic)
 		return -ENODEV;
- 
+
 	switch (vendor) {
-		case X86_VENDOR_AMD:
-			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
+	case X86_VENDOR_AMD:
+		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
 
-			switch (family) {
-			default:
-				return -ENODEV;
-			case 6:
-				model = &op_athlon_spec;
-				cpu_type = "i386/athlon";
-				break;
-			case 0xf:
-				model = &op_athlon_spec;
-				/* Actually it could be i386/hammer too, but give
-				   user space an consistent name. */
-				cpu_type = "x86-64/hammer";
-				break;
-			case 0x10:
-				model = &op_athlon_spec;
-				cpu_type = "x86-64/family10";
-				break;
-			}
+		switch (family) {
+		default:
+			return -ENODEV;
+		case 6:
+			model = &op_athlon_spec;
+			cpu_type = "i386/athlon";
 			break;
- 
-		case X86_VENDOR_INTEL:
-			switch (family) {
-				/* Pentium IV */
-				case 0xf:
-					if (!p4_init(&cpu_type))
-						return -ENODEV;
-					break;
+		case 0xf:
+			model = &op_athlon_spec;
+			/* Actually it could be i386/hammer too, but give
+			 user space an consistent name. */
+			cpu_type = "x86-64/hammer";
+			break;
+		case 0x10:
+			model = &op_athlon_spec;
+			cpu_type = "x86-64/family10";
+			break;
+		}
+		break;
 
-				/* A P6-class processor */
-				case 6:
-					if (!ppro_init(&cpu_type))
-						return -ENODEV;
-					break;
+	case X86_VENDOR_INTEL:
+		switch (family) {
+			/* Pentium IV */
+		case 0xf:
+			if (!p4_init(&cpu_type))
+				return -ENODEV;
+			break;
 
-				default:
-					return -ENODEV;
-			}
+			/* A P6-class processor */
+		case 6:
+			if (!ppro_init(&cpu_type))
+				return -ENODEV;
 			break;
 
 		default:
 			return -ENODEV;
+		}
+		break;
+
+	default:
+		return -ENODEV;
 	}
 
 	init_sysfs();
@@ -469,7 +454,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	return 0;
 }
 
-
 void op_nmi_exit(void)
 {
 	if (using_nmi)

From b6795e65f158d12d3124379fc50ec156ae60f888 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:34 +0100
Subject: [PATCH 453/890] x86: fault_32.c cleanup

We get die() from kdebug.h, no need for forward declaration.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index bfb0917d699d..870b5610555c 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -42,8 +42,6 @@
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-extern void die(const char *, struct pt_regs *, long);
-
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES

From 1dc85be087d6645575847dc23c37147a2352312b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:35 +0100
Subject: [PATCH 454/890] x86: begin fault_{32|64}.c unification

Move X86_32 only get_segment_eip to X86_64
Move X86_64 only is_errata93 to X86_32

Change X86_32 loop in is_prefetch to highlight the differences
between them.  Fold the logic from __is_prefetch in as well on
X86_32.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c |  97 ++++++++++++++++++++++++++------
 arch/x86/mm/fault_64.c | 124 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 196 insertions(+), 25 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 870b5610555c..300c9d8b684a 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -61,6 +61,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
+#ifdef CONFIG_X86_32
 /*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
@@ -135,26 +136,61 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		*eip_limit = seg_limit;
 	return ip + base;
 }
+#endif
 
 /*
+ * X86_32
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
  * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
  */
-static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+		       unsigned long error_code)
 {
-	unsigned long limit;
-	unsigned char *instr = (unsigned char *)get_segment_eip(regs, &limit);
+	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0;
-	int i;
+	unsigned char *max_instr;
 
-	for (i = 0; scan_more && i < 15; i++) {
+#ifdef CONFIG_X86_32
+	unsigned long limit;
+	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		     boot_cpu_data.x86 >= 6)) {
+		/* Catch an obscure case of prefetch inside an NX page. */
+		if (nx_enabled && (error_code & PF_INSTR))
+			return 0;
+	} else {
+		return 0;
+	}
+	instr = (unsigned char *)get_segment_eip(regs, &limit);
+#else
+	/* If it was a exec fault ignore */
+	if (error_code & PF_INSTR)
+		return 0;
+	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+#endif
+
+	max_instr = instr + 15;
+
+#ifdef CONFIG_X86_64
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+		return 0;
+#endif
+
+	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
+#ifdef CONFIG_X86_32
 		if (instr > (unsigned char *)limit)
 			break;
+#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -196,8 +232,10 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
+#ifdef CONFIG_X86_32
 			if (instr > (unsigned char *)limit)
 				break;
+#endif
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
@@ -211,19 +249,6 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 	return prefetch;
 }
 
-static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-			      unsigned long error_code)
-{
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & 16))
-			return 0;
-		return __is_prefetch(regs, addr);
-	}
-	return 0;
-}
-
 static noinline void force_sig_info_fault(int si_signo, int si_code,
 	unsigned long address, struct task_struct *tsk)
 {
@@ -274,6 +299,42 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	return pmd_k;
 }
 
+#ifdef CONFIG_X86_64
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/* Workaround for K8 erratum #93 & buggy BIOS.
+   BIOS SMM functions are required to use a specific workaround
+   to avoid corruption of the 64bit RIP register on C stepping K8.
+   A lot of BIOS that didn't get tested properly miss this.
+   The OS sees this as a page fault with the upper 32bits of RIP cleared.
+   Try to work around it here.
+   Note we only handle faults in kernel here. */
+
+static int is_errata93(struct pt_regs *regs, unsigned long address)
+{
+	static int warned;
+	if (address != regs->ip)
+		return 0;
+	if ((address >> 32) != 0)
+		return 0;
+	address |= 0xffffffffUL << 32;
+	if ((address >= (u64)_stext && address <= (u64)_etext) ||
+	    (address >= MODULES_VADDR && address <= MODULES_END)) {
+		if (!warned) {
+			printk(errata93_warning);
+			warned = 1;
+		}
+		regs->ip = address;
+		return 1;
+	}
+	return 0;
+}
+#endif
+
 /*
  * Handle a fault on the vmalloc or module mapping area
  *
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 7e98a7691283..0d3d5979ce2c 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -64,32 +64,136 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
-/* Sometimes the CPU reports invalid exceptions on prefetch.
-   Check that here and ignore.
-   Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-				unsigned long error_code)
+#ifdef CONFIG_X86_32
+/*
+ * Return EIP plus the CS segment base.  The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+ *
+ * The segment is checked, because it might have been changed by another
+ * task between the original faulting instruction and here.
+ *
+ * If CS is no longer a valid code segment, or if EIP is beyond the
+ * limit, or if it is a kernel address when CS is not a kernel segment,
+ * then the returned value will be greater than *eip_limit.
+ *
+ * This is slow, but is very rarely executed.
+ */
+static inline unsigned long get_segment_eip(struct pt_regs *regs,
+					    unsigned long *eip_limit)
+{
+	unsigned long ip = regs->ip;
+	unsigned seg = regs->cs & 0xffff;
+	u32 seg_ar, seg_limit, base, *desc;
+
+	/* Unlikely, but must come before segment checks. */
+	if (unlikely(regs->flags & VM_MASK)) {
+		base = seg << 4;
+		*eip_limit = base + 0xffff;
+		return base + (ip & 0xffff);
+	}
+
+	/* The standard kernel/user address space limit. */
+	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+
+	/* By far the most common cases. */
+	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+		return ip;
+
+	/* Check the segment exists, is within the current LDT/GDT size,
+	   that kernel/user (ring 0..3) has the appropriate privilege,
+	   that it's a code segment, and get the limit. */
+	__asm__("larl %3,%0; lsll %3,%1"
+		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
+	if ((~seg_ar & 0x9800) || ip > seg_limit) {
+		*eip_limit = 0;
+		return 1;	 /* So that returned ip > *eip_limit. */
+	}
+
+	/* Get the GDT/LDT descriptor base.
+	   When you look for races in this code remember that
+	   LDT and other horrors are only used in user space. */
+	if (seg & (1<<2)) {
+		/* Must lock the LDT while reading it. */
+		mutex_lock(&current->mm->context.lock);
+		desc = current->mm->context.ldt;
+		desc = (void *)desc + (seg & ~7);
+	} else {
+		/* Must disable preemption while reading the GDT. */
+		desc = (u32 *)get_cpu_gdt_table(get_cpu());
+		desc = (void *)desc + (seg & ~7);
+	}
+
+	/* Decode the code segment base from the descriptor */
+	base = get_desc_base((struct desc_struct *)desc);
+
+	if (seg & (1<<2))
+		mutex_unlock(&current->mm->context.lock);
+	else
+		put_cpu();
+
+	/* Adjust EIP and segment limit, and clamp at the kernel limit.
+	   It's legitimate for segments to wrap at 0xffffffff. */
+	seg_limit += base;
+	if (seg_limit < *eip_limit && seg_limit >= base)
+		*eip_limit = seg_limit;
+	return ip + base;
+}
+#endif
+
+/*
+ * X86_32
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
+ */
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+		       unsigned long error_code)
 {
 	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0;
 	unsigned char *max_instr;
 
+#ifdef CONFIG_X86_32
+	unsigned long limit;
+	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		     boot_cpu_data.x86 >= 6)) {
+		/* Catch an obscure case of prefetch inside an NX page. */
+		if (nx_enabled && (error_code & PF_INSTR))
+			return 0;
+	} else {
+		return 0;
+	}
+	instr = (unsigned char *)get_segment_eip(regs, &limit);
+#else
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-
 	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+#endif
+
 	max_instr = instr + 15;
 
+#ifdef CONFIG_X86_64
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
+#endif
 
 	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
+#ifdef CONFIG_X86_32
+		if (instr > (unsigned char *)limit)
+			break;
+#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -125,12 +229,16 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 			scan_more = (instr_lo & 0xC) == 0x4;
 			break;
 		case 0xF0:
-			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
 			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
+#ifdef CONFIG_X86_32
+			if (instr > (unsigned char *)limit)
+				break;
+#endif
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
@@ -185,6 +293,7 @@ bad:
 	printk("BAD\n");
 }
 
+#ifdef CONFIG_X86_64
 static const char errata93_warning[] =
 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
@@ -218,6 +327,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	}
 	return 0;
 }
+#endif
 
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)

From c4aba4a8ec795124394bc79e3e8dbbc319338a98 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:35 +0100
Subject: [PATCH 455/890] x86: introduce force_sig_info_fault helper to X86_64

Use the force_sig_info_fault helper from X86_32 in X86_64.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c |  2 +-
 arch/x86/mm/fault_64.c | 31 ++++++++++++++++++-------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 300c9d8b684a..b1893ebf6456 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -249,7 +249,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	return prefetch;
 }
 
-static noinline void force_sig_info_fault(int si_signo, int si_code,
+static void force_sig_info_fault(int si_signo, int si_code,
 	unsigned long address, struct task_struct *tsk)
 {
 	siginfo_t info;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 0d3d5979ce2c..dcf430bb62ee 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -252,6 +252,18 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	return prefetch;
 }
 
+static void force_sig_info_fault(int si_signo, int si_code,
+	unsigned long address, struct task_struct *tsk)
+{
+	siginfo_t info;
+
+	info.si_signo = si_signo;
+	info.si_errno = 0;
+	info.si_code = si_code;
+	info.si_addr = (void __user *)address;
+	force_sig_info(si_signo, &info, tsk);
+}
+
 static int bad_address(void *p)
 {
 	unsigned long dummy;
@@ -415,7 +427,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	unsigned long address;
 	int write, fault;
 	unsigned long flags;
-	siginfo_t info;
+	int si_code;
 
 	/*
 	 * We can fault from pretty much anywhere, with unknown IRQ state.
@@ -429,7 +441,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	/* get the address */
 	address = read_cr2();
 
-	info.si_code = SEGV_MAPERR;
+	si_code = SEGV_MAPERR;
 
 
 	/*
@@ -532,7 +544,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
  * we can handle it..
  */
 good_area:
-	info.si_code = SEGV_ACCERR;
+	si_code = SEGV_ACCERR;
 	write = 0;
 	switch (error_code & (PF_PROT|PF_WRITE)) {
 	default:	/* 3: write, present */
@@ -611,11 +623,8 @@ bad_area_nosemaphore:
 		/* Kernel addresses are always protection faults */
 		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
 		tsk->thread.trap_no = 14;
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		/* info.si_code has been set above */
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGSEGV, &info, tsk);
+
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
 		return;
 	}
 
@@ -682,11 +691,7 @@ do_sigbus:
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void __user *)address;
-	force_sig_info(SIGBUS, &info, tsk);
+	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 	return;
 }
 

From e83a5fdca89970f87e06f87a1d18be22b161e2ba Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:32:35 +0100
Subject: [PATCH 456/890] x86: clean up apic_32/64.c

White space and coding style clean up.
Make apic_32/64.c similar.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c |  5 ++---
 arch/x86/kernel/apic_64.c | 23 +++++++++++++++++------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4330a899ddcb..ffbad74e5be0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -577,7 +577,6 @@ static void local_apic_timer_interrupt(void)
  * [ if a single-CPU system runs an SMP kernel then we call the local
  *   interrupt as well. Thus we cannot inline the local irq ... ]
  */
-
 void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1021,7 +1020,7 @@ void __cpuinit setup_local_APIC(void)
 /*
  * Detect and initialize APIC
  */
-static int __init detect_init_APIC (void)
+static int __init detect_init_APIC(void)
 {
 	u32 h, l, features;
 
@@ -1165,7 +1164,7 @@ fake_ioapic_page:
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int __init APIC_init_uniprocessor (void)
+int __init APIC_init_uniprocessor(void)
 {
 	if (enable_local_apic < 0)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 47b8ef51dde0..d1a696673d9d 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -23,33 +23,37 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
-#include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
+#include <linux/module.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
+#include <asm/hpet.h>
 #include <asm/pgalloc.h>
 #include <asm/mach_apic.h>
 #include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
 #include <asm/timex.h>
-#include <asm/hpet.h>
 #include <asm/apic.h>
 
-int apic_verbosity;
 int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
 
-/* Local APIC timer works in C2? */
+/* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
+/*
+ * Debug level, exported for io_apic.c
+ */
+int apic_verbosity;
+
 static struct resource lapic_resource = {
 	.name = "Local APIC",
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -355,6 +359,11 @@ static void __init calibrate_APIC_clock(void)
 	calibration_result = result / HZ;
 }
 
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
 void __init setup_boot_APIC_clock(void)
 {
 	/*
@@ -1109,8 +1118,8 @@ static struct sysdev_class lapic_sysclass = {
 };
 
 static struct sys_device device_lapic = {
-	.id		= 0,
-	.cls		= &lapic_sysclass,
+	.id	= 0,
+	.cls	= &lapic_sysclass,
 };
 
 static void __cpuinit apic_pm_activate(void)
@@ -1121,9 +1130,11 @@ static void __cpuinit apic_pm_activate(void)
 static int __init init_lapic_sysfs(void)
 {
 	int error;
+
 	if (!cpu_has_apic)
 		return 0;
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
 	error = sysdev_class_register(&lapic_sysclass);
 	if (!error)
 		error = sysdev_register(&device_lapic);

From 48ddb154cf55fb1e292af49c270b950ef2c6ef32 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:32:36 +0100
Subject: [PATCH 457/890] x86: not clear empty_zero_page again

empty_zero_page is in .bss section, and it is cleared in clear_bss by
x86_64_start_kernel(). So don't clear that again in mem_init

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 15e05a004fcf..6c3f6eb1f790 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -495,8 +495,7 @@ void __init mem_init(void)
 
 	pci_iommu_alloc();
 
-	/* clear the zero-page */
-	memset(empty_zero_page, 0, PAGE_SIZE);
+	/* clear_bss() already clear the empty_zero_page */
 
 	reservedpages = 0;
 

From f2633105cd92b793dd6a6f623b4140287d46160a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:36 +0100
Subject: [PATCH 458/890] x86: debug: double-check the empty zero page

temporary debugging - remove before this hits v2.6.25.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6c3f6eb1f790..05f12c527b02 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -497,6 +497,14 @@ void __init mem_init(void)
 
 	/* clear_bss() already clear the empty_zero_page */
 
+	/* temporary debugging - double check it's true: */
+	{
+		int i;
+
+		for (i = 0; i < 1024; i++)
+			WARN_ON_ONCE(empty_zero_page[i]);
+	}
+
 	reservedpages = 0;
 
 	/* this will put all low memory onto the freelists */

From ff8a03a623d8e9a7431b4236a901c02634cd5d43 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:32:36 +0100
Subject: [PATCH 459/890] x86: clean up apic_32.c, take 2

More white space and coding style clean up.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index ffbad74e5be0..cbcf72cde956 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -46,7 +46,7 @@
 /*
  * Sanity check
  */
-#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
@@ -55,7 +55,7 @@
  *
  * -1=force-disable, +1=force-enable
  */
-static int enable_local_apic __initdata = 0;
+static int enable_local_apic __initdata;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
@@ -432,7 +432,7 @@ void __init setup_boot_APIC_clock(void)
 			       "with PM Timer: %ldms instead of 100ms\n",
 			       (long)res);
 			/* Correct the lapic counter value */
-			res = (((u64) delta ) * pm_100ms);
+			res = (((u64) delta) * pm_100ms);
 			do_div(res, deltapm);
 			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
 			       "%lu (%ld)\n", (unsigned long) res, delta);
@@ -976,7 +976,8 @@ void __cpuinit setup_local_APIC(void)
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write_around(APIC_LVT1, value);
 
-	if (integrated && !esr_disable) {		/* !82489DX */
+	if (integrated && !esr_disable) {
+		/* !82489DX */
 		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 			apic_write(APIC_ESR, 0);
@@ -1262,7 +1263,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
-	printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
@@ -1349,7 +1350,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 			value = apic_read(APIC_LVT0);
 			value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
 				APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-				APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+				APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
 			value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
 			value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
 			apic_write_around(APIC_LVT0, value);

From 3aa88cdf6bcc9e510c0707581131b821a7d3b7cb Mon Sep 17 00:00:00 2001
From: "Carlos R. Mafra" <crmafra@gmail.com>
Date: Wed, 30 Jan 2008 13:32:36 +0100
Subject: [PATCH 460/890] x86: clean up k8topology.c

This patch fixes all errors pointed out by checkpatch.pl.

                                      errors   lines of code   errors/KLOC
arch/x86/mm/k8topology_64.c (before)      72             185         389.1
arch/x86/mm/k8topology_64.c (after)        0             185             0

No code changed.

   text    data     bss     dec     hex filename
   1506       0       0    1506     5e2 k8topology_64.o.after
   1506       0       0    1506     5e2 k8topology_64.o.before

md5sum:

   f9f48331a7eca4fc60d2a03369dc5f53  k8topology_64.o.after
   f9f48331a7eca4fc60d2a03369dc5f53  k8topology_64.o.before

Signed-off-by: Carlos R. Mafra <crmafra@gmail.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/k8topology_64.c | 150 ++++++++++++++++++------------------
 1 file changed, 75 insertions(+), 75 deletions(-)

diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 3695c9e8b9b6..32b963de9b8f 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -1,9 +1,9 @@
-/* 
+/*
  * AMD K8 NUMA support.
  * Discover the memory map and associated nodes.
- * 
+ *
  * This version reads it directly from the K8 northbridge.
- * 
+ *
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
 #include <linux/kernel.h>
@@ -22,26 +22,26 @@
 
 static __init int find_northbridge(void)
 {
-	int num; 
+	int num;
 
-	for (num = 0; num < 32; num++) { 
+	for (num = 0; num < 32; num++) {
 		u32 header;
-		
-		header = read_pci_config(0, num, 0, 0x00);  
+
+		header = read_pci_config(0, num, 0, 0x00);
 		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
-			continue; 	
+			continue;
 
-		header = read_pci_config(0, num, 1, 0x00); 
+		header = read_pci_config(0, num, 1, 0x00);
 		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
-			continue;	
-		return num; 
-	} 
+			continue;
+		return num;
+	}
 
-	return -1; 	
+	return -1;
 }
 
 int __init k8_scan_nodes(unsigned long start, unsigned long end)
-{ 
+{
 	unsigned long prevbase;
 	struct bootnode nodes[8];
 	int nodeid, i, nb;
@@ -56,97 +56,97 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	if (!early_pci_allowed())
 		return -1;
 
-	nb = find_northbridge(); 
-	if (nb < 0) 
+	nb = find_northbridge();
+	if (nb < 0)
 		return nb;
 
-	printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); 
+	printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
 
-	reg = read_pci_config(0, nb, 0, 0x60); 
+	reg = read_pci_config(0, nb, 0, 0x60);
 	numnodes = ((reg >> 4) & 0xF) + 1;
 	if (numnodes <= 1)
 		return -1;
 
 	printk(KERN_INFO "Number of nodes %d\n", numnodes);
 
-	memset(&nodes,0,sizeof(nodes)); 
+	memset(&nodes, 0, sizeof(nodes));
 	prevbase = 0;
-	for (i = 0; i < 8; i++) { 
-		unsigned long base,limit; 
+	for (i = 0; i < 8; i++) {
+		unsigned long base, limit;
 		u32 nodeid;
-		
+
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 
-		nodeid = limit & 7; 
+		nodeid = limit & 7;
 		nodeids[i] = nodeid;
-		if ((base & 3) == 0) { 
+		if ((base & 3) == 0) {
 			if (i < numnodes)
-				printk("Skipping disabled node %d\n", i); 
+				printk("Skipping disabled node %d\n", i);
 			continue;
-		} 
+		}
 		if (nodeid >= numnodes) {
 			printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
-			       base, limit); 
+			       base, limit);
 			continue;
-		} 
+		}
 
-		if (!limit) { 
-			printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
-			       base);
+		if (!limit) {
+			printk(KERN_INFO "Skipping node entry %d (base %lx)\n",
+			       i, base);
 			continue;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", 
-			       nodeid, (base>>8)&3, (limit>>8) & 3); 
-			return -1; 
-		}	
+			printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
+			       nodeid, (base>>8)&3, (limit>>8) & 3);
+			return -1;
+		}
 		if (node_isset(nodeid, node_possible_map)) {
-			printk(KERN_INFO "Node %d already present. Skipping\n", 
+			printk(KERN_INFO "Node %d already present. Skipping\n",
 			       nodeid);
 			continue;
 		}
 
-		limit >>= 16; 
-		limit <<= 24; 
+		limit >>= 16;
+		limit <<= 24;
 		limit |= (1<<24)-1;
 		limit++;
 
 		if (limit > end_pfn << PAGE_SHIFT)
 			limit = end_pfn << PAGE_SHIFT;
 		if (limit <= base)
-			continue; 
-			
-		base >>= 16;
-		base <<= 24; 
-
-		if (base < start) 
-			base = start; 
-		if (limit > end) 
-			limit = end; 
-		if (limit == base) { 
-			printk(KERN_ERR "Empty node %d\n", nodeid); 
-			continue; 
-		}
-		if (limit < base) { 
-			printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
-			       nodeid, base, limit); 			       
 			continue;
-		} 
-		
+
+		base >>= 16;
+		base <<= 24;
+
+		if (base < start)
+			base = start;
+		if (limit > end)
+			limit = end;
+		if (limit == base) {
+			printk(KERN_ERR "Empty node %d\n", nodeid);
+			continue;
+		}
+		if (limit < base) {
+			printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
+			       nodeid, base, limit);
+			continue;
+		}
+
 		/* Could sort here, but pun for now. Should not happen anyroads. */
-		if (prevbase > base) { 
+		if (prevbase > base) {
 			printk(KERN_ERR "Node map not sorted %lx,%lx\n",
-			       prevbase,base);
+			       prevbase, base);
 			return -1;
 		}
-			
-		printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", 
-		       nodeid, base, limit); 
-		
+
+		printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
+		       nodeid, base, limit);
+
 		found++;
-		
-		nodes[nodeid].start = base; 
+
+		nodes[nodeid].start = base;
 		nodes[nodeid].end = limit;
 		e820_register_active_regions(nodeid,
 				nodes[nodeid].start >> PAGE_SHIFT,
@@ -155,31 +155,31 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 		prevbase = base;
 
 		node_set(nodeid, node_possible_map);
-	} 
+	}
 
 	if (!found)
-		return -1; 
+		return -1;
 
 	memnode_shift = compute_hash_shift(nodes, 8);
-	if (memnode_shift < 0) { 
-		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 
-		return -1; 
-	} 
-	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); 
+	if (memnode_shift < 0) {
+		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
+		return -1;
+	}
+	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
 
 	/* use the coreid bits from early_identify_cpu */
 	bits = boot_cpu_data.x86_coreid_bits;
 	cores = (1<<bits);
 
 	for (i = 0; i < 8; i++) {
-		if (nodes[i].start != nodes[i].end) { 
+		if (nodes[i].start != nodes[i].end) {
 			nodeid = nodeids[i];
 			for (j = 0; j < cores; j++)
 				apicid_to_node[(nodeid << bits) + j] = i;
-			setup_node_bootmem(i, nodes[i].start, nodes[i].end); 
-		} 
+			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+		}
 	}
 
 	numa_init_array();
 	return 0;
-} 
+}

From 27efeb67714608b28c0b213cceb6080749435c6b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:36 +0100
Subject: [PATCH 461/890] x86: make ptrace.h safe to include from assembler
 code

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/ptrace-abi.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index b3b9e023afce..08a12b790a77 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -80,6 +80,7 @@
 
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
+#ifndef __ASSEMBLY__
 /* configuration/status structure used in PTRACE_BTS_CONFIG and
    PTRACE_BTS_STATUS commands.
 */
@@ -91,6 +92,7 @@ struct ptrace_bts_config {
 	/* buffer overflow signal */
 	unsigned int signal;
 };
+#endif
 
 #define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
 #define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */

From de4218634e3df6d73a3e6cdfdf3a17fa3bc7e013 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:37 +0100
Subject: [PATCH 462/890] x86: implement support to synchronize RDTSC through
 MFENCE on AMD CPUs

According to AMD RDTSC can be synchronized through MFENCE.
Implement the necessary CPUID bit for that.

Cc: andreas.herrmann3@amd.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/amd.c    | 3 +++
 arch/x86/kernel/setup_64.c   | 4 ++--
 include/asm-x86/cpufeature.h | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1ff88c7f45cf..aaa8101d3d80 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -301,6 +301,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* K6s reports MCEs but don't actually have all the MSRs */
 	if (c->x86 < 6)
 		clear_bit(X86_FEATURE_MCE, c->x86_capability);
+
+	if (cpu_has_xmm)
+		set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 02409100f456..2139aa6ac469 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -746,8 +746,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
-	/* RDTSC can be speculated around */
-	clear_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
+	/* MFENCE stops RDTSC speculation */
+	set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 
 	/* Family 10 doesn't support C states in MWAIT so don't use it */
 	if (c->x86 == 0x10 && !force_mwait)
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 75e2f78a7fda..7d53eea8b946 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -79,6 +79,7 @@
 /* 14 free */
 #define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
+#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */

From 707fa8ed923b1b6a3d7af0d386b0b3abad28ed19 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:37 +0100
Subject: [PATCH 463/890] x86: Implement support to synchronize RDTSC with
 LFENCE on Intel CPUs

According to Intel RDTSC can be always synchronized with LFENCE
on all current CPUs. Implement the necessary CPUID bit for that.

It is unclear yet if that is true for all future CPUs too,
but if there's another way the kernel can be always updated.

Cc: asit.k.mallick@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c  | 3 ++-
 arch/x86/kernel/setup_64.c   | 5 +----
 include/asm-x86/cpufeature.h | 1 +
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e4b7e73e9024..0a4abdb61ae4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -203,9 +203,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 #endif
 
+	if (cpu_has_xmm)
+		set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability);
 	if (c->x86 == 15) {
 		set_bit(X86_FEATURE_P4, c->x86_capability);
-		set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
 	}
 	if (c->x86 == 6) 
 		set_bit(X86_FEATURE_P3, c->x86_capability);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 2139aa6ac469..bc7758ea06af 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -888,10 +888,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-	if (c->x86 == 15)
-		set_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
-	else
-		clear_cpu_cap(c, X86_FEATURE_SYNC_RDTSC);
+	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 7d53eea8b946..c1a7e07859c8 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -80,6 +80,7 @@
 #define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */

From 6d5f718a497375f853d90247f5f6963368e89803 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:38 +0100
Subject: [PATCH 464/890] x86: lfence fix

LFENCE is available on XMM2 or higher Intel CPUs - not XMM or higher...

this caused boot failures on XMM1 & !XMM1 capable CPUs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4abdb61ae4..5731de3e1bd1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -203,7 +203,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 #endif
 
-	if (cpu_has_xmm)
+	if (cpu_has_xmm2)
 		set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability);
 	if (c->x86 == 15) {
 		set_bit(X86_FEATURE_P4, c->x86_capability);

From bd61643ef6139ba44142f2ba6e79e03267e6b54a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:38 +0100
Subject: [PATCH 465/890] x86: move nop declarations into separate include file

Moving things out of processor.h is always a good thing.

Also needed to avoid include loop in later patch.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/nops.h      | 90 +++++++++++++++++++++++++++++++++++++
 include/asm-x86/processor.h | 86 +----------------------------------
 2 files changed, 91 insertions(+), 85 deletions(-)
 create mode 100644 include/asm-x86/nops.h

diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
new file mode 100644
index 000000000000..fec025c7f58c
--- /dev/null
+++ b/include/asm-x86/nops.h
@@ -0,0 +1,90 @@
+#ifndef _ASM_NOPS_H
+#define _ASM_NOPS_H 1
+
+/* Define nops for use with alternative() */
+
+/* generic versions from gas */
+#define GENERIC_NOP1	".byte 0x90\n"
+#define GENERIC_NOP2    	".byte 0x89,0xf6\n"
+#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7
+
+/* Opteron 64bit nops */
+#define K8_NOP1 GENERIC_NOP1
+#define K8_NOP2	".byte 0x66,0x90\n"
+#define K8_NOP3	".byte 0x66,0x66,0x90\n"
+#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
+#define K8_NOP5	K8_NOP3 K8_NOP2
+#define K8_NOP6	K8_NOP3 K8_NOP3
+#define K8_NOP7	K8_NOP4 K8_NOP3
+#define K8_NOP8	K8_NOP4 K8_NOP4
+
+/* K7 nops */
+/* uses eax dependencies (arbitary choice) */
+#define K7_NOP1  GENERIC_NOP1
+#define K7_NOP2	".byte 0x8b,0xc0\n"
+#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
+#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
+#define K7_NOP5	K7_NOP4 ASM_NOP1
+#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
+#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8        K7_NOP7 ASM_NOP1
+
+/* P6 nops */
+/* uses eax dependencies (Intel-recommended choice) */
+#define P6_NOP1	GENERIC_NOP1
+#define P6_NOP2	".byte 0x66,0x90\n"
+#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
+#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
+#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
+#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+
+#if defined(CONFIG_MK8)
+#define ASM_NOP1 K8_NOP1
+#define ASM_NOP2 K8_NOP2
+#define ASM_NOP3 K8_NOP3
+#define ASM_NOP4 K8_NOP4
+#define ASM_NOP5 K8_NOP5
+#define ASM_NOP6 K8_NOP6
+#define ASM_NOP7 K8_NOP7
+#define ASM_NOP8 K8_NOP8
+#elif defined(CONFIG_MK7)
+#define ASM_NOP1 K7_NOP1
+#define ASM_NOP2 K7_NOP2
+#define ASM_NOP3 K7_NOP3
+#define ASM_NOP4 K7_NOP4
+#define ASM_NOP5 K7_NOP5
+#define ASM_NOP6 K7_NOP6
+#define ASM_NOP7 K7_NOP7
+#define ASM_NOP8 K7_NOP8
+#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
+      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
+      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#define ASM_NOP1 P6_NOP1
+#define ASM_NOP2 P6_NOP2
+#define ASM_NOP3 P6_NOP3
+#define ASM_NOP4 P6_NOP4
+#define ASM_NOP5 P6_NOP5
+#define ASM_NOP6 P6_NOP6
+#define ASM_NOP7 P6_NOP7
+#define ASM_NOP8 P6_NOP8
+#else
+#define ASM_NOP1 GENERIC_NOP1
+#define ASM_NOP2 GENERIC_NOP2
+#define ASM_NOP3 GENERIC_NOP3
+#define ASM_NOP4 GENERIC_NOP4
+#define ASM_NOP5 GENERIC_NOP5
+#define ASM_NOP6 GENERIC_NOP6
+#define ASM_NOP7 GENERIC_NOP7
+#define ASM_NOP8 GENERIC_NOP8
+#endif
+
+#define ASM_NOP_MAX 8
+
+#endif
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 72740c6f1109..41c002440de7 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -20,6 +20,7 @@ struct mm_struct;
 #include <asm/percpu.h>
 #include <asm/msr.h>
 #include <asm/desc_defs.h>
+#include <asm/nops.h>
 #include <linux/personality.h>
 #include <linux/cpumask.h>
 #include <linux/cache.h>
@@ -674,91 +675,6 @@ extern int bootloader_type;
 extern char ignore_fpu_irq;
 #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
 
-/* generic versions from gas */
-#define GENERIC_NOP1	".byte 0x90\n"
-#define GENERIC_NOP2    	".byte 0x89,0xf6\n"
-#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7
-
-/* Opteron nops */
-#define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2	".byte 0x66,0x90\n"
-#define K8_NOP3	".byte 0x66,0x66,0x90\n"
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5	K8_NOP3 K8_NOP2
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
-
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1  GENERIC_NOP1
-#define K7_NOP2	".byte 0x8b,0xc0\n"
-#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5	K7_NOP4 ASM_NOP1
-#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8        K7_NOP7 ASM_NOP1
-
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
-#define P6_NOP1	GENERIC_NOP1
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
-
-#ifdef CONFIG_MK7
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
-      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
-      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) || \
-      defined(CONFIG_MPSC)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
-#elif defined(CONFIG_MK8) || defined(CONFIG_X86_64)
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
-#else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
-#endif
-
-#define ASM_NOP_MAX 8
-
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH

From 2a10e7c41254941cac87be1eccdcb6379ce097f5 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 30 Jan 2008 13:32:38 +0100
Subject: [PATCH 466/890] git-x86: unbreak UML

Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-um/nops.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/asm-um/nops.h

diff --git a/include/asm-um/nops.h b/include/asm-um/nops.h
new file mode 100644
index 000000000000..814e9bf5dea6
--- /dev/null
+++ b/include/asm-um/nops.h
@@ -0,0 +1,6 @@
+#ifndef __UM_NOPS_H
+#define __UM_NOPS_H
+
+#include "asm/arch/nops.h"
+
+#endif

From fde1b3fa947c2512e3715962ebb1d3a6a9b9bb7d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:38 +0100
Subject: [PATCH 467/890] x86: introduce rdtsc_barrier()

rdtsc_barrier() is a new barrier primitive that stops RDTSC speculation
to avoid races with timer interrupts on other CPUs.

It expands either to LFENCE (for Intel CPUs) or MFENCE (for
AMD CPUs) which stops RDTSC on all currently known microarchitectures
that implement SSE. On CPUs without SSE there is generally no RDTSC
speculation.

[ mingo@elte.hu: renamed it to rdtsc_barrier() and made it x86-only ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 6c7d1fda4995..39474f2957a3 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -5,6 +5,7 @@
 #include <asm/segment.h>
 #include <asm/cpufeature.h>
 #include <asm/cmpxchg.h>
+#include <asm/nops.h>
 
 #include <linux/kernel.h>
 #include <linux/irqflags.h>
@@ -395,5 +396,17 @@ void default_idle(void);
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif
 
+/*
+ * Stop RDTSC speculation. This is needed when you need to use RDTSC
+ * (or get_cycles or vread that possibly accesses the TSC) in a defined
+ * code region.
+ *
+ * (Could use an alternative three way for this if there was one.)
+ */
+static inline void rdtsc_barrier(void)
+{
+	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+}
 
 #endif

From cdc7957d1954908a39a6964e9c6f643916e76c4b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:39 +0100
Subject: [PATCH 468/890] x86: move native_read_tsc() offline

move native_read_tsc() offline.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/rtc.c | 10 ++++++++++
 include/asm-x86/msr.h |  8 +-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index d040840ff1b6..c42cf1263882 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -6,6 +6,7 @@
 #include <linux/mc146818rtc.h>
 
 #include <asm/time.h>
+#include <asm/vsyscall.h>
 
 #ifdef CONFIG_X86_32
 # define CMOS_YEARS_OFFS 1900
@@ -194,3 +195,12 @@ int update_persistent_clock(struct timespec now)
 {
 	return set_rtc_mmss(now.tv_sec);
 }
+
+unsigned long long __vsyscall_fn native_read_tsc(void)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+	return EAX_EDX_VAL(val, low, high);
+}
+EXPORT_SYMBOL_GPL(native_read_tsc);
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index bca8c3950132..decfec4ab17c 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -91,13 +91,7 @@ static inline int native_write_msr_safe(unsigned int msr,
 	return err;
 }
 
-static inline unsigned long long native_read_tsc(void)
-{
-	DECLARE_ARGS(val, low, high);
-
-	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-	return EAX_EDX_VAL(val, low, high);
-}
+extern unsigned long long native_read_tsc(void);
 
 static inline unsigned long long native_read_pmc(int counter)
 {

From e4026440130b84101f2da7f5a0c7a3b046173d3c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:39 +0100
Subject: [PATCH 469/890] x86: map vsyscalls early enough

map vsyscalls early enough. This is important if a __vsyscall_fn
function is used by other kernel code too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c    | 2 ++
 arch/x86/kernel/vsyscall_64.c | 3 +--
 include/asm-x86/vsyscall.h    | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index bc7758ea06af..8618178db842 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -45,6 +45,7 @@
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <asm/vsyscall.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/msr.h>
@@ -453,6 +454,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	reserve_crashkernel();
 	paging_init();
+	map_vsyscall();
 
 	early_quirks();
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e5c1118a8098..3f8242774580 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -319,7 +319,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 	return NOTIFY_DONE;
 }
 
-static void __init map_vsyscall(void)
+void __init map_vsyscall(void)
 {
 	extern char __vsyscall_0;
 	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
@@ -335,7 +335,6 @@ static int __init vsyscall_init(void)
 	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
 	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
-	map_vsyscall();
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2);
 #endif
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index f01c49f5d108..17b3700949bf 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -36,6 +36,8 @@ extern volatile unsigned long __jiffies;
 extern int vgetcpu_mode;
 extern struct timezone sys_tz;
 
+extern void map_vsyscall(void);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_64_VSYSCALL_H_ */

From f06e4ec1c15691b0cfd2397ae32214fa36c90d71 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:39 +0100
Subject: [PATCH 470/890] x86: read_tsc sync

make native_read_tsc() always non-speculative.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/rtc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index c42cf1263882..276cb7073ab1 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,7 +200,10 @@ unsigned long long __vsyscall_fn native_read_tsc(void)
 {
 	DECLARE_ARGS(val, low, high);
 
+	rdtsc_barrier();
 	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+	rdtsc_barrier();
+
 	return EAX_EDX_VAL(val, low, high);
 }
 EXPORT_SYMBOL_GPL(native_read_tsc);

From 6d63de8dbcda98511206897562ecfcdacf18f523 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:39 +0100
Subject: [PATCH 471/890] x86: remove get_cycles_sync

rdtsc is now speculation-safe, so no need for the sync variants of
the APIs.

[ mingo@elte.hu: removed the nsec_barrier() complication. ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/time_64.c  |  2 +-
 arch/x86/kernel/tsc_64.c   | 12 ++++----
 arch/x86/kernel/tsc_sync.c |  4 +--
 include/asm-x86/tsc.h      | 60 +++++---------------------------------
 4 files changed, 17 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 91d4d495904e..61b17f5ec867 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -83,7 +83,7 @@ unsigned long __init native_calculate_cpu_khz(void)
 	rdtscl(tsc_start);
 	do {
 		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
-		tsc_now = get_cycles_sync();
+		tsc_now = get_cycles();
 	} while ((tsc_now - tsc_start) < TICK_COUNT);
 
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 3723401c4593..2cc55b726c22 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -181,12 +181,12 @@ static unsigned long __init tsc_read_refs(unsigned long *pm,
 	int i;
 
 	for (i = 0; i < MAX_RETRIES; i++) {
-		t1 = get_cycles_sync();
+		t1 = get_cycles();
 		if (hpet)
 			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
 		else
 			*pm = acpi_pm_read_early();
-		t2 = get_cycles_sync();
+		t2 = get_cycles();
 		if ((t2 - t1) < SMI_TRESHOLD)
 			return t2;
 	}
@@ -210,9 +210,9 @@ void __init tsc_calibrate(void)
 	outb(0xb0, 0x43);
 	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
 	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
-	tr1 = get_cycles_sync();
+	tr1 = get_cycles();
 	while ((inb(0x61) & 0x20) == 0);
-	tr2 = get_cycles_sync();
+	tr2 = get_cycles();
 
 	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
 
@@ -300,13 +300,13 @@ __setup("notsc", notsc_setup);
 /* clock source code: */
 static cycle_t read_tsc(void)
 {
-	cycle_t ret = (cycle_t)get_cycles_sync();
+	cycle_t ret = (cycle_t)get_cycles();
 	return ret;
 }
 
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)vget_cycles_sync();
+	cycle_t ret = (cycle_t)vget_cycles();
 	return ret;
 }
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 05d8f25de6ae..ace340524c01 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,7 @@ static __cpuinit void check_tsc_warp(void)
 	cycles_t start, now, prev, end;
 	int i;
 
-	start = get_cycles_sync();
+	start = get_cycles();
 	/*
 	 * The measurement runs for 20 msecs:
 	 */
@@ -61,7 +61,7 @@ static __cpuinit void check_tsc_warp(void)
 		 */
 		__raw_spin_lock(&sync_lock);
 		prev = last_tsc;
-		now = get_cycles_sync();
+		now = get_cycles();
 		last_tsc = now;
 		__raw_spin_unlock(&sync_lock);
 
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 401303724130..f51a50da35aa 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -36,62 +36,18 @@ static inline cycles_t get_cycles(void)
 	return ret;
 }
 
-/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t __get_cycles_sync(void)
+static inline cycles_t vget_cycles(void)
 {
-	unsigned long long ret;
-	unsigned eax, edx;
-
 	/*
-	 * Use RDTSCP if possible; it is guaranteed to be synchronous
-	 * and doesn't cause a VMEXIT on Hypervisors
+	 * We only do VDSOs on TSC capable CPUs, so this shouldnt
+	 * access boot_cpu_data (which is not VDSO-safe):
 	 */
-	alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP,
-		       ASM_OUTPUT2("=a" (eax), "=d" (edx)),
-		       "a" (0U), "d" (0U) : "ecx", "memory");
-	ret = (((unsigned long long)edx) << 32) | ((unsigned long long)eax);
-	if (ret)
-		return ret;
-
-	/*
-	 * Don't do an additional sync on CPUs where we know
-	 * RDTSC is already synchronous:
-	 */
-	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
-			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
-
-	return 0;
-}
-
-static __always_inline cycles_t get_cycles_sync(void)
-{
-	unsigned long long ret;
-	ret = __get_cycles_sync();
-	if (!ret)
-		rdtscll(ret);
-	return ret;
-}
-
-#ifdef CONFIG_PARAVIRT
-/*
- * For paravirt guests, some functionalities are executed through function
- * pointers in the various pvops structures.
- * These function pointers exist inside the kernel and can not
- * be accessed by user space. To avoid this, we make a copy of the
- * get_cycles_sync (called in kernel) but force the use of native_read_tsc.
- * Ideally, the guest should set up it's own clock and vread
- */
-static __always_inline long long vget_cycles_sync(void)
-{
-	unsigned long long ret;
-	ret = __get_cycles_sync();
-	if (!ret)
-		ret = native_read_tsc();
-	return ret;
-}
-#else
-# define vget_cycles_sync() get_cycles_sync()
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return 0;
 #endif
+	return (cycles_t) native_read_tsc();
+}
 
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);

From 92767af0e3904b4d35ed547fb514ff6cb227e678 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:40 +0100
Subject: [PATCH 472/890] x86: fix sched_clock()

[ andi@firstfloor.org: build fix ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/rtc.c | 13 ++++---------
 include/asm-x86/msr.h | 11 +++++++++++
 include/asm-x86/tsc.h |  2 +-
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 276cb7073ab1..eb9b1a198f5e 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -196,14 +196,9 @@ int update_persistent_clock(struct timespec now)
 	return set_rtc_mmss(now.tv_sec);
 }
 
-unsigned long long __vsyscall_fn native_read_tsc(void)
+unsigned long long native_read_tsc(void)
 {
-	DECLARE_ARGS(val, low, high);
-
-	rdtsc_barrier();
-	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-	rdtsc_barrier();
-
-	return EAX_EDX_VAL(val, low, high);
+	return __native_read_tsc();
 }
-EXPORT_SYMBOL_GPL(native_read_tsc);
+EXPORT_SYMBOL(native_read_tsc);
+
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index decfec4ab17c..204a8a30fecf 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -93,6 +93,17 @@ static inline int native_write_msr_safe(unsigned int msr,
 
 extern unsigned long long native_read_tsc(void);
 
+static __always_inline unsigned long long __native_read_tsc(void)
+{
+	DECLARE_ARGS(val, low, high);
+
+	rdtsc_barrier();
+	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+	rdtsc_barrier();
+
+	return EAX_EDX_VAL(val, low, high);
+}
+
 static inline unsigned long long native_read_pmc(int counter)
 {
 	DECLARE_ARGS(val, low, high);
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index f51a50da35aa..071e0ce5b664 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -46,7 +46,7 @@ static inline cycles_t vget_cycles(void)
 	if (!cpu_has_tsc)
 		return 0;
 #endif
-	return (cycles_t) native_read_tsc();
+	return (cycles_t) __native_read_tsc();
 }
 
 extern void tsc_init(void);

From 68071a96655c883b316da9ef497f6dec8953529f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:40 +0100
Subject: [PATCH 473/890] x86: remove the now unused X86_FEATURE_SYNC_RDTSC

we need to know whether RDTSC is synchronous or not.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/cpufeature.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index c1a7e07859c8..93c143fb8a51 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -77,7 +77,7 @@
 #define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
 /* 14 free */
-#define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
+/* 15 free */
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */

From 2b16a2353814a513cdb5c5c739b76a19d7ea39ce Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:40 +0100
Subject: [PATCH 474/890] x86: move X86_FEATURE_CONSTANT_TSC into early cpu
 feature detection

Need this in the next patch in time_init and that happens early.

This includes a minor fix on i386 where early_intel_workarounds()
[which is now called early_init_intel] really executes early as
the comments say.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/amd.c    | 17 ++++++++++------
 arch/x86/kernel/cpu/common.c | 11 ++++++++--
 arch/x86/kernel/cpu/cpu.h    |  3 ++-
 arch/x86/kernel/cpu/intel.c  | 13 ++++++------
 arch/x86/kernel/setup_64.c   | 39 ++++++++++++++++++++++++++++--------
 5 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index aaa8101d3d80..cd2fe15ff4b5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_broken(void)
 
 int force_mwait __cpuinitdata;
 
+void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+	if (cpuid_eax(0x80000000) >= 0x80000007) {
+		c->x86_power = cpuid_edx(0x80000007);
+		if (c->x86_power & (1<<8))
+			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+	}
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	}
 #endif
 
+	early_init_amd(c);
+
 	/*
 	 *	FIXME: We should handle the K5 here. Set up the write
 	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
@@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
 	}
 
-	if (cpuid_eax(0x80000000) >= 0x80000007) {
-		c->x86_power = cpuid_edx(0x80000007);
-		if (c->x86_power & (1<<8))
-			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-	}
-
 #ifdef CONFIG_X86_HT
 	/*
 	 * On a AMD multi core setup the lower bits of the APIC id
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e48832a6c2a9..dbb9142a8241 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -307,6 +307,15 @@ static void __init early_cpu_detect(void)
 	cpu_detect(c);
 
 	get_cpu_vendor(c, 1);
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		early_init_amd(c);
+		break;
+	case X86_VENDOR_INTEL:
+		early_init_intel(c);
+		break;
+	}
 }
 
 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
@@ -364,8 +373,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 		init_scattered_cpuid_features(c);
 	}
 
-	early_intel_workaround(c);
-
 #ifdef CONFIG_X86_HT
 	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
 #endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2f6432cef6ff..ad6527a5beb1 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -24,5 +24,6 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
 
-extern void early_intel_workaround(struct cpuinfo_x86 *c);
+extern void early_init_intel(struct cpuinfo_x86 *c);
+extern void early_init_amd(struct cpuinfo_x86 *c);
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 5731de3e1bd1..f1136115279a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,13 +29,14 @@
 struct movsl_mask movsl_mask __read_mostly;
 #endif
 
-void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c)
+void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
-	if (c->x86_vendor != X86_VENDOR_INTEL)
-		return;
 	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
 		c->x86_cache_alignment = 128;
+	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+		(c->x86 == 0x6 && c->x86_model >= 0x0e))
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 }
 
 /*
@@ -115,6 +116,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	unsigned int l2 = 0;
 	char *p = NULL;
 
+	early_init_intel(c);
+
 #ifdef CONFIG_X86_F00F_BUG
 	/*
 	 * All current models of Pentium and Pentium with MMX technology CPUs
@@ -210,10 +213,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 	if (c->x86 == 6) 
 		set_bit(X86_FEATURE_P3, c->x86_capability);
-	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
-		(c->x86 == 0x6 && c->x86_model >= 0x0e))
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-
 	if (cpu_has_ds) {
 		unsigned int l1;
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 8618178db842..3cae326093cb 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -544,9 +544,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 		c->x86_cache_size, ecx & 0xFF);
 	}
-
-	if (n >= 0x80000007)
-		cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
 	if (n >= 0x80000008) {
 		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
 		c->x86_virt_bits = (eax >> 8) & 0xff;
@@ -624,7 +621,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	unsigned bits, ecx;
@@ -682,6 +679,15 @@ static __cpuinit int amd_apic_timer_broken(void)
 	return 0;
 }
 
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+	early_init_amd_mc(c);
+
+ 	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
+	if (c->x86_power & (1<<8))
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 	unsigned level;
@@ -731,10 +737,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	}
 	display_cacheinfo(c);
 
-	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-	if (c->x86_power & (1<<8))
-		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-
 	/* Multi core CPU? */
 	if (c->extended_cpuid_level >= 0x80000008)
 		amd_detect_cmp(c);
@@ -845,6 +847,13 @@ static void srat_detect_node(void)
 #endif
 }
 
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+{
+	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
+		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+}
+
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
 	/* Cache sizes */
@@ -1056,6 +1065,20 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
+
+	c->extended_cpuid_level = cpuid_eax(0x80000000);
+
+	if (c->extended_cpuid_level >= 0x80000007)
+		c->x86_power = cpuid_edx(0x80000007);
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		early_init_amd(c);
+		break;
+	case X86_VENDOR_INTEL:
+		early_init_intel(c);
+		break;
+	}
 }
 
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)

From 51fc97b93545e71cec578d6771bceeb92bc2d50b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:40 +0100
Subject: [PATCH 475/890] x86: allow TSC clock source on AMD Fam10h and some
 cleanup

After a lot of discussions with AMD it turns out that TSC
on Fam10h CPUs is synchronized when the CONSTANT_TSC cpuid bit is set.
Or rather that if there are ever systems where that is not
true it would be their BIOS' task to disable the bit.

So finally use TSC gettimeofday on Fam10h by default.

Or rather it is always used now on CPUs where the AMD
specific CONSTANT_TSC bit is set.

This gives a nice speed bost for gettimeofday() on these systems
which tends to be by far the most common v/syscall.

On a Fam10h system here TSC gtod uses about 20% of the CPU time of
acpi_pm based gtod(). This was measured on 32bit, on 64bit
it is even better because TSC gtod() can use a vsyscall
and stay in ring 3, which acpi_pm doesn't.

The Intel check simply checks for CONSTANT_TSC too without hardcoding
Intel vendor. This is equivalent on 64bit because all 64bit capable Intel
CPUs will have CONSTANT_TSC set.

On Intel there is no CPU supplied CONSTANT_TSC bit currently,
but we synthesize one based on hardcoded knowledge which steppings
have p-state invariant TSC.

So the new logic is now: On CPUs which have the AMD specific
CONSTANT_TSC bit set or on Intel CPUs which are new enough
to be known to have p-state invariant TSC always use
TSC based gettimeofday()

Cc: lenb@kernel.org

Signed-off-by: Andi Kleen <ak@suse.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_32.c | 5 +++++
 arch/x86/kernel/tsc_64.c | 5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 00bb4c1c0593..2a7b95bd8509 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -354,6 +354,11 @@ __cpuinit int unsynchronized_tsc(void)
 {
 	if (!cpu_has_tsc || tsc_unstable)
 		return 1;
+
+	/* Anything with constant TSC should be synchronized */
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return 0;
+
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 2cc55b726c22..322b38c68198 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -272,9 +272,8 @@ __cpuinit int unsynchronized_tsc(void)
 	if (apic_is_clustered_box())
 		return 1;
 #endif
-	/* Most intel systems have synchronized TSCs except for
-	   multi node systems */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_ACPI
 		/* But TSC doesn't tick in C3 so don't use it there */
 		if (acpi_gbl_FADT.header.length > 0 &&

From 32c7553f824d0d76771404f0e11d6059f82e8de7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:41 +0100
Subject: [PATCH 476/890] x86: remove explicit C3 TSC check on 64bit

Trust the ACPI code to disable TSC instead when C3 is used.

AMD Fam10h does not disable TSC in any C states so the
check was incorrect there anyways after the change
to handle this like Intel on AMD too.

This allows to use the TSC when C3 is disabled in software
(acpi.max_c_state=2), but the BIOS supports it anyways.

Match i386 behaviour.

Cc: lenb@kernel.org

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_64.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 322b38c68198..c62f3b6eacc0 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -273,15 +273,8 @@ __cpuinit int unsynchronized_tsc(void)
 		return 1;
 #endif
 
-	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-#ifdef CONFIG_ACPI
-		/* But TSC doesn't tick in C3 so don't use it there */
-		if (acpi_gbl_FADT.header.length > 0 &&
-		    acpi_gbl_FADT.C3latency < 1000)
-			return 1;
-#endif
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
-	}
 
 	/* Assume multi socket systems are not synchronized */
 	return num_present_cpus() > 1;

From ddb25f9ac1c4b4f9ba0bdacd7850a921a0c6886c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:41 +0100
Subject: [PATCH 477/890] x86: don't disable TSC in any C states on AMD Fam10h

The ACPI code currently disables TSC use in any C2 and C3
states. But the AMD Fam10h BKDG documents that the TSC
will never stop in any C states when the CONSTANT_TSC bit is
set. Make this disabling conditional on CONSTANT_TSC
not set on AMD.

I actually think this is true on Intel too for C2 states
on CPUs with p-state invariant TSC, but this needs
further discussions with Len to really confirm :-)

So far it is only enabled on AMD.

Cc: lenb@kernel.org

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/processor_idle.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 0721a8183c89..eb1f82f79153 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -357,6 +357,26 @@ int acpi_processor_resume(struct acpi_device * device)
 	return 0;
 }
 
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+static int tsc_halts_in_c(int state)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		/*
+		 * AMD Fam10h TSC will tick in all
+		 * C/P/S0/S1 states when this bit is set.
+		 */
+		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+			return 0;
+		/*FALL THROUGH*/
+	case X86_VENDOR_INTEL:
+		/* Several cases known where TSC halts in C2 too */
+	default:
+		return state > ACPI_STATE_C1;
+	}
+}
+#endif
+
 #ifndef CONFIG_CPU_IDLE
 static void acpi_processor_idle(void)
 {
@@ -516,7 +536,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
 		/* TSC halts in C2, so notify users */
-		mark_tsc_unstable("possible TSC halt in C2");
+		if (tsc_halts_in_c(ACPI_STATE_C2))
+			mark_tsc_unstable("possible TSC halt in C2");
 #endif
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks = ticks_elapsed(t1, t2);
@@ -580,7 +601,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
 		/* TSC halts in C3, so notify users */
-		mark_tsc_unstable("TSC halts in C3");
+		if (tsc_halts_in_c(ACPI_STATE_C3))
+			mark_tsc_unstable("TSC halts in C3");
 #endif
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks = ticks_elapsed(t1, t2);
@@ -1445,7 +1467,8 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
 	/* TSC could halt in idle, so notify users */
-	mark_tsc_unstable("TSC halts in idle");;
+	if (tsc_halts_in_c(cx->type))
+		mark_tsc_unstable("TSC halts in idle");;
 #endif
 	sleep_ticks = ticks_elapsed(t1, t2);
 
@@ -1556,7 +1579,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
 	/* TSC could halt in idle, so notify users */
-	mark_tsc_unstable("TSC halts in idle");
+	if (tsc_halts_in_c(ACPI_STATE_C3))
+		mark_tsc_unstable("TSC halts in idle");
 #endif
 	sleep_ticks = ticks_elapsed(t1, t2);
 	/* Tell the scheduler how much we idled: */

From 9566e91d494ed0668edf88f852de7f251fe8fe9a Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Wed, 30 Jan 2008 13:32:41 +0100
Subject: [PATCH 478/890] x86: fix detection of CONSTANT_TSC bit for AMD CPUs

Commits
 - c52f61fcbdb2aa84f0e4d831ef07f375e6b99b2c
  (x86: allow TSC clock source on AMD Fam10h and some cleanup)
 - e30436f05d456efaff77611e4494f607b14c2782
  (x86: move X86_FEATURE_CONSTANT_TSC into early cpu feature detection)

are supposed to fix the detection of contant TSC for AMD CPUs.
Unfortunately on x86_64 it does still not work with current x86/mm.
For a Phenom I still get:

  ...
  TSC calibrated against PM_TIMER
  Marking TSC unstable due to TSCs unsynchronized
  time.c: Detected 2288.366 MHz processor.
  ...

We have to set c->x86_power in early_identify_cpu to properly detect
the CONSTANT_TSC bit in early_init_amd.

Attached patch fixes this issue. Following the relevant boot
messages when the fix is used:

  ...
  TSC calibrated against PM_TIMER
  time.c: Detected 2288.279 MHz processor.
  ...
  Initializing CPU#1
  ...
  checking TSC synchronization [CPU#0 -> CPU#1]: passed.
  ...
  Initializing CPU#2
  ...
  checking TSC synchronization [CPU#0 -> CPU#2]: passed.
  ...
  Booting processor 3/4 APIC 0x3
  ...
  checking TSC synchronization [CPU#0 -> CPU#3]: passed.
  Brought up 4 CPUs
  ...

Patch is against x86/mm (v2.6.24-rc8-672-ga9f7faa).
Please apply.

Set c->x86_power in early_identify_cpu. This ensures that
X86_FEATURE_CONSTANT_TSC can properly be set in early_init_amd.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 3cae326093cb..1caf7458dc48 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -996,6 +996,10 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
+	c->extended_cpuid_level = cpuid_eax(0x80000000);
+	if (c->extended_cpuid_level >= 0x80000007)
+		c->x86_power = cpuid_edx(0x80000007);
+
 	switch (c->x86_vendor) {
 	case X86_VENDOR_AMD:
 		early_init_amd(c);
@@ -1066,11 +1070,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	numa_add_cpu(smp_processor_id());
 #endif
 
-	c->extended_cpuid_level = cpuid_eax(0x80000000);
-
-	if (c->extended_cpuid_level >= 0x80000007)
-		c->x86_power = cpuid_edx(0x80000007);
-
 	switch (c->x86_vendor) {
 	case X86_VENDOR_AMD:
 		early_init_amd(c);

From 83a5101bf2fa7dcf09ffd436078a021d32c97f85 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:41 +0100
Subject: [PATCH 479/890] x86: page.h: unify constants

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199317360 28800
# Node ID ba0ec40a50a7aef1a3153cea124c35e261f5a2df
# Parent  c45c263179cb78284b6b869c574457df088027d1
x86: page.h: unify constants

There are many constants which are shared by 32 and 64-bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 127 ++++++++++++++++++++++++++++++++++----
 include/asm-x86/page_32.h |  46 --------------
 include/asm-x86/page_64.h |  74 ++--------------------
 3 files changed, 120 insertions(+), 127 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index a757eb26141d..e0fa4a032ea7 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -1,13 +1,116 @@
-#ifdef __KERNEL__
-# ifdef CONFIG_X86_32
-#  include "page_32.h"
-# else
-#  include "page_64.h"
-# endif
-#else
-# ifdef __i386__
-#  include "page_32.h"
-# else
-#  include "page_64.h"
-# endif
+#ifndef _ASM_X86_PAGE_H
+#define _ASM_X86_PAGE_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
+
+#define LARGE_PAGE_SIZE		(_AC(1,UL) << PMD_SHIFT)
+#define LARGE_PAGE_MASK		(~(LARGE_PAGE_SIZE-1))
+
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+#define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
+
+
+#ifdef CONFIG_X86_64
+#define THREAD_ORDER	1
+#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
+#define CURRENT_MASK (~(THREAD_SIZE-1))
+
+#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
+#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
+#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
+
+#define IRQSTACK_ORDER 2
+#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+
+#define STACKFAULT_STACK 1
+#define DOUBLEFAULT_STACK 2
+#define NMI_STACK 3
+#define DEBUG_STACK 4
+#define MCE_STACK 5
+#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+
+#define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
+
+#define __PHYSICAL_START	CONFIG_PHYSICAL_START
+#define __KERNEL_ALIGN		0x200000
+
+/*
+ * Make sure kernel is aligned to 2MB address. Catching it at compile
+ * time is better. Change your config file and compile the kernel
+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
+ */
+#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
+#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
 #endif
+
+#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
+
+/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+#define __PHYSICAL_MASK_SHIFT	46
+#define __VIRTUAL_MASK_SHIFT	48
+
+#define KERNEL_TEXT_SIZE  (40*1024*1024)
+#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+
+#endif	/* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+
+/*
+ * This handles the memory map.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB.
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+#define __PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
+
+#ifdef CONFIG_X86_PAE
+#define __PHYSICAL_MASK_SHIFT	36
+#define __VIRTUAL_MASK_SHIFT	32
+#else  /* !CONFIG_X86_PAE */
+#define __PHYSICAL_MASK_SHIFT	32
+#define __VIRTUAL_MASK_SHIFT	32
+#endif	/* CONFIG_X86_PAE */
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#endif	/* CONFIG_X86_32 */
+
+#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+
+#ifdef CONFIG_X86_32
+# include "page_32.h"
+#else
+# include "page_64.h"
+#endif
+
+#endif	/* _ASM_X86_PAGE_H */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index c620c934e557..69e520059b26 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -1,14 +1,6 @@
 #ifndef _I386_PAGE_H
 #define _I386_PAGE_H
 
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT	12
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
-
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
-
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
@@ -111,7 +103,6 @@ static inline pte_t native_make_pte(unsigned long long val)
 #define __pmd(x)	native_make_pmd(x)
 #endif
 
-#define HPAGE_SHIFT	21
 #include <asm-generic/pgtable-nopud.h>
 #else  /* !CONFIG_X86_PAE */
 typedef struct { unsigned long pte_low; } pte_t;
@@ -139,19 +130,11 @@ static inline pte_t native_make_pte(unsigned long val)
 	return (pte_t) { .pte_low = val };
 }
 
-#define HPAGE_SHIFT	22
 #include <asm-generic/pgtable-nopmd.h>
 #endif	/* CONFIG_X86_PAE */
 
 #define PTE_MASK	PAGE_MASK
 
-#ifdef CONFIG_HUGETLB_PAGE
-#define HPAGE_SIZE	((1UL) << HPAGE_SHIFT)
-#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
@@ -164,22 +147,6 @@ static inline pte_t native_make_pte(unsigned long val)
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-/*
- * This handles the memory map.. We could make this a config
- * option, but too many people screw it up, and too few need
- * it.
- *
- * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
- * a virtual address space of one gigabyte, which limits the
- * amount of physical memory you can use to about 950MB. 
- *
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
- */
-
 #ifndef __ASSEMBLY__
 
 struct vm_area_struct;
@@ -196,14 +163,6 @@ extern int page_is_ram(unsigned long pagenr);
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef __ASSEMBLY__
-#define __PAGE_OFFSET		CONFIG_PAGE_OFFSET
-#else
-#define __PAGE_OFFSET		((unsigned long)CONFIG_PAGE_OFFSET)
-#endif
-
-
-#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
@@ -219,11 +178,6 @@ extern int page_is_ram(unsigned long pagenr);
 
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
-#define VM_DATA_DEFAULT_FLAGS \
-	(VM_READ | VM_WRITE | \
-	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
-		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 068b5e4cea4c..69ef7cf9dadd 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -1,42 +1,6 @@
 #ifndef _X86_64_PAGE_H
 #define _X86_64_PAGE_H
 
-#include <linux/const.h>
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT	12
-#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & __PHYSICAL_MASK)
-
-#define THREAD_ORDER 1 
-#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE-1))
-
-#define EXCEPTION_STACK_ORDER 0
-#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
-
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
-
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
-
-#define HPAGE_SHIFT PMD_SHIFT
-#define HPAGE_SIZE	(_AC(1,UL) << HPAGE_SHIFT)
-#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
@@ -90,35 +54,6 @@ extern unsigned long phys_base;
 
 #endif /* !__ASSEMBLY__ */
 
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN		0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
-
-#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
-#define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT	46
-#define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
-#define __VIRTUAL_MASK_SHIFT	48
-#define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
-
-#define KERNEL_TEXT_SIZE  (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
-#define PAGE_OFFSET		__PAGE_OFFSET
-
 #ifndef __ASSEMBLY__
 
 #include <asm/bug.h>
@@ -138,10 +73,6 @@ extern unsigned long __phys_addr(unsigned long);
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 
-#define VM_DATA_DEFAULT_FLAGS \
-	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
-	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
 #define __HAVE_ARCH_GATE_AREA 1	
 #define vmemmap ((struct page *)VMEMMAP_START)
 
@@ -150,4 +81,9 @@ extern unsigned long __phys_addr(unsigned long);
 
 #endif /* __KERNEL__ */
 
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn)          ((pfn) < end_pfn)
+#endif
+
+
 #endif /* _X86_64_PAGE_H */

From 345b904c3f7c24fbfadfee7cddd5896d13b176d9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:42 +0100
Subject: [PATCH 480/890] x86: page.h: unify page copying and clearing

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199317362 28800
# Node ID 4d9a413a0f4c1d98dbea704f0366457b5117045d
# Parent  ba0ec40a50a7aef1a3153cea124c35e261f5a2df
x86: page.h: unify page copying and clearing

Move, and to some extent unify, the various page copying and clearing
functions.  The only unification here is that both architectures use
the same function for copying/clearing user and kernel pages.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 55 +++++++++++++++++++++++++++++++++++++++
 include/asm-x86/page_32.h | 53 -------------------------------------
 include/asm-x86/page_64.h | 20 --------------
 3 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index e0fa4a032ea7..2ebb6977e00c 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -70,6 +70,11 @@
 #define KERNEL_TEXT_SIZE  (40*1024*1024)
 #define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
 
+#ifndef __ASSEMBLY__
+void clear_page(void *page);
+void copy_page(void *to, void *from);
+#endif	/* !__ASSEMBLY__ */
+
 #endif	/* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_32
@@ -98,6 +103,34 @@
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #endif
 
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_USE_3DNOW
+#include <asm/mmx.h>
+
+static inline void clear_page(void *page)
+{
+	mmx_clear_page(page);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	mmx_copy_page(to, from);
+}
+#else  /* !CONFIG_X86_USE_3DNOW */
+#include <linux/string.h>
+
+static inline void clear_page(void *page)
+{
+	memset(page, 0, PAGE_SIZE);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
+#endif	/* CONFIG_X86_3DNOW */
+#endif	/* !__ASSEMBLY__ */
+
 #endif	/* CONFIG_X86_32 */
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
@@ -107,6 +140,28 @@
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 
+#ifndef __ASSEMBLY__
+struct page;
+
+static void inline clear_user_page(void *page, unsigned long vaddr,
+				struct page *pg)
+{
+	clear_page(page);
+}
+
+static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
+				struct page *topage)
+{
+	copy_page(to, from);
+}
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+#endif	/* __ASSEMBLY__ */
+
+
 #ifdef CONFIG_X86_32
 # include "page_32.h"
 #else
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 69e520059b26..b4750ef44920 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -4,59 +4,6 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
-#include <linux/string.h>
-
-#ifdef CONFIG_X86_USE_3DNOW
-
-#include <asm/mmx.h>
-
-static inline void clear_page(void *page)
-{
-	mmx_clear_page(page);
-}
-
-static inline void copy_page(void *to, void *from)
-{
-	mmx_copy_page(to, from);
-}
-
-#else
-
-/*
- *	On older X86 processors it's not a win to use MMX here it seems.
- *	Maybe the K6-III ?
- */
- 
-static inline void clear_page(void *page)
-{
-	memset(page, 0, PAGE_SIZE);
-}
-
-static inline void copy_page(void *to, void *from)
-{
-	memcpy(to, from, PAGE_SIZE);
-}
-
-#endif
-
-struct page;
-
-static void inline clear_user_page(void *page, unsigned long vaddr,
-				struct page *pg)
-{
-	clear_page(page);
-}
-
-static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
-				struct page *topage)
-{
-	copy_page(to, from);
-}
-
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
 /*
  * These are used to make use of C type-checking..
  */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 69ef7cf9dadd..f5e0543b8afa 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -7,26 +7,6 @@
 extern unsigned long end_pfn;
 extern unsigned long end_pfn_map;
 
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-struct page;
-
-static void inline clear_user_page(void *page, unsigned long vaddr,
-				struct page *pg)
-{
-	clear_page(page);
-}
-
-static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
-				struct page *topage)
-{
-	copy_page(to, from);
-}
-
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 /*
  * These are used to make use of C type-checking..
  */

From 74ef649fe847fdfbd3e1732d21b923f59ca04e8c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:42 +0100
Subject: [PATCH 481/890] x86: add _AT() macro to conditionally cast

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199317452 28800
# Node ID f7e7db3facd9406545103164f9be8f9ba1a2b549
# Parent  4d9a413a0f4c1d98dbea704f0366457b5117045d
x86: add _AT() macro to conditionally cast

Define _AT(type, value) to conditionally cast a value when compiling C
code, but not when used in assembler.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/const.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/const.h b/include/linux/const.h
index 07b300bfe34b..c22c707c455d 100644
--- a/include/linux/const.h
+++ b/include/linux/const.h
@@ -7,13 +7,18 @@
  * C code.  Therefore we cannot annotate them always with
  * 'UL' and other type specifiers unilaterally.  We
  * use the following macros to deal with this.
+ *
+ * Similarly, _AT() will cast an expression with a type in C, but
+ * leave it unchanged in asm.
  */
 
 #ifdef __ASSEMBLY__
 #define _AC(X,Y)	X
+#define _AT(T,X)	X
 #else
 #define __AC(X,Y)	(X##Y)
 #define _AC(X,Y)	__AC(X,Y)
+#define _AT(T,X)	((T)(X))
 #endif
 
 #endif /* !(_LINUX_CONST_H) */

From 3da1bcc2659de27094592e5a037d0b1d59351e03 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:42 +0100
Subject: [PATCH 482/890] x86: page.h: move and unify types for pagetable
 entry, #1

based on:

 Subject: x86: page.h: move and unify types for pagetable entry
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 2ebb6977e00c..911d7e0b3cd9 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -9,6 +9,7 @@
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
 #define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
+#define PTE_MASK		PHYSICAL_PAGE_MASK
 
 #define LARGE_PAGE_SIZE		(_AC(1,UL) << PMD_SHIFT)
 #define LARGE_PAGE_MASK		(~(LARGE_PAGE_SIZE-1))
@@ -24,8 +25,13 @@
 #define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
 #define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
 
 #ifdef CONFIG_X86_64
+#define PAGETABLE_LEVELS	4
+
 #define THREAD_ORDER	1
 #define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
 #define CURRENT_MASK (~(THREAD_SIZE-1))

From 7a2389b457a03f226b441b5c53f9808d4eefdcea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:42 +0100
Subject: [PATCH 483/890] x86: page.h: move and unify types for pagetable
 entry, #2

based on:

 Subject: x86: page.h: move and unify types for pagetable entry
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_64.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 5c252d9fee1b..3f280105d7b3 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -79,12 +79,12 @@ static inline void set_pte(pte_t *dst, pte_t val)
 
 static inline void set_pmd(pmd_t *dst, pmd_t val)
 {
-        pmd_val(*dst) = pmd_val(val); 
+	*dst = val;
 } 
 
 static inline void set_pud(pud_t *dst, pud_t val)
 {
-	pud_val(*dst) = pud_val(val);
+	*dst = val;
 }
 
 static inline void pud_clear (pud_t *pud)
@@ -94,7 +94,7 @@ static inline void pud_clear (pud_t *pud)
 
 static inline void set_pgd(pgd_t *dst, pgd_t val)
 {
-	pgd_val(*dst) = pgd_val(val); 
+	*dst = val;
 } 
 
 static inline void pgd_clear (pgd_t * pgd)

From ba2b6c525eec459e1c6397f0f3eaeeafad153eab Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:42 +0100
Subject: [PATCH 484/890] x86: page.h: move and unify types for pagetable
 entry, #3

based on:

 Subject: x86: page.h: move and unify types for pagetable entry
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 13 +++++++++++++
 include/asm-x86/page_64.h |  1 -
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 911d7e0b3cd9..a522fbada97e 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -79,6 +79,19 @@
 #ifndef __ASSEMBLY__
 void clear_page(void *page);
 void copy_page(void *to, void *from);
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef unsigned long	pteval_t;
+typedef unsigned long	pmdval_t;
+typedef unsigned long	pudval_t;
+typedef unsigned long	pgdval_t;
+typedef unsigned long	pgprotval_t;
+typedef unsigned long	phys_addr_t;
+
+typedef struct { pteval_t pte; } pte_t;
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif	/* CONFIG_X86_64 */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f5e0543b8afa..f5e82c695d26 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -10,7 +10,6 @@ extern unsigned long end_pfn_map;
 /*
  * These are used to make use of C type-checking..
  */
-typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long pgd; } pgd_t;

From d6e3cf63e927310f65e21132f1d1c50e3aa4c2b0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:43 +0100
Subject: [PATCH 485/890] x86: page.h: move and unify types for pagetable
 entry, #4

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index a522fbada97e..530439e0d340 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -92,6 +92,9 @@ typedef unsigned long	phys_addr_t;
 
 typedef struct { pteval_t pte; } pte_t;
 
+#define native_pte_val(x)	((x).pte)
+#define native_make_pte(x) ((pte_t) { (x) } )
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif	/* CONFIG_X86_64 */

From 38f0f12793a490ac633dbba2418172b7abfa077e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:43 +0100
Subject: [PATCH 486/890] x86: page.h: move and unify types for pagetable
 entry, #5

based on:

 Subject: x86: page.h: move and unify types for pagetable entry
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 72 +++++++++++++++++++++++++++++++++++++++
 include/asm-x86/page_64.h | 21 ------------
 2 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 530439e0d340..014f087d7c27 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -181,6 +181,78 @@ static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#ifdef CONFIG_X86_64
+typedef struct { pgdval_t pgd; } pgd_t;
+typedef struct { pgprotval_t pgprot; } pgprot_t;
+
+static inline pgd_t native_make_pgd(pgdval_t val)
+{
+	return (pgd_t) { val };
+}
+
+static inline pgdval_t native_pgd_val(pgd_t pgd)
+{
+	return pgd.pgd;
+}
+
+#if PAGETABLE_LEVELS >= 3
+#if PAGETABLE_LEVELS == 4
+typedef struct { pudval_t pud; } pud_t;
+
+static inline pud_t native_make_pud(pmdval_t val)
+{
+	return (pud_t) { val };
+}
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+	return pud.pud;
+}
+#else	/* PAGETABLE_LEVELS == 3 */
+#include <asm-generic/pgtable-nopud.h>
+#endif	/* PAGETABLE_LEVELS == 4 */
+
+typedef struct { pmdval_t pmd; } pmd_t;
+
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+	return (pmd_t) { val };
+}
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+	return pmd.pmd;
+}
+#else  /* PAGETABLE_LEVELS == 2 */
+#include <asm-generic/pgtable-nopmd.h>
+#endif	/* PAGETABLE_LEVELS >= 3 */
+
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else  /* !CONFIG_PARAVIRT */
+
+#define pgd_val(x)	native_pgd_val(x)
+#define __pgd(x)	native_make_pgd(x)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_val(x)	native_pud_val(x)
+#define __pud(x)	native_make_pud(x)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pmd_val(x)	native_pmd_val(x)
+#define __pmd(x)	native_make_pmd(x)
+#endif
+
+#define pte_val(x)	native_pte_val(x)
+#define __pte(x)	native_make_pte(x)
+
+#endif	/* CONFIG_PARAVIRT */
+
+#endif /* CONFIG_X86_64 */
 #endif	/* __ASSEMBLY__ */
 
 
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f5e82c695d26..f8a07bf54c3c 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -7,30 +7,9 @@
 extern unsigned long end_pfn;
 extern unsigned long end_pfn_map;
 
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pud; } pud_t;
-typedef struct { unsigned long pgd; } pgd_t;
-#define PTE_MASK	PHYSICAL_PAGE_MASK
-
-typedef struct { unsigned long pgprot; } pgprot_t;
 
 extern unsigned long phys_base;
 
-#define pte_val(x)	((x).pte)
-#define pmd_val(x)	((x).pmd)
-#define pud_val(x)	((x).pud)
-#define pgd_val(x)	((x).pgd)
-#define pgprot_val(x)	((x).pgprot)
-
-#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
-#define __pud(x) ((pud_t) { (x) } )
-#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x)	((pgprot_t) { (x) } )
-
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__

From 881d90d0daaeac018b0d5beb739dd825ccee0143 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:43 +0100
Subject: [PATCH 487/890] x86: page.h: move and unify types for pagetable entry

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199319654 28800
# Node ID 3bd7db6e85e66e7f3362874802df26a82fcb2d92
# Parent  f7e7db3facd9406545103164f9be8f9ba1a2b549
x86: page.h: move and unify types for pagetable entry definitions

This patch:

1. Defines arch-specific types for the contents of a pagetable entry.
That is, 32-bit entries for 32-bit non-PAE, and 64-bit entries for
32-bit PAE and 64-bit.  However, even though the latter two are the
same size, they're defined with different types in order to retain
compatibility with printk format strings, etc.

2. Defines arch-specific pte_t.  This is different because 32-bit PAE
defines it in two halves, whereas 32-bit PAE and 64-bit define it as a
single entry.  All the other pagetable levels can be defined in a
common way.  This also defines arch-specific pte_val/make_pte functions.

3. Define PAGETABLE_LEVELS for each architecture variation, for later use.

4. Define common pagetable entry accessors in a paravirt-compatible
way. (64-bit does not yet use paravirt-ops in any way).

5. Convert a few instances of using a *_val() as an lvalue where it is
no longer a macro.  There are still places in the 64-bit code which
use pte_val() as an lvalue.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 50 ++++++++++++++++++++++-
 include/asm-x86/page_32.h | 83 ---------------------------------------
 2 files changed, 48 insertions(+), 85 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 014f087d7c27..00353eb5666d 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -116,9 +116,57 @@ typedef struct { pteval_t pte; } pte_t;
 #ifdef CONFIG_X86_PAE
 #define __PHYSICAL_MASK_SHIFT	36
 #define __VIRTUAL_MASK_SHIFT	32
+#define PAGETABLE_LEVELS	3
+
+#ifndef __ASSEMBLY__
+typedef u64	pteval_t;
+typedef u64	pmdval_t;
+typedef u64	pudval_t;
+typedef u64	pgdval_t;
+typedef u64	pgprotval_t;
+typedef u64	phys_addr_t;
+
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+
+static inline unsigned long long native_pte_val(pte_t pte)
+{
+	return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
+}
+
+static inline pte_t native_make_pte(unsigned long long val)
+{
+	return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
+}
+
+#endif	/* __ASSEMBLY__
+ */
 #else  /* !CONFIG_X86_PAE */
 #define __PHYSICAL_MASK_SHIFT	32
 #define __VIRTUAL_MASK_SHIFT	32
+#define PAGETABLE_LEVELS	2
+
+#ifndef __ASSEMBLY__
+typedef unsigned long	pteval_t;
+typedef unsigned long	pmdval_t;
+typedef unsigned long	pudval_t;
+typedef unsigned long	pgdval_t;
+typedef unsigned long	pgprotval_t;
+typedef unsigned long	phys_addr_t;
+
+typedef struct { pteval_t pte_low; } pte_t;
+typedef pte_t boot_pte_t;
+
+static inline unsigned long native_pte_val(pte_t pte)
+{
+	return pte.pte_low;
+}
+
+static inline pte_t native_make_pte(unsigned long val)
+{
+	return (pte_t) { .pte_low = val };
+}
+
+#endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -181,7 +229,6 @@ static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
-#ifdef CONFIG_X86_64
 typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
@@ -252,7 +299,6 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 
 #endif	/* CONFIG_PARAVIRT */
 
-#endif /* CONFIG_X86_64 */
 #endif	/* __ASSEMBLY__ */
 
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index b4750ef44920..2d60661ab340 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -9,89 +9,6 @@
  */
 extern int nx_enabled;
 
-#ifdef CONFIG_X86_PAE
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-typedef struct { unsigned long long pmd; } pmd_t;
-typedef struct { unsigned long long pgd; } pgd_t;
-typedef struct { unsigned long long pgprot; } pgprot_t;
-
-static inline unsigned long long native_pgd_val(pgd_t pgd)
-{
-	return pgd.pgd;
-}
-
-static inline unsigned long long native_pmd_val(pmd_t pmd)
-{
-	return pmd.pmd;
-}
-
-static inline unsigned long long native_pte_val(pte_t pte)
-{
-	return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
-}
-
-static inline pgd_t native_make_pgd(unsigned long long val)
-{
-	return (pgd_t) { val };
-}
-
-static inline pmd_t native_make_pmd(unsigned long long val)
-{
-	return (pmd_t) { val };
-}
-
-static inline pte_t native_make_pte(unsigned long long val)
-{
-	return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
-}
-
-#ifndef CONFIG_PARAVIRT
-#define pmd_val(x)	native_pmd_val(x)
-#define __pmd(x)	native_make_pmd(x)
-#endif
-
-#include <asm-generic/pgtable-nopud.h>
-#else  /* !CONFIG_X86_PAE */
-typedef struct { unsigned long pte_low; } pte_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define boot_pte_t pte_t /* or would you rather have a typedef */
-
-static inline unsigned long native_pgd_val(pgd_t pgd)
-{
-	return pgd.pgd;
-}
-
-static inline unsigned long native_pte_val(pte_t pte)
-{
-	return pte.pte_low;
-}
-
-static inline pgd_t native_make_pgd(unsigned long val)
-{
-	return (pgd_t) { val };
-}
-
-static inline pte_t native_make_pte(unsigned long val)
-{
-	return (pte_t) { .pte_low = val };
-}
-
-#include <asm-generic/pgtable-nopmd.h>
-#endif	/* CONFIG_X86_PAE */
-
-#define PTE_MASK	PAGE_MASK
-
-#define pgprot_val(x)	((x).pgprot)
-#define __pgprot(x)	((pgprot_t) { (x) } )
-
-#ifndef CONFIG_PARAVIRT
-#define pgd_val(x)	native_pgd_val(x)
-#define __pgd(x)	native_make_pgd(x)
-#define pte_val(x)	native_pte_val(x)
-#define __pte(x)	native_make_pte(x)
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__

From 6724a1d2fc6b40d55f232b8dacc12a915878bdc7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:43 +0100
Subject: [PATCH 488/890] x86: page.h: move and unify types for pagetable
 entry, #6

based on:

 Subject: x86: page.h: move and unify types for pagetable entry
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 00353eb5666d..ef393c3df902 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -22,7 +22,7 @@
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
-#define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
+#define __PHYSICAL_MASK		_AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
 #define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
 
 #ifndef __ASSEMBLY__

From 98fd5aee348f0420afd1c636790d50aaaec6ceec Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:43 +0100
Subject: [PATCH 489/890] x86: page.h: move pa and va related things

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199319656 28800
# Node ID d617b72a0cc9d14bde2087d065c36d4ed3265761
# Parent  3bd7db6e85e66e7f3362874802df26a82fcb2d92
x86: page.h: move pa and va related things

Move and unify the virtual<->physical address space conversion
functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 24 ++++++++++++++++++++++++
 include/asm-x86/page_32.h | 11 -----------
 include/asm-x86/page_64.h | 13 -------------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index ef393c3df902..9830eeb57173 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -80,6 +80,9 @@
 void clear_page(void *page);
 void copy_page(void *to, void *from);
 
+extern unsigned long __phys_addr(unsigned long);
+#define __phys_reloc_hide(x)	(x)
+
 /*
  * These are used to make use of C type-checking..
  */
@@ -174,6 +177,13 @@ static inline pte_t native_make_pte(unsigned long val)
 #endif
 
 #ifndef __ASSEMBLY__
+#define __phys_addr(x)		((x)-PAGE_OFFSET)
+#define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+#endif /* CONFIG_FLATMEM */
+
 #ifdef CONFIG_X86_USE_3DNOW
 #include <asm/mmx.h>
 
@@ -299,6 +309,20 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 
 #endif	/* CONFIG_PARAVIRT */
 
+#define __pa(x)		__phys_addr((unsigned long)(x))
+/* __pa_symbol should be used for C visible symbols.
+   This seems to be the official gcc blessed way to do such arithmetic. */
+#define __pa_symbol(x)	__pa(__phys_reloc_hide((unsigned long)(x)))
+
+#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+
+#define __boot_va(x)		__va(x)
+#define __boot_pa(x)		__pa(x)
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
 #endif	/* __ASSEMBLY__ */
 
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 2d60661ab340..a27424a2ee0d 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -29,18 +29,7 @@ extern int page_is_ram(unsigned long pagenr);
 
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
-#define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
-/* __pa_symbol should be used for C visible symbols.
-   This seems to be the official gcc blessed way to do such arithmetic. */
-#define __pa_symbol(x)          __pa(RELOC_HIDE((unsigned long)(x),0))
-#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)		((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f8a07bf54c3c..7094684270a6 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -16,21 +16,8 @@ extern unsigned long phys_base;
 
 #include <asm/bug.h>
 
-extern unsigned long __phys_addr(unsigned long);
-
 #endif /* __ASSEMBLY__ */
 
-#define __pa(x)		__phys_addr((unsigned long)(x))
-#define __pa_symbol(x)	__phys_addr((unsigned long)(x))
-
-#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define __boot_va(x)		__va(x)
-#define __boot_pa(x)		__pa(x)
-
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
-
 #define __HAVE_ARCH_GATE_AREA 1	
 #define vmemmap ((struct page *)VMEMMAP_START)
 

From e62f4473f32a882a537b32cb7202da8d5d7c4f1f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:44 +0100
Subject: [PATCH 490/890] x86: page.h: move remaining bits and pieces

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199319657 28800
# Node ID bba9287641ff90e836d090d80b5c0a846aab7162
# Parent  d617b72a0cc9d14bde2087d065c36d4ed3265761
x86: page.h: move remaining bits and pieces

Move the remaining odds and ends into page.h.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h    | 23 +++++++++++++++++++++++
 include/asm-x86/page_32.h | 17 -----------------
 include/asm-x86/page_64.h | 13 -------------
 3 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 9830eeb57173..f65a2ae6e323 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -80,6 +80,10 @@
 void clear_page(void *page);
 void copy_page(void *to, void *from);
 
+extern unsigned long end_pfn;
+extern unsigned long end_pfn_map;
+extern unsigned long phys_base;
+
 extern unsigned long __phys_addr(unsigned long);
 #define __phys_reloc_hide(x)	(x)
 
@@ -98,6 +102,8 @@ typedef struct { pteval_t pte; } pte_t;
 #define native_pte_val(x)	((x).pte)
 #define native_make_pte(x) ((pte_t) { (x) } )
 
+#define vmemmap ((struct page *)VMEMMAP_START)
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif	/* CONFIG_X86_64 */
@@ -184,6 +190,19 @@ static inline pte_t native_make_pte(unsigned long val)
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
 
+extern int nx_enabled;
+
+/*
+ * This much address space is reserved for vmalloc() and iomap()
+ * as well as fixmap mappings.
+ */
+extern unsigned int __VMALLOC_RESERVE;
+extern int sysctl_legacy_va_layout;
+extern int page_is_ram(unsigned long pagenr);
+
+#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
+#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+
 #ifdef CONFIG_X86_USE_3DNOW
 #include <asm/mmx.h>
 
@@ -325,6 +344,10 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 
 #endif	/* __ASSEMBLY__ */
 
+#include <asm-generic/memory_model.h>
+#include <asm-generic/page.h>
+
+#define __HAVE_ARCH_GATE_AREA 1
 
 #ifdef CONFIG_X86_32
 # include "page_32.h"
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index a27424a2ee0d..c27b41fd0254 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -7,7 +7,6 @@
 /*
  * These are used to make use of C type-checking..
  */
-extern int nx_enabled;
 
 #endif /* !__ASSEMBLY__ */
 
@@ -15,26 +14,10 @@ extern int nx_enabled;
 
 struct vm_area_struct;
 
-/*
- * This much address space is reserved for vmalloc() and iomap()
- * as well as fixmap mappings.
- */
-extern unsigned int __VMALLOC_RESERVE;
-
-extern int sysctl_legacy_va_layout;
-
-extern int page_is_ram(unsigned long pagenr);
-
 #endif /* __ASSEMBLY__ */
 
-#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
 
 
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
-
-#define __HAVE_ARCH_GATE_AREA 1
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 7094684270a6..94d4803aab2d 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -4,26 +4,13 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
-extern unsigned long end_pfn;
-extern unsigned long end_pfn_map;
-
-
-extern unsigned long phys_base;
-
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
 
-#include <asm/bug.h>
 
 #endif /* __ASSEMBLY__ */
 
-#define __HAVE_ARCH_GATE_AREA 1	
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
-
 #endif /* __KERNEL__ */
 
 #ifdef CONFIG_FLATMEM

From 11b7c7dc945b8802c459baa3304c1b501d2bf872 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:44 +0100
Subject: [PATCH 491/890] x86: page.h: move things back to their own files

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199321648 28800
# Node ID 22f6a5902285b58bfc1fbbd9e183498c9017bd78
# Parent  bba9287641ff90e836d090d80b5c0a846aab7162
x86: page.h: move things back to their own files

Oops, asm/page.h has turned into an #ifdef hellhole.  Move
32/64-specific things back to their own headers to make it somewhat
comprehensible...

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/Kbuild    |   3 +-
 include/asm-x86/page.h    | 212 ++------------------------------------
 include/asm-x86/page_32.h | 123 ++++++++++++++++++++--
 include/asm-x86/page_64.h |  74 ++++++++++++-
 4 files changed, 188 insertions(+), 224 deletions(-)

diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 22e3a284c1ed..e6189b229143 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -15,8 +15,7 @@ unifdef-y += ist.h
 unifdef-y += mce.h
 unifdef-y += msr.h
 unifdef-y += mtrr.h
-unifdef-y += page_32.h
-unifdef-y += page_64.h
+unifdef-y += page.h
 unifdef-y += posix_types_32.h
 unifdef-y += posix_types_64.h
 unifdef-y += ptrace.h
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index f65a2ae6e323..3a00d0d9dfd3 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -8,6 +8,8 @@
 #define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
+#ifdef __KERNEL__
+
 #define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
 #define PTE_MASK		PHYSICAL_PAGE_MASK
 
@@ -30,208 +32,11 @@
 #endif
 
 #ifdef CONFIG_X86_64
-#define PAGETABLE_LEVELS	4
-
-#define THREAD_ORDER	1
-#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE-1))
-
-#define EXCEPTION_STACK_ORDER 0
-#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
-
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
-
-#define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
-
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN		0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
-
-#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
-
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT	46
-#define __VIRTUAL_MASK_SHIFT	48
-
-#define KERNEL_TEXT_SIZE  (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
-
-#ifndef __ASSEMBLY__
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-extern unsigned long end_pfn;
-extern unsigned long end_pfn_map;
-extern unsigned long phys_base;
-
-extern unsigned long __phys_addr(unsigned long);
-#define __phys_reloc_hide(x)	(x)
-
-/*
- * These are used to make use of C type-checking..
- */
-typedef unsigned long	pteval_t;
-typedef unsigned long	pmdval_t;
-typedef unsigned long	pudval_t;
-typedef unsigned long	pgdval_t;
-typedef unsigned long	pgprotval_t;
-typedef unsigned long	phys_addr_t;
-
-typedef struct { pteval_t pte; } pte_t;
-
-#define native_pte_val(x)	((x).pte)
-#define native_make_pte(x) ((pte_t) { (x) } )
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-#endif	/* !__ASSEMBLY__ */
-
+#include <asm/page_64.h>
+#else
+#include <asm/page_32.h>
 #endif	/* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
-
-/*
- * This handles the memory map.
- *
- * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
- * a virtual address space of one gigabyte, which limits the
- * amount of physical memory you can use to about 950MB.
- *
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
- */
-#define __PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
-
-#ifdef CONFIG_X86_PAE
-#define __PHYSICAL_MASK_SHIFT	36
-#define __VIRTUAL_MASK_SHIFT	32
-#define PAGETABLE_LEVELS	3
-
-#ifndef __ASSEMBLY__
-typedef u64	pteval_t;
-typedef u64	pmdval_t;
-typedef u64	pudval_t;
-typedef u64	pgdval_t;
-typedef u64	pgprotval_t;
-typedef u64	phys_addr_t;
-
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-
-static inline unsigned long long native_pte_val(pte_t pte)
-{
-	return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
-}
-
-static inline pte_t native_make_pte(unsigned long long val)
-{
-	return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
-}
-
-#endif	/* __ASSEMBLY__
- */
-#else  /* !CONFIG_X86_PAE */
-#define __PHYSICAL_MASK_SHIFT	32
-#define __VIRTUAL_MASK_SHIFT	32
-#define PAGETABLE_LEVELS	2
-
-#ifndef __ASSEMBLY__
-typedef unsigned long	pteval_t;
-typedef unsigned long	pmdval_t;
-typedef unsigned long	pudval_t;
-typedef unsigned long	pgdval_t;
-typedef unsigned long	pgprotval_t;
-typedef unsigned long	phys_addr_t;
-
-typedef struct { pteval_t pte_low; } pte_t;
-typedef pte_t boot_pte_t;
-
-static inline unsigned long native_pte_val(pte_t pte)
-{
-	return pte.pte_low;
-}
-
-static inline pte_t native_make_pte(unsigned long val)
-{
-	return (pte_t) { .pte_low = val };
-}
-
-#endif	/* __ASSEMBLY__ */
-#endif	/* CONFIG_X86_PAE */
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#ifndef __ASSEMBLY__
-#define __phys_addr(x)		((x)-PAGE_OFFSET)
-#define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
-
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)		((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-
-extern int nx_enabled;
-
-/*
- * This much address space is reserved for vmalloc() and iomap()
- * as well as fixmap mappings.
- */
-extern unsigned int __VMALLOC_RESERVE;
-extern int sysctl_legacy_va_layout;
-extern int page_is_ram(unsigned long pagenr);
-
-#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
-
-#ifdef CONFIG_X86_USE_3DNOW
-#include <asm/mmx.h>
-
-static inline void clear_page(void *page)
-{
-	mmx_clear_page(page);
-}
-
-static inline void copy_page(void *to, void *from)
-{
-	mmx_copy_page(to, from);
-}
-#else  /* !CONFIG_X86_USE_3DNOW */
-#include <linux/string.h>
-
-static inline void clear_page(void *page)
-{
-	memset(page, 0, PAGE_SIZE);
-}
-
-static inline void copy_page(void *to, void *from)
-{
-	memcpy(to, from, PAGE_SIZE);
-}
-#endif	/* CONFIG_X86_3DNOW */
-#endif	/* !__ASSEMBLY__ */
-
-#endif	/* CONFIG_X86_32 */
-
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 
 #define VM_DATA_DEFAULT_FLAGS \
@@ -349,10 +154,5 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 
 #define __HAVE_ARCH_GATE_AREA 1
 
-#ifdef CONFIG_X86_32
-# include "page_32.h"
-#else
-# include "page_64.h"
-#endif
-
+#endif	/* __KERNEL__ */
 #endif	/* _ASM_X86_PAGE_H */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index c27b41fd0254..e424bef4395a 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -1,23 +1,124 @@
-#ifndef _I386_PAGE_H
-#define _I386_PAGE_H
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef _ASM_X86_PAGE_32_H
+#define _ASM_X86_PAGE_32_H
 
 /*
- * These are used to make use of C type-checking..
+ * This handles the memory map.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB.
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
  */
+#define __PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
 
-#endif /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_PAE
+#define __PHYSICAL_MASK_SHIFT	36
+#define __VIRTUAL_MASK_SHIFT	32
+#define PAGETABLE_LEVELS	3
 
 #ifndef __ASSEMBLY__
+typedef u64	pteval_t;
+typedef u64	pmdval_t;
+typedef u64	pudval_t;
+typedef u64	pgdval_t;
+typedef u64	pgprotval_t;
+typedef u64	phys_addr_t;
 
-struct vm_area_struct;
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
 
-#endif /* __ASSEMBLY__ */
+static inline unsigned long long native_pte_val(pte_t pte)
+{
+	return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
+}
 
+static inline pte_t native_make_pte(unsigned long long val)
+{
+	return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
+}
 
+#endif	/* __ASSEMBLY__
+ */
+#else  /* !CONFIG_X86_PAE */
+#define __PHYSICAL_MASK_SHIFT	32
+#define __VIRTUAL_MASK_SHIFT	32
+#define PAGETABLE_LEVELS	2
 
-#endif /* __KERNEL__ */
+#ifndef __ASSEMBLY__
+typedef unsigned long	pteval_t;
+typedef unsigned long	pmdval_t;
+typedef unsigned long	pudval_t;
+typedef unsigned long	pgdval_t;
+typedef unsigned long	pgprotval_t;
+typedef unsigned long	phys_addr_t;
 
-#endif /* _I386_PAGE_H */
+typedef struct { pteval_t pte_low; } pte_t;
+typedef pte_t boot_pte_t;
+
+static inline unsigned long native_pte_val(pte_t pte)
+{
+	return pte.pte_low;
+}
+
+static inline pte_t native_make_pte(unsigned long val)
+{
+	return (pte_t) { .pte_low = val };
+}
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* CONFIG_X86_PAE */
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#ifndef __ASSEMBLY__
+#define __phys_addr(x)		((x)-PAGE_OFFSET)
+#define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+#endif /* CONFIG_FLATMEM */
+
+extern int nx_enabled;
+
+/*
+ * This much address space is reserved for vmalloc() and iomap()
+ * as well as fixmap mappings.
+ */
+extern unsigned int __VMALLOC_RESERVE;
+extern int sysctl_legacy_va_layout;
+extern int page_is_ram(unsigned long pagenr);
+
+#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
+#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+
+#ifdef CONFIG_X86_USE_3DNOW
+#include <asm/mmx.h>
+
+static inline void clear_page(void *page)
+{
+	mmx_clear_page(page);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	mmx_copy_page(to, from);
+}
+#else  /* !CONFIG_X86_USE_3DNOW */
+#include <linux/string.h>
+
+static inline void clear_page(void *page)
+{
+	memset(page, 0, PAGE_SIZE);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
+#endif	/* CONFIG_X86_3DNOW */
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_PAGE_32_H */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 94d4803aab2d..ebf977a809a8 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -1,17 +1,81 @@
 #ifndef _X86_64_PAGE_H
 #define _X86_64_PAGE_H
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#define PAGETABLE_LEVELS	4
 
-#endif /* !__ASSEMBLY__ */
+#define THREAD_ORDER	1
+#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
+#define CURRENT_MASK (~(THREAD_SIZE-1))
+
+#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
+#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
+#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
+
+#define IRQSTACK_ORDER 2
+#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+
+#define STACKFAULT_STACK 1
+#define DOUBLEFAULT_STACK 2
+#define NMI_STACK 3
+#define DEBUG_STACK 4
+#define MCE_STACK 5
+#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+
+#define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
+
+#define __PHYSICAL_START	CONFIG_PHYSICAL_START
+#define __KERNEL_ALIGN		0x200000
+
+/*
+ * Make sure kernel is aligned to 2MB address. Catching it at compile
+ * time is better. Change your config file and compile the kernel
+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
+ */
+#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
+#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
+#endif
+
+#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
+
+/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+#define __PHYSICAL_MASK_SHIFT	46
+#define __VIRTUAL_MASK_SHIFT	48
+
+#define KERNEL_TEXT_SIZE  (40*1024*1024)
+#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
 
 #ifndef __ASSEMBLY__
+void clear_page(void *page);
+void copy_page(void *to, void *from);
 
+extern unsigned long end_pfn;
+extern unsigned long end_pfn_map;
+extern unsigned long phys_base;
 
-#endif /* __ASSEMBLY__ */
+extern unsigned long __phys_addr(unsigned long);
+#define __phys_reloc_hide(x)	(x)
 
-#endif /* __KERNEL__ */
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef unsigned long	pteval_t;
+typedef unsigned long	pmdval_t;
+typedef unsigned long	pudval_t;
+typedef unsigned long	pgdval_t;
+typedef unsigned long	pgprotval_t;
+typedef unsigned long	phys_addr_t;
+
+typedef struct { pteval_t pte; } pte_t;
+
+#define native_pte_val(x)	((x).pte)
+#define native_make_pte(x) ((pte_t) { (x) } )
+
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+#endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)          ((pfn) < end_pfn)

From b899c5ed2ef3af3429abd8046197255f179ea496 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:44 +0100
Subject: [PATCH 492/890] x86/efi: fix improper use of lvalue

# HG changeset patch
# User Jeremy Fitzhardinge <jeremy@xensource.com>
# Date 1199391030 28800
# Node ID 5d35c92fdf0e2c52edbb6fc4ccd06c7f65f25009
# Parent  22f6a5902285b58bfc1fbbd9e183498c9017bd78
x86/efi: fix improper use of lvalue

pgd_val is no longer valid as an lvalue, so don't try to assign to it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 269de2e049a3..1f8bbd9644d7 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -85,7 +85,7 @@ void __init efi_call_phys_prelog(void)
 	local_irq_save(efi_flags);
 	early_runtime_code_mapping_set_exec(1);
 	vaddress = (unsigned long)__va(0x0UL);
-	pgd_val(save_pgd) = pgd_val(*pgd_offset_k(0x0UL));
+	save_pgd = *pgd_offset_k(0x0UL);
 	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
 	__flush_tlb_all();
 }

From 9e094c17ee2b85130ab7b2ea37456f6867eb687a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:48 +0100
Subject: [PATCH 493/890] genirq: turn irq debugging options into module params

This allows to change them at runtime using sysfs. No need to
reboot to set them.

I only added aliases (kernel.noirqdebug etc.) so the old options
still work.

Signed-off-by: Andi Kleen <ak@suse.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/spurious.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 32b161972fad..a6b2bc831dd0 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
+#include <linux/moduleparam.h>
 
 static int irqfixup __read_mostly;
 
@@ -225,6 +226,8 @@ int noirqdebug_setup(char *str)
 }
 
 __setup("noirqdebug", noirqdebug_setup);
+module_param(noirqdebug, bool, 0644);
+MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
 
 static int __init irqfixup_setup(char *str)
 {
@@ -236,6 +239,8 @@ static int __init irqfixup_setup(char *str)
 }
 
 __setup("irqfixup", irqfixup_setup);
+module_param(irqfixup, int, 0644);
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {

From 96d97cf03b3d68e6c857623da93acd522b2b7e1a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:48 +0100
Subject: [PATCH 494/890] x86: add /proc/irq/*/spurious to dump the spurious
 irq debugging state

This is useful to debug problems with interrupt handlers that return
sometimes IRQ_NONE.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/proc.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 50b81b98046a..c2f2ccb0549a 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -75,6 +75,18 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
 
 #endif
 
+static int irq_spurious_read(char *page, char **start, off_t off,
+				  int count, int *eof, void *data)
+{
+	struct irq_desc *d = &irq_desc[(long) data];
+	return sprintf(page, "count %u\n"
+			     "unhandled %u\n"
+			     "last_unhandled %u ms\n",
+			d->irq_count,
+			d->irqs_unhandled,
+			jiffies_to_msecs(d->last_unhandled));
+}
+
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -118,6 +130,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 void register_irq_proc(unsigned int irq)
 {
 	char name [MAX_NAMELEN];
+	struct proc_dir_entry *entry;
 
 	if (!root_irq_dir ||
 		(irq_desc[irq].chip == &no_irq_chip) ||
@@ -132,8 +145,6 @@ void register_irq_proc(unsigned int irq)
 
 #ifdef CONFIG_SMP
 	{
-		struct proc_dir_entry *entry;
-
 		/* create /proc/irq/<irq>/smp_affinity */
 		entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
 
@@ -144,6 +155,12 @@ void register_irq_proc(unsigned int irq)
 		}
 	}
 #endif
+
+	entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+	if (entry) {
+		entry->data = (void *)(long)irq;
+		entry->read_proc = irq_spurious_read;
+	}
 }
 
 #undef MAX_NAMELEN

From 3898534d85e2da8cedab1ceb6ab9328c61f2c1ce Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 495/890] x86: remove CPU capabitilites printks on 32-bit

I don't know of any case where they have been useful and they look ugly.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dbb9142a8241..ed05c7a0ca9b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -432,20 +432,9 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	generic_identify(c);
 
-	printk(KERN_DEBUG "CPU: After generic identify, caps:");
-	for (i = 0; i < NCAPINTS; i++)
-		printk(" %08x", c->x86_capability[i]);
-	printk("\n");
-
-	if (this_cpu->c_identify) {
+	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
-		printk(KERN_DEBUG "CPU: After vendor identify, caps:");
-		for (i = 0; i < NCAPINTS; i++)
-			printk(" %08x", c->x86_capability[i]);
-		printk("\n");
-	}
-
 	/*
 	 * Vendor-specific initialization.  In this section we
 	 * canonicalize the feature flags, meaning if there are
@@ -496,13 +485,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 				c->x86, c->x86_model);
 	}
 
-	/* Now the feature flags better reflect actual CPU features! */
-
-	printk(KERN_DEBUG "CPU: After all inits, caps:");
-	for (i = 0; i < NCAPINTS; i++)
-		printk(" %08x", c->x86_capability[i]);
-	printk("\n");
-
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are

From 37f30e21d6be046e948fce1dfa4a968cf5abcf89 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 496/890] x86: document fdimage/isoimage completely in make
 help

Add missing targets and missing options in x86 make help

[ mingo@elte.hu: more whitespace cleanups ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index bfe061de8ed6..e2ffccfd6af6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -234,14 +234,18 @@ archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 
 define archhelp
-  echo  '* bzImage	- Compressed kernel image (arch/x86/boot/bzImage)'
-  echo  '  install	- Install kernel using'
-  echo  '		   (your) ~/bin/installkernel or'
-  echo  '		   (distribution) /sbin/installkernel or'
-  echo  '		   install to $$(INSTALL_PATH) and run lilo'
-  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
-  echo  '  fdimage      - Create a boot floppy image'
-  echo  '  isoimage     - Create a boot CD-ROM image'
+  echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
+  echo  '  install      - Install kernel using'
+  echo  '                  (your) ~/bin/installkernel or'
+  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  install to $$(INSTALL_PATH) and run lilo'
+  echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  fdimage288   - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  isoimage     - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
+  echo  '                  bzdisk/fdimage*/isoimage also accept:'
+  echo  '                  FDARGS="..."  arguments for the booted kernel'
+  echo  '                  FDINITRD=file initrd for the booted kernel'
 endef
 
 CLEAN_FILES += arch/x86/boot/fdimage \

From a6b68076fd5d7de04e6847ed9d7d212e7dd6beb5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 497/890] x86: compile apm and voyager module only when
 selected in Kconfig

Previously the complete files were #ifdef'ed, but now handle that in the
Makefile.

May save a minor bit of compilation time.

[ Stephen Rothwell <sfr@canb.auug.org.au>: build dependency fix ]
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig        | 5 +++++
 arch/x86/boot/Makefile  | 6 ++++--
 arch/x86/boot/apm.c     | 3 ---
 arch/x86/boot/voyager.c | 4 ----
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5271665a22e0..1de28e850aa4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1199,6 +1199,11 @@ source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
+config X86_APM_BOOT
+	bool
+	default y
+	depends on APM || APM_MODULE
+
 menuconfig APM
 	tristate "APM (Advanced Power Management) BIOS support"
 	depends on X86_32 && PM_SLEEP && !X86_VISWS
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dcad6507f196..349b81a39c40 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -28,9 +28,11 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
 subdir- 	:= compressed
 
-setup-y		+= a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y		+= printf.o string.o tty.o video.o version.o voyager.o
+setup-y		+= printf.o string.o tty.o video.o version.o
+setup-$(CONFIG_X86_APM_BOOT) += apm.o
+setup-$(CONFIG_X86_VOYAGER) += voyager.o
 
 # The link order of the video-*.o modules can matter.  In particular,
 # video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index eab50c55a3a5..c117c7fb859c 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -19,8 +19,6 @@
 
 #include "boot.h"
 
-#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
-
 int query_apm_bios(void)
 {
 	u16 ax, bx, cx, dx, di;
@@ -95,4 +93,3 @@ int query_apm_bios(void)
 	return 0;
 }
 
-#endif
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
index 61c8fe0453be..6499e3239b41 100644
--- a/arch/x86/boot/voyager.c
+++ b/arch/x86/boot/voyager.c
@@ -16,8 +16,6 @@
 
 #include "boot.h"
 
-#ifdef CONFIG_X86_VOYAGER
-
 int query_voyager(void)
 {
 	u8 err;
@@ -42,5 +40,3 @@ int query_voyager(void)
 	copy_from_fs(data_ptr, di, 7);	/* Table is 7 bytes apparently */
 	return 0;
 }
-
-#endif /* CONFIG_X86_VOYAGER */

From a9a832902fccc958caa339677dbff766e51b02c3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 498/890] x86: Set CFQ as default in 32-bit defconfig

Someone complained that the 32-bit defconfig contains AS as default IO
scheduler. Change that to CFQ.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/configs/i386_defconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 54ee1764fdae..77562e7cdab6 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -99,9 +99,9 @@ CONFIG_IOSCHED_NOOP=y
 CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_AS is not set
 # CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="anticipatory"
 

From e3cfac84cfbc8c9f17817573befc0f4913b1a4dc Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 499/890] x86: mark memory_setup __init

Otherwise

WARNING: vmlinux.o(.text+0x64a9): Section mismatch: reference to .init.text:machine_specific_memory_setup (between 'memory_setup' and 'show_cpuinfo')

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 1caf7458dc48..a7124bfb8578 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -279,7 +279,7 @@ static void discover_ebda(void)
 }
 
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
-void __attribute__((weak)) memory_setup(void)
+void __attribute__((weak)) __init memory_setup(void)
 {
        machine_specific_memory_setup();
 }

From 0c2bd5a5e3b9469c6ab2b96c403980f192db0bf7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:49 +0100
Subject: [PATCH 500/890] x86: clean up include/asm-x86/calling.h

clean up include/asm-x86/calling.h.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/calling.h | 196 ++++++++++++++++++++------------------
 1 file changed, 101 insertions(+), 95 deletions(-)

diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h
index 6f4f63af96e1..f13e62e2cb3e 100644
--- a/include/asm-x86/calling.h
+++ b/include/asm-x86/calling.h
@@ -1,162 +1,168 @@
-/* 
+/*
  * Some macros to handle stack frames in assembly.
- */ 
+ */
 
+#define R15		  0
+#define R14		  8
+#define R13		 16
+#define R12		 24
+#define RBP		 32
+#define RBX		 40
 
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
 /* arguments: interrupts/non tracing syscalls only save upto here*/
-#define R11 48
-#define R10 56	
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120       /* + error_code */ 
-/* end of arguments */ 	
-/* cpu exception frame or undefined in case of fast syscall. */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-#define ARGOFFSET R11
-#define SWFRAME ORIG_RAX
+#define R11		 48
+#define R10		 56
+#define R9		 64
+#define R8		 72
+#define RAX		 80
+#define RCX		 88
+#define RDX		 96
+#define RSI		104
+#define RDI		112
+#define ORIG_RAX	120       /* + error_code */
+/* end of arguments */
 
-	.macro SAVE_ARGS addskip=0,norcx=0,nor891011=0
-	subq  $9*8+\addskip,%rsp
+/* cpu exception frame or undefined in case of fast syscall. */
+#define RIP		128
+#define CS		136
+#define EFLAGS		144
+#define RSP		152
+#define SS		160
+
+#define ARGOFFSET	R11
+#define SWFRAME		ORIG_RAX
+
+	.macro SAVE_ARGS addskip=0, norcx=0, nor891011=0
+	subq  $9*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq  %rdi,8*8(%rsp) 
-	CFI_REL_OFFSET	rdi,8*8
-	movq  %rsi,7*8(%rsp) 
-	CFI_REL_OFFSET	rsi,7*8
-	movq  %rdx,6*8(%rsp)
-	CFI_REL_OFFSET	rdx,6*8
+	movq  %rdi, 8*8(%rsp)
+	CFI_REL_OFFSET	rdi, 8*8
+	movq  %rsi, 7*8(%rsp)
+	CFI_REL_OFFSET	rsi, 7*8
+	movq  %rdx, 6*8(%rsp)
+	CFI_REL_OFFSET	rdx, 6*8
 	.if \norcx
 	.else
-	movq  %rcx,5*8(%rsp)
-	CFI_REL_OFFSET	rcx,5*8
+	movq  %rcx, 5*8(%rsp)
+	CFI_REL_OFFSET	rcx, 5*8
 	.endif
-	movq  %rax,4*8(%rsp) 
-	CFI_REL_OFFSET	rax,4*8
+	movq  %rax, 4*8(%rsp)
+	CFI_REL_OFFSET	rax, 4*8
 	.if \nor891011
 	.else
-	movq  %r8,3*8(%rsp) 
-	CFI_REL_OFFSET	r8,3*8
-	movq  %r9,2*8(%rsp) 
-	CFI_REL_OFFSET	r9,2*8
-	movq  %r10,1*8(%rsp) 
-	CFI_REL_OFFSET	r10,1*8
-	movq  %r11,(%rsp) 
-	CFI_REL_OFFSET	r11,0*8
+	movq  %r8, 3*8(%rsp)
+	CFI_REL_OFFSET	r8,  3*8
+	movq  %r9, 2*8(%rsp)
+	CFI_REL_OFFSET	r9,  2*8
+	movq  %r10, 1*8(%rsp)
+	CFI_REL_OFFSET	r10, 1*8
+	movq  %r11, (%rsp)
+	CFI_REL_OFFSET	r11, 0*8
 	.endif
 	.endm
 
-#define ARG_SKIP 9*8
-	.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0
+#define ARG_SKIP	9*8
+
+	.macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \
+			    skipr8910=0, skiprdx=0
 	.if \skipr11
 	.else
-	movq (%rsp),%r11
+	movq (%rsp), %r11
 	CFI_RESTORE r11
 	.endif
 	.if \skipr8910
 	.else
-	movq 1*8(%rsp),%r10
+	movq 1*8(%rsp), %r10
 	CFI_RESTORE r10
-	movq 2*8(%rsp),%r9
+	movq 2*8(%rsp), %r9
 	CFI_RESTORE r9
-	movq 3*8(%rsp),%r8
+	movq 3*8(%rsp), %r8
 	CFI_RESTORE r8
 	.endif
 	.if \skiprax
 	.else
-	movq 4*8(%rsp),%rax
+	movq 4*8(%rsp), %rax
 	CFI_RESTORE rax
 	.endif
 	.if \skiprcx
 	.else
-	movq 5*8(%rsp),%rcx
+	movq 5*8(%rsp), %rcx
 	CFI_RESTORE rcx
 	.endif
 	.if \skiprdx
 	.else
-	movq 6*8(%rsp),%rdx
+	movq 6*8(%rsp), %rdx
 	CFI_RESTORE rdx
 	.endif
-	movq 7*8(%rsp),%rsi
+	movq 7*8(%rsp), %rsi
 	CFI_RESTORE rsi
-	movq 8*8(%rsp),%rdi
+	movq 8*8(%rsp), %rdi
 	CFI_RESTORE rdi
 	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip,%rsp
+	addq $ARG_SKIP+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
 	.endif
-	.endm	
+	.endm
 
 	.macro LOAD_ARGS offset
-	movq \offset(%rsp),%r11
-	movq \offset+8(%rsp),%r10
-	movq \offset+16(%rsp),%r9
-	movq \offset+24(%rsp),%r8
-	movq \offset+40(%rsp),%rcx
-	movq \offset+48(%rsp),%rdx
-	movq \offset+56(%rsp),%rsi
-	movq \offset+64(%rsp),%rdi
-	movq \offset+72(%rsp),%rax
+	movq \offset(%rsp),    %r11
+	movq \offset+8(%rsp),  %r10
+	movq \offset+16(%rsp), %r9
+	movq \offset+24(%rsp), %r8
+	movq \offset+40(%rsp), %rcx
+	movq \offset+48(%rsp), %rdx
+	movq \offset+56(%rsp), %rsi
+	movq \offset+64(%rsp), %rdi
+	movq \offset+72(%rsp), %rax
 	.endm
-			
-#define REST_SKIP 6*8			
+
+#define REST_SKIP	6*8
+
 	.macro SAVE_REST
-	subq $REST_SKIP,%rsp
+	subq $REST_SKIP, %rsp
 	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq %rbx,5*8(%rsp) 
-	CFI_REL_OFFSET	rbx,5*8
-	movq %rbp,4*8(%rsp) 
-	CFI_REL_OFFSET	rbp,4*8
-	movq %r12,3*8(%rsp) 
-	CFI_REL_OFFSET	r12,3*8
-	movq %r13,2*8(%rsp) 
-	CFI_REL_OFFSET	r13,2*8
-	movq %r14,1*8(%rsp) 
-	CFI_REL_OFFSET	r14,1*8
-	movq %r15,(%rsp) 
-	CFI_REL_OFFSET	r15,0*8
-	.endm		
+	movq %rbx, 5*8(%rsp)
+	CFI_REL_OFFSET	rbx, 5*8
+	movq %rbp, 4*8(%rsp)
+	CFI_REL_OFFSET	rbp, 4*8
+	movq %r12, 3*8(%rsp)
+	CFI_REL_OFFSET	r12, 3*8
+	movq %r13, 2*8(%rsp)
+	CFI_REL_OFFSET	r13, 2*8
+	movq %r14, 1*8(%rsp)
+	CFI_REL_OFFSET	r14, 1*8
+	movq %r15, (%rsp)
+	CFI_REL_OFFSET	r15, 0*8
+	.endm
 
 	.macro RESTORE_REST
-	movq (%rsp),%r15
+	movq (%rsp),     %r15
 	CFI_RESTORE r15
-	movq 1*8(%rsp),%r14
+	movq 1*8(%rsp),  %r14
 	CFI_RESTORE r14
-	movq 2*8(%rsp),%r13
+	movq 2*8(%rsp),  %r13
 	CFI_RESTORE r13
-	movq 3*8(%rsp),%r12
+	movq 3*8(%rsp),  %r12
 	CFI_RESTORE r12
-	movq 4*8(%rsp),%rbp
+	movq 4*8(%rsp),  %rbp
 	CFI_RESTORE rbp
-	movq 5*8(%rsp),%rbx
+	movq 5*8(%rsp),  %rbx
 	CFI_RESTORE rbx
-	addq $REST_SKIP,%rsp
+	addq $REST_SKIP, %rsp
 	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
 	.endm
-		
+
 	.macro SAVE_ALL
 	SAVE_ARGS
 	SAVE_REST
 	.endm
-		
+
 	.macro RESTORE_ALL addskip=0
 	RESTORE_REST
-	RESTORE_ARGS 0,\addskip
+	RESTORE_ARGS 0, \addskip
 	.endm
 
 	.macro icebp
 	.byte 0xf1
 	.endm
+

From 4c4915627f94a81a834a7a65dee83acdfb45788c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: [PATCH 501/890] x86: make arch/x86/kernel/acpi/wakeup_32.S use a
 separate

While examining vmlinux namelist on i386 (nm -v vmlinux) I noticed :

c01021d0 t es7000_rename_gsi
c010221a T es7000_start_cpu
<Big Hole>
c0103000 T thread_saved_pc

and

c0113218 T acpi_restore_state_mem
c0113219 T acpi_save_state_mem
<Big Hole>
c0114000 t wakeup_code

This is because arch/x86/kernel/acpi/wakeup_32.S forces a .text alignment
of 4096 bytes. (I have no idea if it is really needed, since
arch/x86/kernel/acpi/wakeup_64.S uses a 16 bytes alignment *only*)

So arch/x86/kernel/built-in.o also has this alignment

arch/x86/kernel/built-in.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
  0 .text         00018c94  00000000  00000000  00001000  2**12
                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE

But as arch/x86/kernel/acpi/wakeup_32.o is not the first object linked
into arch/x86/kernel/built-in.o, linker had to build several holes to meet
alignement requirements, because of .o nestings in the kbuild process.

This can be solved by using a special section, .text.page_aligned, so that
no holes are needed.

# size vmlinux.before vmlinux.after
   text    data     bss     dec     hex filename
4619942  422838  458752 5501532  53f25c vmlinux.before
4610534  422838  458752 5492124  53cd9c vmlinux.after

This saves 9408 bytes

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/wakeup_32.S | 2 +-
 arch/x86/kernel/vmlinux_32.lds.S | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 1e931aaf2ef6..f53e3277f8e5 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@
-.text
+	.section .text.page_aligned
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index ec072588ff01..f1148ac8abe3 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -38,6 +38,8 @@ SECTIONS
 
   /* read-only */
   .text : AT(ADDR(.text) - LOAD_OFFSET) {
+	. = ALIGN(4096); /* not really needed, already page aligned */
+	*(.text.page_aligned)
 	TEXT_TEXT
 	SCHED_TEXT
 	LOCK_TEXT

From f315decbd05fefbca09bd492ae54eaa334ba826b Mon Sep 17 00:00:00 2001
From: Abhishek Sagar <sagar.abhishek@gmail.com>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: [PATCH 502/890] x86: kprobes change kprobe_handler flow

Signed-off-by: Abhishek Sagar <sagar.abhishek@gmail.com>
Signed-off-by: Quentin Barnes <qbarnes@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 153 +++++++++++++++++++++-----------------
 1 file changed, 86 insertions(+), 67 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 711fec8f6379..53ba6a5b6550 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -442,6 +442,34 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	/* Replace the return addr with trampoline addr */
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
+
+static void __kprobes recursive_singlestep(struct kprobe *p,
+					   struct pt_regs *regs,
+					   struct kprobe_ctlblk *kcb)
+{
+	save_previous_kprobe(kcb);
+	set_current_kprobe(p, regs, kcb);
+	kprobes_inc_nmissed_count(p);
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_REENTER;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+				       struct kprobe_ctlblk *kcb)
+{
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+	if (p->ainsn.boostable == 1 && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		reset_current_kprobe();
+		regs->ip = (unsigned long)p->ainsn.insn;
+		preempt_enable_no_resched();
+		return;
+	}
+#endif
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+}
+
 /*
  * We have reentered the kprobe_handler(), since another probe was hit while
  * within the handler. We save the original kprobes variables and just single
@@ -450,13 +478,9 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 				    struct kprobe_ctlblk *kcb)
 {
-	if (kcb->kprobe_status == KPROBE_HIT_SS &&
-	    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
-		regs->flags &= ~X86_EFLAGS_TF;
-		regs->flags |= kcb->kprobe_saved_flags;
-		return 0;
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SSDONE:
 #ifdef CONFIG_X86_64
-	} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
 		/* TODO: Provide re-entrancy from post_kprobes_handler() and
 		 * avoid exception stack corruption while single-stepping on
 		 * the instruction of the new probe.
@@ -464,14 +488,26 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 		arch_disarm_kprobe(p);
 		regs->ip = (unsigned long)p->addr;
 		reset_current_kprobe();
-		return 1;
+		preempt_enable_no_resched();
+		break;
 #endif
+	case KPROBE_HIT_ACTIVE:
+		recursive_singlestep(p, regs, kcb);
+		break;
+	case KPROBE_HIT_SS:
+		if (*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+			regs->flags &= ~TF_MASK;
+			regs->flags |= kcb->kprobe_saved_flags;
+			return 0;
+		} else {
+			recursive_singlestep(p, regs, kcb);
+		}
+		break;
+	default:
+		/* impossible cases */
+		WARN_ON(1);
 	}
-	save_previous_kprobe(kcb);
-	set_current_kprobe(p, regs, kcb);
-	kprobes_inc_nmissed_count(p);
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_REENTER;
+
 	return 1;
 }
 
@@ -481,83 +517,66 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
  */
 static int __kprobes kprobe_handler(struct pt_regs *regs)
 {
-	struct kprobe *p;
-	int ret = 0;
 	kprobe_opcode_t *addr;
+	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
+	if (*addr != BREAKPOINT_INSTRUCTION) {
+		/*
+		 * The breakpoint instruction was removed right
+		 * after we hit it.  Another cpu has removed
+		 * either a probepoint or a debugger breakpoint
+		 * at this address.  In either case, no further
+		 * handling of this interrupt is appropriate.
+		 * Back up over the (now missing) int3 and run
+		 * the original instruction.
+		 */
+		regs->ip = (unsigned long)addr;
+		return 1;
+	}
 
 	/*
 	 * We don't want to be preempted for the entire
-	 * duration of kprobe processing
+	 * duration of kprobe processing. We conditionally
+	 * re-enable preemption at the end of this function,
+	 * and also in reenter_kprobe() and setup_singlestep().
 	 */
 	preempt_disable();
-	kcb = get_kprobe_ctlblk();
 
+	kcb = get_kprobe_ctlblk();
 	p = get_kprobe(addr);
+
 	if (p) {
-		/* Check we're not actually recursing */
 		if (kprobe_running()) {
-			ret = reenter_kprobe(p, regs, kcb);
-			if (kcb->kprobe_status == KPROBE_REENTER)
-			{
-				ret = 1;
-				goto out;
-			}
-			goto preempt_out;
+			if (reenter_kprobe(p, regs, kcb))
+				return 1;
 		} else {
 			set_current_kprobe(p, regs, kcb);
 			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-			if (p->pre_handler && p->pre_handler(p, regs))
-			{
-				/* handler set things up, skip ss setup */
-				ret = 1;
-				goto out;
-			}
-		}
-	} else {
-		if (*addr != BREAKPOINT_INSTRUCTION) {
+
 			/*
-			 * The breakpoint instruction was removed right
-			 * after we hit it.  Another cpu has removed
-			 * either a probepoint or a debugger breakpoint
-			 * at this address.  In either case, no further
-			 * handling of this interrupt is appropriate.
-			 * Back up over the (now missing) int3 and run
-			 * the original instruction.
+			 * If we have no pre-handler or it returned 0, we
+			 * continue with normal processing.  If we have a
+			 * pre-handler and it returned non-zero, it prepped
+			 * for calling the break_handler below on re-entry
+			 * for jprobe processing, so get out doing nothing
+			 * more here.
 			 */
-			regs->ip = (unsigned long)addr;
-			ret = 1;
-			goto preempt_out;
+			if (!p->pre_handler || !p->pre_handler(p, regs))
+				setup_singlestep(p, regs, kcb);
+			return 1;
 		}
-		if (kprobe_running()) {
-			p = __get_cpu_var(current_kprobe);
-			if (p->break_handler && p->break_handler(p, regs))
-				goto ss_probe;
+	} else if (kprobe_running()) {
+		p = __get_cpu_var(current_kprobe);
+		if (p->break_handler && p->break_handler(p, regs)) {
+			setup_singlestep(p, regs, kcb);
+			return 1;
 		}
-		/* Not one of ours: let kernel handle it */
-		goto preempt_out;
-	}
+	} /* else: not a kprobe fault; let the kernel handle it */
 
-ss_probe:
-	ret = 1;
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
-	if (p->ainsn.boostable == 1 && !p->post_handler) {
-		/* Boost up -- we can execute copied instructions directly */
-		reset_current_kprobe();
-		regs->ip = (unsigned long)p->ainsn.insn;
-		goto preempt_out;
-	}
-#endif
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_HIT_SS;
-	goto out;
-
-preempt_out:
 	preempt_enable_no_resched();
-out:
-	return ret;
+	return 0;
 }
 
 /*

From 3a6a62f96f168d192fb0cc9c0b5ee2584740b32d Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: [PATCH 503/890] debug: introduce __WARN()

Introduce __WARN() in the generic case, so the generic WARN_ON()
can use arch-specific code for when the condition is true.

Signed-off-by: Olof Johansson <olof@lixom.net>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/bug.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index d56fedbb457a..1a0e1a7684bd 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -31,14 +31,19 @@ struct bug_entry {
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0)
 #endif
 
-#ifndef HAVE_ARCH_WARN_ON
+#ifndef __WARN
+#define __WARN() do {							\
+	printk("WARNING: at %s:%d %s()\n", __FILE__,			\
+		__LINE__, __FUNCTION__);				\
+	dump_stack();							\
+} while (0)
+#endif
+
+#ifndef WARN_ON
 #define WARN_ON(condition) ({						\
 	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on)) {					\
-		printk("WARNING: at %s:%d %s()\n", __FILE__,		\
-			__LINE__, __FUNCTION__);			\
-		dump_stack();						\
-	}								\
+	if (unlikely(__ret_warn_on))					\
+		__WARN();						\
 	unlikely(__ret_warn_on);					\
 })
 #endif

From 79b4cc5ee7a8086ac2c9c0afa52e6d687ce1ffef Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: [PATCH 504/890] debug: move WARN_ON() out of line

A quick grep shows that there are currently 1145 instances of WARN_ON
in the kernel. Currently, WARN_ON is pretty much entirely inlined,
which makes it hard to enhance it without growing the size of the kernel
(and getting Andrew unhappy).

This patch build on top of Olof's patch that introduces __WARN,
and places the slowpath out of line. It also uses Ingo's suggestion
to not use __FUNCTION__ but to use kallsyms to do the lookup;
this saves a ton of extra space since gcc doesn't need to store the function
string twice now:

3936367  833603  624736 5394706  525112 vmlinux.before
3917508  833603  624736 5375847  520767 vmlinux-slowpath

15Kb savings...

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Olof Johansson <olof@lixom.net>
Acked-by: Matt Meckall <mpm@selenic.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/bug.h | 10 +++++-----
 kernel/panic.c            | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 1a0e1a7684bd..2632328d8646 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -32,11 +32,11 @@ struct bug_entry {
 #endif
 
 #ifndef __WARN
-#define __WARN() do {							\
-	printk("WARNING: at %s:%d %s()\n", __FILE__,			\
-		__LINE__, __FUNCTION__);				\
-	dump_stack();							\
-} while (0)
+#ifndef __ASSEMBLY__
+extern void warn_on_slowpath(const char *file, const int line);
+#define WANT_WARN_ON_SLOWPATH
+#endif
+#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
 #endif
 
 #ifndef WARN_ON
diff --git a/kernel/panic.c b/kernel/panic.c
index da4d6bac270e..0ebea438278a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -20,6 +20,7 @@
 #include <linux/kexec.h>
 #include <linux/debug_locks.h>
 #include <linux/random.h>
+#include <linux/kallsyms.h>
 
 int panic_on_oops;
 int tainted;
@@ -292,6 +293,20 @@ void oops_exit(void)
 		(unsigned long long)oops_id);
 }
 
+#ifdef WANT_WARN_ON_SLOWPATH
+void warn_on_slowpath(const char *file, int line)
+{
+	char function[KSYM_SYMBOL_LEN];
+	unsigned long caller = (unsigned long) __builtin_return_address(0);
+
+	sprint_symbol(function, caller);
+	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+		line, function);
+	dump_stack();
+}
+EXPORT_SYMBOL(warn_on_slowpath);
+#endif
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
  * Called when gcc's -fstack-protector feature is used, and

From 71c339116a216b181fc5e203ef51a033fe5e38cf Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: [PATCH 505/890] debug: add the end-of-trace marker and the module
 list to

Unlike oopses, WARN_ON() currently does't print the loaded modules list.
This makes it harder to take action on certain bug reports. For example,
recently there were a set of WARN_ON()s reported in the mac80211 stack,
which were just signalling a driver bug. It takes then anther round trip
to the bug reporter (if he responds at all) to find out which driver
is at fault.

Another issue is that, unlike oopses, WARN_ON() doesn't currently printk
the helpful "cut here" line, nor the "end of trace" marker.
Now that WARN_ON() is out of line, the size increase due to this is
minimal and it's worth adding.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/panic.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index 0ebea438278a..d9e90cfe3298 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -281,6 +281,13 @@ static int init_oops_id(void)
 }
 late_initcall(init_oops_id);
 
+static void print_oops_end_marker(void)
+{
+	init_oops_id();
+	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
+		(unsigned long long)oops_id);
+}
+
 /*
  * Called when the architecture exits its oops handler, after printing
  * everything.
@@ -288,9 +295,7 @@ late_initcall(init_oops_id);
 void oops_exit(void)
 {
 	do_oops_enter_exit();
-	init_oops_id();
-	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
-		(unsigned long long)oops_id);
+	print_oops_end_marker();
 }
 
 #ifdef WANT_WARN_ON_SLOWPATH
@@ -298,11 +303,14 @@ void warn_on_slowpath(const char *file, int line)
 {
 	char function[KSYM_SYMBOL_LEN];
 	unsigned long caller = (unsigned long) __builtin_return_address(0);
-
 	sprint_symbol(function, caller);
+
+	printk(KERN_WARNING "------------[ cut here ]------------\n");
 	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
 		line, function);
+	print_modules();
 	dump_stack();
+	print_oops_end_marker();
 }
 EXPORT_SYMBOL(warn_on_slowpath);
 #endif

From 4d022e35fd7e07c522c7863fee6f07e53cf3fc14 Mon Sep 17 00:00:00 2001
From: Miguel Boton <mboton.lkml@gmail.com>
Date: Wed, 30 Jan 2008 13:32:51 +0100
Subject: [PATCH 506/890] x86: reboot_{32|64}.c unification

reboot_{32|64}.c unification patch.

This patch unifies the code from the reboot_32.c and reboot_64.c files.

It has been tested in computers with X86_32 and X86_64 kernels and it
looks like all reboot modes work fine (EFI restart system hasn't been
tested yet).

Probably I made some mistakes (like I usually do) so I hope
we can identify and fix them soon.

Signed-off-by: Miguel Boton <mboton@gmail.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile                  |   4 +-
 arch/x86/kernel/{reboot_32.c => reboot.c} | 298 ++++++++++++----------
 arch/x86/kernel/reboot_64.c               | 191 --------------
 include/asm-x86/emergency-restart.h       |   3 +
 4 files changed, 172 insertions(+), 324 deletions(-)
 rename arch/x86/kernel/{reboot_32.c => reboot.c} (72%)
 delete mode 100644 arch/x86/kernel/reboot_64.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0903bbf0ca4d..b40bed4baa77 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,8 +30,8 @@ obj-y				+= step.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot_32.o
-obj-$(CONFIG_X86_64)            += reboot_64.o
+obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
+obj-$(CONFIG_X86_64)		+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
diff --git a/arch/x86/kernel/reboot_32.c b/arch/x86/kernel/reboot.c
similarity index 72%
rename from arch/x86/kernel/reboot_32.c
rename to arch/x86/kernel/reboot.c
index c3376fae639d..5b32f0b4d133 100644
--- a/arch/x86/kernel/reboot_32.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,64 +1,94 @@
-#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/efi.h>
-#include <linux/dmi.h>
-#include <linux/ctype.h>
-#include <linux/pm.h>
 #include <linux/reboot.h>
-#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <linux/efi.h>
+#include <acpi/reboot.h>
+#include <asm/io.h>
 #include <asm/apic.h>
-#include <asm/hpet.h>
 #include <asm/desc.h>
-#include "mach_reboot.h"
+#include <asm/hpet.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 
+#ifdef CONFIG_X86_32
+# include <linux/dmi.h>
+# include <linux/ctype.h>
+# include <linux/mc146818rtc.h>
+# include <asm/pgtable.h>
+#else
+# include <asm/iommu.h>
+#endif
+
 /*
  * Power off function, if any
  */
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
+static long no_idt[3];
 static int reboot_mode;
-static int reboot_thru_bios;
+enum reboot_type reboot_type = BOOT_KBD;
+int reboot_force;
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
 static int reboot_cpu = -1;
 #endif
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+   warm   Don't set the cold reboot flag
+   cold   Set the cold reboot flag
+   bios   Reboot by jumping through the BIOS (only for X86_32)
+   smp    Reboot by executing reset on BSP or other CPU (only for X86_32)
+   triple Force a triple fault (init)
+   kbd    Use the keyboard controller. cold reset (default)
+   acpi   Use the RESET_REG in the FADT
+   efi    Use efi reset_system runtime service
+   force  Avoid anything that could hang.
+ */
 static int __init reboot_setup(char *str)
 {
-	while(1) {
+	for (;;) {
 		switch (*str) {
-		case 'w': /* "warm" reboot (no memory testing etc) */
+		case 'w':
 			reboot_mode = 0x1234;
 			break;
-		case 'c': /* "cold" reboot (with memory testing etc) */
-			reboot_mode = 0x0;
-			break;
-		case 'b': /* "bios" reboot by jumping through the BIOS */
-			reboot_thru_bios = 1;
-			break;
-		case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
-			reboot_thru_bios = 0;
+
+		case 'c':
+			reboot_mode = 0;
 			break;
+
+#ifdef CONFIG_X86_32
 #ifdef CONFIG_SMP
-		case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
+		case 's':
 			if (isdigit(*(str+1))) {
 				reboot_cpu = (int) (*(str+1) - '0');
 				if (isdigit(*(str+2)))
 					reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
 			}
-				/* we will leave sorting out the final value 
-				when we are ready to reboot, since we might not
- 				have set up boot_cpu_id or smp_num_cpu */
+				/* we will leave sorting out the final value
+				   when we are ready to reboot, since we might not
+				   have set up boot_cpu_id or smp_num_cpu */
 			break;
+#endif /* CONFIG_SMP */
+
+		case 'b':
 #endif
+		case 'a':
+		case 'k':
+		case 't':
+		case 'e':
+			reboot_type = *str;
+			break;
+
+		case 'f':
+			reboot_force = 1;
+			break;
 		}
-		if((str = strchr(str,',')) != NULL)
+
+		str = strchr(str, ',');
+		if (str)
 			str++;
 		else
 			break;
@@ -68,18 +98,21 @@ static int __init reboot_setup(char *str)
 
 __setup("reboot=", reboot_setup);
 
+
+#ifdef CONFIG_X86_32
 /*
  * Reboot options and system auto-detection code provided by
  * Dell Inc. so their systems "just work". :-)
  */
 
 /*
- * Some machines require the "reboot=b"  commandline option, this quirk makes that automatic.
+ * Some machines require the "reboot=b"  commandline option,
+ * this quirk makes that automatic.
  */
 static int __init set_bios_reboot(const struct dmi_system_id *d)
 {
-	if (!reboot_thru_bios) {
-		reboot_thru_bios = 1;
+	if (reboot_type != BOOT_BIOS) {
+		reboot_type = BOOT_BIOS;
 		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
 	}
 	return 0;
@@ -143,7 +176,6 @@ static int __init reboot_init(void)
 	dmi_check_system(reboot_dmi_table);
 	return 0;
 }
-
 core_initcall(reboot_init);
 
 /* The following code and data reboots the machine by switching to real
@@ -152,7 +184,6 @@ core_initcall(reboot_init);
    controller to pulse the CPU reset line, which is more thorough, but
    doesn't work with at least one type of 486 motherboard.  It is easy
    to stop this code working; hence the copious comments. */
-
 static unsigned long long
 real_mode_gdt_entries [3] =
 {
@@ -163,9 +194,7 @@ real_mode_gdt_entries [3] =
 
 static struct desc_ptr
 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, 0 },
-no_idt = { 0, 0 };
-
+real_mode_idt = { 0x3ff, 0 };
 
 /* This is 16-bit protected mode code to disable paging and the cache,
    switch to real mode and jump to the BIOS reset code.
@@ -185,7 +214,6 @@ no_idt = { 0, 0 };
 
    More could be done here to set up the registers as if a CPU reset had
    occurred; hopefully real BIOSs don't assume much. */
-
 static unsigned char real_mode_switch [] =
 {
 	0x66, 0x0f, 0x20, 0xc0,			/*    movl  %cr0,%eax        */
@@ -223,7 +251,6 @@ void machine_real_restart(unsigned char *code, int length)
 	   `outb_p' is needed instead of just `outb'.  Use it to be on the
 	   safe side.  (Yes, CMOS_WRITE does outb_p's. -  Paul G.)
 	 */
-
 	spin_lock(&rtc_lock);
 	CMOS_WRITE(0x00, 0x8f);
 	spin_unlock(&rtc_lock);
@@ -231,9 +258,8 @@ void machine_real_restart(unsigned char *code, int length)
 	/* Remap the kernel at virtual address zero, as well as offset zero
 	   from the kernel segment.  This assumes the kernel segment starts at
 	   virtual address PAGE_OFFSET. */
-
-	memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-		sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+		sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
 
 	/*
 	 * Use `swapper_pg_dir' as our page directory.
@@ -245,7 +271,6 @@ void machine_real_restart(unsigned char *code, int length)
 	   boot)".  This seems like a fairly standard thing that gets set by
 	   REBOOT.COM programs, and the previous reset routine did this
 	   too. */
-
 	*((unsigned short *)0x472) = reboot_mode;
 
 	/* For the switch to real mode, copy some code to low memory.  It has
@@ -253,19 +278,16 @@ void machine_real_restart(unsigned char *code, int length)
 	   has to have the same physical and virtual address, because it turns
 	   off paging.  Copy it near the end of the first page, out of the way
 	   of BIOS variables. */
-
-	memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
+	memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100),
 		real_mode_switch, sizeof (real_mode_switch));
-	memcpy ((void *) (0x1000 - 100), code, length);
+	memcpy((void *)(0x1000 - 100), code, length);
 
 	/* Set up the IDT for real mode. */
-
 	load_idt(&real_mode_idt);
 
 	/* Set up a GDT from which we can load segment descriptors for real
 	   mode.  The GDT is not used in real mode; it is just needed here to
 	   prepare the descriptors. */
-
 	load_gdt(&real_mode_gdt);
 
 	/* Load the data segment registers, and thus the descriptors ready for
@@ -273,7 +295,6 @@ void machine_real_restart(unsigned char *code, int length)
 	   selector value being loaded here.  This is so that the segment
 	   registers don't have to be reloaded after switching to real mode:
 	   the values are consistent for real mode operation already. */
-
 	__asm__ __volatile__ ("movl $0x0010,%%eax\n"
 				"\tmovl %%eax,%%ds\n"
 				"\tmovl %%eax,%%es\n"
@@ -284,130 +305,145 @@ void machine_real_restart(unsigned char *code, int length)
 	/* Jump to the 16-bit code that we copied earlier.  It disables paging
 	   and the cache, switches to real mode, and jumps to the BIOS reset
 	   entry point. */
-
 	__asm__ __volatile__ ("ljmp $0x0008,%0"
 				:
-				: "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
+				: "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100)));
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
 
-static void native_machine_shutdown(void)
+#endif /* CONFIG_X86_32 */
+
+static inline void kb_wait(void)
 {
+	int i;
+
+	for (i = 0; i < 0x10000; i++)
+		if ((inb_p(0x64) & 0x02) == 0)
+			break;
+}
+
+void machine_emergency_restart(void)
+{
+	int i;
+
+	/* Tell the BIOS if we want cold or warm reboot */
+	*((unsigned short *)__va(0x472)) = reboot_mode;
+
+	for (;;) {
+		/* Could also try the reset bit in the Hammer NB */
+		switch (reboot_type) {
+		case BOOT_KBD:
+			for (i = 0; i < 10; i++) {
+				kb_wait();
+				udelay(50);
+				outb(0xfe, 0x64); /* pulse reset low */
+				udelay(50);
+			}
+
+		case BOOT_TRIPLE:
+			load_idt((const struct desc_ptr *)&no_idt);
+			__asm__ __volatile__("int3");
+
+			reboot_type = BOOT_KBD;
+			break;
+
+#ifdef CONFIG_X86_32
+		case BOOT_BIOS:
+			machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
+
+			reboot_type = BOOT_KBD;
+			break;
+#endif
+
+		case BOOT_ACPI:
+			acpi_reboot();
+			reboot_type = BOOT_KBD;
+			break;
+
+
+		case BOOT_EFI:
+			if (efi_enabled)
+				efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+						 EFI_SUCCESS, 0, NULL);
+
+			reboot_type = BOOT_KBD;
+			break;
+		}
+	}
+}
+
+void machine_shutdown(void)
+{
+	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
 	int reboot_cpu_id;
 
 	/* The boot cpu is always logical cpu 0 */
 	reboot_cpu_id = 0;
 
+#ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
-		cpu_isset(reboot_cpu, cpu_online_map)) {
+		cpu_isset(reboot_cpu, cpu_online_map))
 		reboot_cpu_id = reboot_cpu;
-	}
+#endif
 
-	/* Make certain the cpu I'm rebooting on is online */
-	if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+	/* Make certain the cpu I'm about to reboot on is online */
+	if (!cpu_isset(reboot_cpu_id, cpu_online_map))
 		reboot_cpu_id = smp_processor_id();
-	}
 
 	/* Make certain I only run on the appropriate processor */
 	set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
 
-	/* O.K. Now that I'm on the appropriate processor, stop
-	 * all of the others, and disable their local APICs.
+	/* O.K Now that I'm on the appropriate processor,
+	 * stop all of the others.
 	 */
-
 	smp_send_stop();
-#endif /* CONFIG_SMP */
+#endif
 
 	lapic_shutdown();
 
 #ifdef CONFIG_X86_IO_APIC
 	disable_IO_APIC();
 #endif
+
 #ifdef CONFIG_HPET_TIMER
 	hpet_disable();
 #endif
+
+#ifdef CONFIG_X86_64
+	pci_iommu_shutdown();
+#endif
 }
 
-void __attribute__((weak)) mach_reboot_fixups(void)
+void machine_restart(char *__unused)
 {
-}
+	printk("machine restart\n");
 
-static void native_machine_emergency_restart(void)
-{
-	if (!reboot_thru_bios) {
-		if (efi_enabled) {
-			efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
-			load_idt(&no_idt);
-			__asm__ __volatile__("int3");
-		}
-		/* rebooting needs to touch the page at absolute addr 0 */
-		*((unsigned short *)__va(0x472)) = reboot_mode;
-		for (;;) {
-			mach_reboot_fixups(); /* for board specific fixups */
-			mach_reboot();
-			/* That didn't work - force a triple fault.. */
-			load_idt(&no_idt);
-			__asm__ __volatile__("int3");
-		}
-	}
-	if (efi_enabled)
-		efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
-
-	machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
-}
-
-static void native_machine_restart(char * __unused)
-{
-	machine_shutdown();
-	machine_emergency_restart();
-}
-
-static void native_machine_halt(void)
-{
-}
-
-static void native_machine_power_off(void)
-{
-	if (pm_power_off) {
+	if (!reboot_force)
 		machine_shutdown();
-		pm_power_off();
-	}
-}
-
-
-struct machine_ops machine_ops = {
-	.power_off = native_machine_power_off,
-	.shutdown = native_machine_shutdown,
-	.emergency_restart = native_machine_emergency_restart,
-	.restart = native_machine_restart,
-	.halt = native_machine_halt,
-};
-
-void machine_power_off(void)
-{
-	machine_ops.power_off();
-}
-
-void machine_shutdown(void)
-{
-	machine_ops.shutdown();
-}
-
-void machine_emergency_restart(void)
-{
-	machine_ops.emergency_restart();
-}
-
-void machine_restart(char *cmd)
-{
-	machine_ops.restart(cmd);
+	machine_emergency_restart();
 }
 
 void machine_halt(void)
 {
-	machine_ops.halt();
 }
+
+void machine_power_off(void)
+{
+	if (pm_power_off) {
+		if (!reboot_force)
+			machine_shutdown();
+		pm_power_off();
+	}
+}
+
+struct machine_ops machine_ops = {
+	.power_off = machine_power_off,
+	.shutdown = machine_shutdown,
+	.emergency_restart = machine_emergency_restart,
+	.restart = machine_restart,
+	.halt = machine_halt
+};
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
deleted file mode 100644
index d6bdf93ffca9..000000000000
--- a/arch/x86/kernel/reboot_64.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/* Various gunk just to reboot the machine. */ 
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <linux/efi.h>
-#include <acpi/reboot.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/hw_irq.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/apic.h>
-#include <asm/hpet.h>
-#include <asm/gart.h>
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-static long no_idt[3];
-enum reboot_type reboot_type = BOOT_KBD;
-static int reboot_mode = 0;
-int reboot_force;
-
-/* reboot=t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
-   warm   Don't set the cold reboot flag
-   cold   Set the cold reboot flag
-   triple Force a triple fault (init)
-   kbd    Use the keyboard controller. cold reset (default)
-   acpi   Use the RESET_REG in the FADT
-   efi    Use efi reset_system runtime service
-   force  Avoid anything that could hang.
- */ 
-static int __init reboot_setup(char *str)
-{
-	for (;;) {
-		switch (*str) {
-		case 'w': 
-			reboot_mode = 0x1234;
-			break;
-
-		case 'c':
-			reboot_mode = 0;
-			break;
-
-		case 't':
-		case 'a':
-		case 'b':
-		case 'k':
-		case 'e':
-			reboot_type = *str;
-			break;
-		case 'f':
-			reboot_force = 1;
-			break;
-		}
-		if((str = strchr(str,',')) != NULL)
-			str++;
-		else
-			break;
-	}
-	return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-static inline void kb_wait(void)
-{
-	int i;
-
-	for (i=0; i<0x10000; i++)
-		if ((inb_p(0x64) & 0x02) == 0)
-			break;
-}
-
-void machine_shutdown(void)
-{
-	unsigned long flags;
-
-	/* Stop the cpus and apics */
-#ifdef CONFIG_SMP
-	int reboot_cpu_id;
-
-	/* The boot cpu is always logical cpu 0 */
-	reboot_cpu_id = 0;
-
-	/* Make certain the cpu I'm about to reboot on is online */
-	if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
-		reboot_cpu_id = smp_processor_id();
-	}
-
-	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
-	/* O.K Now that I'm on the appropriate processor,
-	 * stop all of the others.
-	 */
-	smp_send_stop();
-#endif
-
-	local_irq_save(flags);
-
-#ifndef CONFIG_SMP
-	disable_local_APIC();
-#endif
-
-	disable_IO_APIC();
-
-#ifdef CONFIG_HPET_TIMER
-	hpet_disable();
-#endif
-	local_irq_restore(flags);
-
-	pci_iommu_shutdown();
-}
-
-void machine_emergency_restart(void)
-{
-	int i;
-
-	/* Tell the BIOS if we want cold or warm reboot */
-	*((unsigned short *)__va(0x472)) = reboot_mode;
-       
-	for (;;) {
-		/* Could also try the reset bit in the Hammer NB */
-		switch (reboot_type) { 
-		case BOOT_KBD:
-		for (i=0; i<10; i++) {
-			kb_wait();
-			udelay(50);
-			outb(0xfe,0x64);         /* pulse reset low */
-			udelay(50);
-		}
-
-		case BOOT_TRIPLE: 
-			load_idt((const struct desc_ptr *)&no_idt);
-			__asm__ __volatile__("int3");
-
-			reboot_type = BOOT_KBD;
-			break;
-
-		case BOOT_ACPI:
-			acpi_reboot();
-			reboot_type = BOOT_KBD;
-			break;
-
-		case BOOT_EFI:
-			if (efi_enabled)
-				efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
-						 EFI_SUCCESS, 0, NULL);
-			reboot_type = BOOT_KBD;
-			break;
-		}
-	}      
-}
-
-void machine_restart(char * __unused)
-{
-	printk("machine restart\n");
-
-	if (!reboot_force) {
-		machine_shutdown();
-	}
-	machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
-}
-
-void machine_power_off(void)
-{
-	if (pm_power_off) {
-		if (!reboot_force) {
-			machine_shutdown();
-		}
-		pm_power_off();
-	}
-}
-
diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 54189084462a..8e6aef19f8f0 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h
@@ -4,6 +4,9 @@
 enum reboot_type {
 	BOOT_TRIPLE = 't',
 	BOOT_KBD = 'k',
+#ifdef CONFIG_X86_32
+	BOOT_BIOS = 'b',
+#endif
 	BOOT_ACPI = 'a',
 	BOOT_EFI = 'e'
 };

From 6d7d7433750c7c6eec93d7b3206019e329228686 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:32:51 +0100
Subject: [PATCH 507/890] x86 boot : export boot_params via debugfs for
 debugging

This patch export the boot parameters via debugfs for debugging.

The files added are as follow:

boot_params/data    :  binary file for struct boot_params
boot_params/version :  boot protocol version

This patch is based on 2.6.24-rc5-mm1 and has been tested on i386 and
x86_64 platform.

This patch is based on the Peter Anvin's proposal.

Signed-off-by: Huang Ying <ying.huang@intel.com>

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug     |  7 ++++
 arch/x86/kernel/Makefile   |  2 +-
 arch/x86/kernel/kdebugfs.c | 65 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup64.c  |  4 +++
 arch/x86/kernel/setup_32.c |  4 +++
 5 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/kdebugfs.c

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 660200915baa..15854b53badc 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -185,4 +185,11 @@ config DEFAULT_IO_DELAY_TYPE
 	default IO_DELAY_TYPE_NONE
 endif
 
+config DEBUG_BOOT_PARAMS
+	bool "Debug boot parameters"
+	depends on DEBUG_KERNEL
+	depends on DEBUG_FS
+	help
+	  This option will cause struct boot_params to be exported via debugfs.
+
 endmenu
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b40bed4baa77..3d23ccd366ea 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
 obj-y			+= pci-dma_$(BITS).o  bootflag.o e820_$(BITS).o
-obj-y			+= quirks.o i8237.o topology.o
+obj-y			+= quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o
 obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
new file mode 100644
index 000000000000..73354302fda7
--- /dev/null
+++ b/arch/x86/kernel/kdebugfs.c
@@ -0,0 +1,65 @@
+/*
+ * Architecture specific debugfs files
+ *
+ * Copyright (C) 2007, Intel Corp.
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+
+#ifdef CONFIG_DEBUG_BOOT_PARAMS
+static struct debugfs_blob_wrapper boot_params_blob = {
+	.data = &boot_params,
+	.size = sizeof(boot_params),
+};
+
+static int __init boot_params_kdebugfs_init(void)
+{
+	int error;
+	struct dentry *dbp, *version, *data;
+
+	dbp = debugfs_create_dir("boot_params", NULL);
+	if (!dbp) {
+		error = -ENOMEM;
+		goto err_return;
+	}
+	version = debugfs_create_x16("version", S_IRUGO, dbp,
+				     &boot_params.hdr.version);
+	if (!version) {
+		error = -ENOMEM;
+		goto err_dir;
+	}
+	data = debugfs_create_blob("data", S_IRUGO, dbp,
+				   &boot_params_blob);
+	if (!data) {
+		error = -ENOMEM;
+		goto err_version;
+	}
+	return 0;
+err_version:
+	debugfs_remove(version);
+err_dir:
+	debugfs_remove(dbp);
+err_return:
+	return error;
+}
+#endif
+
+static int __init arch_kdebugfs_init(void)
+{
+	int error = 0;
+
+#ifdef CONFIG_DEBUG_BOOT_PARAMS
+	error = boot_params_kdebugfs_init();
+#endif
+
+	return error;
+}
+
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 3b0ffa31f3c0..8fa0de810d0b 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -24,7 +24,11 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
 struct boot_params __initdata boot_params;
+#else
+struct boot_params boot_params;
+#endif
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 704550fdb84c..3bce4af60bb6 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -194,7 +194,11 @@ unsigned long saved_videomode;
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
 struct boot_params __initdata boot_params;
+#else
+struct boot_params boot_params;
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;

From cf8fa920cb4271f17e0265c863d64bea1b31941a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:32:51 +0100
Subject: [PATCH 508/890] i386: handle an initrd in highmem (version 2)

The boot protocol has until now required that the initrd be located in
lowmem, which makes the lowmem/highmem boundary visible to the boot
loader.  This was exported to the bootloader via a compile-time
field.  Unfortunately, the vmalloc= command-line option breaks this
part of the protocol; instead of adding yet another hack that affects
the bootloader, have the kernel relocate the initrd down below the
lowmem boundary inside the kernel itself.

Note that this does not rely on HIGHMEM being enabled in the kernel.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/header.S     |   5 +-
 arch/x86/kernel/setup_32.c | 130 ++++++++++++++++++++++++++++++-------
 arch/x86/mm/discontig_32.c |   4 +-
 3 files changed, 112 insertions(+), 27 deletions(-)

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4cc5b0411db5..64ad9016585a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -195,10 +195,13 @@ cmd_line_ptr:	.long	0		# (Header version 0x0202 or later)
 					# can be located anywhere in
 					# low memory 0x10000 or higher.
 
-ramdisk_max:	.long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
+ramdisk_max:	.long 0x7fffffff
 					# (Header version 0x0203 or later)
 					# The highest safe address for
 					# the contents of an initrd
+					# The current kernel allows up to 4 GB,
+					# but leave it at 2 GB to avoid
+					# possible bootloader bugs.
 
 kernel_alignment:  .long CONFIG_PHYSICAL_ALIGN	#physical addr alignment
 						#required for protected mode
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 3bce4af60bb6..6802a383077d 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -188,9 +188,9 @@ extern int root_mountflags;
 
 unsigned long saved_videomode;
 
-#define RAMDISK_IMAGE_START_MASK  	0x07FF
+#define RAMDISK_IMAGE_START_MASK	0x07FF
 #define RAMDISK_PROMPT_FLAG		0x8000
-#define RAMDISK_LOAD_FLAG		0x4000	
+#define RAMDISK_LOAD_FLAG		0x4000
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -252,7 +252,7 @@ static int __init parse_mem(char *arg)
 		 * trim the existing memory map.
 		 */
 		unsigned long long mem_size;
- 
+
 		mem_size = memparse(arg, &arg);
 		limit_regions(mem_size);
 		user_defined_memmap = 1;
@@ -391,7 +391,7 @@ static void __init reserve_ebda_region(void)
 	unsigned int addr;
 	addr = get_bios_ebda();
 	if (addr)
-		reserve_bootmem(addr, PAGE_SIZE);	
+		reserve_bootmem(addr, PAGE_SIZE);
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -496,6 +496,100 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static bool do_relocate_initrd = false;
+
+static void __init reserve_initrd(void)
+{
+	unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+	unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
+	unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
+	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	unsigned long ramdisk_here;
+
+	initrd_start = 0;
+
+	if (!boot_params.hdr.type_of_loader ||
+	    !ramdisk_image || !ramdisk_size)
+		return;		/* No initrd provided by bootloader */
+
+	if (ramdisk_end < ramdisk_image) {
+		printk(KERN_ERR "initrd wraps around end of memory, "
+		       "disabling initrd\n");
+		return;
+	}
+	if (ramdisk_size >= end_of_lowmem/2) {
+		printk(KERN_ERR "initrd too large to handle, "
+		       "disabling initrd\n");
+		return;
+	}
+	if (ramdisk_end <= end_of_lowmem) {
+		/* All in lowmem, easy case */
+		reserve_bootmem(ramdisk_image, ramdisk_size);
+		initrd_start = ramdisk_image + PAGE_OFFSET;
+		initrd_end = initrd_start+ramdisk_size;
+		return;
+	}
+
+	/* We need to move the initrd down into lowmem */
+	ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
+
+	/* Note: this includes all the lowmem currently occupied by
+	   the initrd, we rely on that fact to keep the data intact. */
+	reserve_bootmem(ramdisk_here, ramdisk_size);
+	initrd_start = ramdisk_here + PAGE_OFFSET;
+	initrd_end   = initrd_start + ramdisk_size;
+
+	do_relocate_initrd = true;
+}
+
+#define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
+
+static void __init relocate_initrd(void)
+{
+	unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+	unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
+	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	unsigned long ramdisk_here;
+	unsigned long slop, clen, mapaddr;
+	char *p, *q;
+
+	if (!do_relocate_initrd)
+		return;
+
+	ramdisk_here = initrd_start - PAGE_OFFSET;
+
+	q = (char *)initrd_start;
+
+	/* Copy any lowmem portion of the initrd */
+	if (ramdisk_image < end_of_lowmem) {
+		clen = end_of_lowmem - ramdisk_image;
+		p = (char *)__va(ramdisk_image);
+		memcpy(q, p, clen);
+		q += clen;
+		ramdisk_image += clen;
+		ramdisk_size  -= clen;
+	}
+
+	/* Copy the highmem portion of the initrd */
+	while (ramdisk_size) {
+		slop = ramdisk_image & ~PAGE_MASK;
+		clen = ramdisk_size;
+		if (clen > MAX_MAP_CHUNK-slop)
+			clen = MAX_MAP_CHUNK-slop;
+		mapaddr = ramdisk_image & PAGE_MASK;
+		p = bt_ioremap(mapaddr, clen+slop);
+		memcpy(q, p+slop, clen);
+		bt_iounmap(p, clen+slop);
+		q += clen;
+		ramdisk_image += clen;
+		ramdisk_size  -= clen;
+	}
+}
+
+#endif /* CONFIG_BLK_DEV_INITRD */
+
 void __init setup_bootmem_allocator(void)
 {
 	unsigned long bootmap_size;
@@ -551,26 +645,10 @@ void __init setup_bootmem_allocator(void)
 	 */
 	find_smp_config();
 #endif
-	numa_kva_reserve();
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
-		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-		unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
-		unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
-
-		if (ramdisk_end <= end_of_lowmem) {
-			reserve_bootmem(ramdisk_image, ramdisk_size);
-			initrd_start = ramdisk_image + PAGE_OFFSET;
-			initrd_end = initrd_start+ramdisk_size;
-		} else {
-			printk(KERN_ERR "initrd extends beyond end of memory "
-			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-			       ramdisk_end, end_of_lowmem);
-			initrd_start = 0;
-		}
-	}
+	reserve_initrd();
 #endif
+	numa_kva_reserve();
 	reserve_crashkernel();
 }
 
@@ -713,15 +791,19 @@ void __init setup_arch(char **cmdline_p)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 
+#ifdef CONFIG_BLK_DEV_INITRD
+	relocate_initrd();
+#endif
+
 	paravirt_post_allocator_init();
 
 	dmi_scan_machine();
 
-	io_delay_init();;
+	io_delay_init();
 
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
-#endif	
+#endif
 	if (efi_enabled)
 		efi_map_memmap();
 
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 13a474d3c6e9..88a7499e8e48 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -288,8 +288,8 @@ unsigned long __init setup_memory(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Numa kva area is below the initrd */
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image)
-		kva_start_pfn = PFN_DOWN(boot_params.hdr.ramdisk_image)
+	if (initrd_start)
+		kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
 			- kva_pages;
 #endif
 	kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);

From b32ef636a59aad12f9f9b5dc34c93222842c58ba Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:51 +0100
Subject: [PATCH 509/890] percpu: use a kconfig variable to signal arch
 specific percpu setup

The use of the __GENERIC_PERCPU is a bit problematic since arches
may want to run their own percpu setup while using the generic
percpu definitions. Replace it through a kconfig variable.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/Kconfig            | 3 +++
 arch/powerpc/Kconfig         | 3 +++
 arch/sparc64/Kconfig         | 3 +++
 arch/x86/Kconfig             | 3 +++
 include/asm-generic/percpu.h | 1 -
 include/asm-s390/percpu.h    | 2 --
 include/asm-x86/percpu_32.h  | 2 --
 init/main.c                  | 4 ++--
 8 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 4a81b7fb191a..5a41e75ae1fe 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -80,6 +80,9 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default y
 
+config ARCH_SETS_UP_PER_CPU_AREA
+	def_bool y
+
 config DMI
 	bool
 	default y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c17a194beb0e..fb85f6b72fcf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -42,6 +42,9 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
+config ARCH_SETS_UP_PER_CPU_AREA
+	def_bool PPC64
+
 config IRQ_PER_CPU
 	bool
 	default y
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 1e25bce0366d..26f5791baa33 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -66,6 +66,9 @@ config AUDIT_ARCH
 	bool
 	default y
 
+config ARCH_SETS_UP_PER_CPU_AREA
+	def_bool y
+
 config ARCH_NO_VIRT_TO_BUS
 	def_bool y
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1de28e850aa4..dc80a9f1f303 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,9 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default X86_64
 
+config ARCH_SETS_UP_PER_CPU_AREA
+	def_bool X86_64
+
 config ARCH_SUPPORTS_OPROFILE
 	bool
 	default y
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index d85172e9ed45..b5e53b9ab1f7 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -3,7 +3,6 @@
 #include <linux/compiler.h>
 #include <linux/threads.h>
 
-#define __GENERIC_PER_CPU
 #ifdef CONFIG_SMP
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index 545857e64443..f94d0d3cdb2f 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/lowcore.h>
 
-#define __GENERIC_PER_CPU
-
 /*
  * s390 uses its own implementation for per cpu data, the offset of
  * the cpu local data area is cached in the cpu's lowcore memory.
diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h
index a7ebd436f3cc..3949586bf94e 100644
--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -41,8 +41,6 @@
  *    PER_CPU(cpu_gdt_descr, %ebx)
  */
 #ifdef CONFIG_SMP
-/* Same as generic implementation except for optimized local access. */
-#define __GENERIC_PER_CPU
 
 /* This is used for other cpus to find our section. */
 extern unsigned long __per_cpu_offset[];
diff --git a/init/main.c b/init/main.c
index f287ca5862b9..3f8aba291ed3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -363,7 +363,7 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 
 #else
 
-#ifdef __GENERIC_PER_CPU
+#ifndef CONFIG_ARCH_SETS_UP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -384,7 +384,7 @@ static void __init setup_per_cpu_areas(void)
 		ptr += size;
 	}
 }
-#endif /* !__GENERIC_PER_CPU */
+#endif /* CONFIG_ARCH_SETS_UP_CPU_AREA */
 
 /* Called by boot processor to activate the rest. */
 static void __init smp_init(void)

From 5280e004fc22314122c84978c0b6a741cf96dc0f Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:52 +0100
Subject: [PATCH 510/890] percpu: move arch XX_PER_CPU_XX definitions into
 linux/percpu.h

- Special consideration for IA64: Add the ability to specify
  arch specific per cpu flags

- remove .data.percpu attribute from DEFINE_PER_CPU for non-smp case.

The arch definitions are all the same. So move them into linux/percpu.h.

We cannot move DECLARE_PER_CPU since some include files just include
asm/percpu.h to avoid include recursion problems.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/percpu.h | 18 ------------------
 include/asm-ia64/percpu.h    | 24 ++----------------------
 include/asm-powerpc/percpu.h | 17 -----------------
 include/asm-s390/percpu.h    | 18 ------------------
 include/asm-sparc64/percpu.h | 16 ----------------
 include/asm-x86/percpu_32.h  | 12 ------------
 include/asm-x86/percpu_64.h  | 17 -----------------
 include/linux/percpu.h       | 24 ++++++++++++++++++++++++
 8 files changed, 26 insertions(+), 120 deletions(-)

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b5e53b9ab1f7..e038f13594e5 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -9,15 +9,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*({				\
 	extern int simple_identifier_##var(void);	\
@@ -35,12 +26,6 @@ do {								\
 } while (0)
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -49,7 +34,4 @@ do {								\
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index c4f1e328a5ba..0095bcf79848 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -16,28 +16,11 @@
 #include <linux/threads.h>
 
 #ifdef HAVE_MODEL_SMALL_ATTRIBUTE
-# define __SMALL_ADDR_AREA	__attribute__((__model__ (__small__)))
-#else
-# define __SMALL_ADDR_AREA
+# define PER_CPU_ATTRIBUTES	__attribute__((__model__ (__small__)))
 #endif
 
 #define DECLARE_PER_CPU(type, name)				\
-	extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name)				\
-	__attribute__((__section__(".data.percpu")))		\
-	__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-#ifdef CONFIG_SMP
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
-	__attribute__((__section__(".data.percpu.shared_aligned")))	\
-	__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name		\
-	____cacheline_aligned_in_smp
-#else
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-	DEFINE_PER_CPU(type, name)
-#endif
+	extern PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
 
 /*
  * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
@@ -68,9 +51,6 @@ extern void *per_cpu_init(void);
 
 #endif	/* SMP */
 
-#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
  * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
index 6b229626d3ff..cc1cbf656b02 100644
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -16,15 +16,6 @@
 #define __my_cpu_offset() get_paca()->data_offset
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
@@ -43,11 +34,6 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -56,9 +42,6 @@ extern void setup_per_cpu_areas(void);
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #else
 #include <asm-generic/percpu.h>
 #endif
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index f94d0d3cdb2f..2d676a873858 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -34,16 +34,6 @@
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) \
-    __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
@@ -60,11 +50,6 @@ do {								\
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define __get_cpu_var(var) __reloc_hide(var,0)
 #define __raw_get_cpu_var(var) __reloc_hide(var,0)
 #define per_cpu(var,cpu) __reloc_hide(var,0)
@@ -73,7 +58,4 @@ do {								\
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* __ARCH_S390_PERCPU__ */
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index a1f53a4da405..c7e52decba98 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -16,15 +16,6 @@ extern unsigned long __per_cpu_shift;
 	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
@@ -41,10 +32,6 @@ do {								\
 #else /* ! SMP */
 
 #define real_setup_per_cpu_areas()		do { } while (0)
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
 
 #define per_cpu(var, cpu)			(*((void)cpu, &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
@@ -54,7 +41,4 @@ do {								\
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* __ARCH_SPARC64_PERCPU__ */
diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h
index 3949586bf94e..77bd0045f331 100644
--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -47,16 +47,7 @@ extern unsigned long __per_cpu_offset[];
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
 
-/* Separate out the type, so (int[3], foo) works. */
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_aligned_in_smp
-
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
@@ -81,9 +72,6 @@ do {								\
 		       (src), (size));				\
 } while (0)
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
 #else  /* !SMP */
diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h
index 5abd48270101..24fe7075248d 100644
--- a/include/asm-x86/percpu_64.h
+++ b/include/asm-x86/percpu_64.h
@@ -16,15 +16,6 @@
 
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
-    __attribute__((__section__(".data.percpu.shared_aligned"))) \
-    __typeof__(type) per_cpu__##name				\
-    ____cacheline_internodealigned_in_smp
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*({				\
 	extern int simple_identifier_##var(void);	\
@@ -49,11 +40,6 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define DEFINE_PER_CPU(type, name) \
-    __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)	\
-    DEFINE_PER_CPU(type, name)
-
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
 #define __get_cpu_var(var)			per_cpu__##var
 #define __raw_get_cpu_var(var)			per_cpu__##var
@@ -62,7 +48,4 @@ extern void setup_per_cpu_areas(void);
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
 #endif /* _ASM_X8664_PERCPU_H_ */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 926adaae0f96..00412bb494c4 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,6 +9,30 @@
 
 #include <asm/percpu.h>
 
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
+#ifdef CONFIG_SMP
+#define DEFINE_PER_CPU(type, name)					\
+	__attribute__((__section__(".data.percpu")))			\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
+	__attribute__((__section__(".data.percpu.shared_aligned")))	\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name		\
+	____cacheline_aligned_in_smp
+#else
+#define DEFINE_PER_CPU(type, name)					\
+	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		      \
+	DEFINE_PER_CPU(type, name)
+#endif
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
 #ifndef PERCPU_ENOUGH_ROOM
 #ifdef CONFIG_MODULES

From acdac87202a408133ee8f7985076de9d2e0dc5ab Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:52 +0100
Subject: [PATCH 511/890] percpu: make the asm-generic/percpu.h more "generic"

- add support for PER_CPU_ATTRIBUTES

- fix generic smp percpu_modcopy to use per_cpu_offset() macro.

Add the ability to use generic/percpu even if the arch needs to override
several aspects of its operations. This will enable the use of generic
percpu.h for all arches.

An arch may define:

__per_cpu_offset	Do not use the generic pointer array. Arch must
			define per_cpu_offset(cpu) (used by x86_64, s390).

__my_cpu_offset		Can be defined to provide an optimized way to determine
			the offset for variables of the currently executing
			processor. Used by ia64, x86_64, x86_32, sparc64, s/390.

SHIFT_PTR(ptr, offset)	If an arch defines it then special handling
			of pointer arithmentic may be implemented. Used
			by s/390.

(Some of these special percpu arch implementations may be later consolidated
so that there are less cases to deal with.)

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/kernel/module.c    |  2 +-
 include/asm-generic/percpu.h | 74 ++++++++++++++++++++++++++++++------
 2 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 196287928bae..e699eb6c44be 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -947,7 +947,7 @@ percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
 {
 	unsigned int i;
 	for_each_possible_cpu(i) {
-		memcpy(pcpudst + __per_cpu_offset[i], src, size);
+		memcpy(pcpudst + per_cpu_offset(i), src, size);
 	}
 }
 #endif /* CONFIG_SMP */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index e038f13594e5..c41b1a731129 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -3,35 +3,87 @@
 #include <linux/compiler.h>
 #include <linux/threads.h>
 
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
 #ifdef CONFIG_SMP
 
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ *
+ * Most arches use the __per_cpu_offset array for those offsets but
+ * some arches have their own ways of determining the offset (x86_64, s390).
+ */
+#ifndef __per_cpu_offset
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
 #define per_cpu_offset(x) (__per_cpu_offset[x])
+#endif
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
-#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
+/*
+ * Determine the offset for the currently active processor.
+ * An arch may define __my_cpu_offset to provide a more effective
+ * means of obtaining the offset to the per cpu variables of the
+ * current processor.
+ */
+#ifndef __my_cpu_offset
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+/*
+ * Add a offset to a pointer but keep the pointer as is.
+ *
+ * Only S390 provides its own means of moving the pointer.
+ */
+#ifndef SHIFT_PERCPU_PTR
+#define SHIFT_PERCPU_PTR(__p, __offset)	RELOC_HIDE((__p), (__offset))
+#endif
+
+/*
+ * A percpu variable may point to a discarded reghions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
+
+
+#ifdef CONFIG_ARCH_SETS_UP_PER_CPU_AREA
+extern void setup_per_cpu_areas(void);
+#endif
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
 do {								\
 	unsigned int __i;					\
 	for_each_possible_cpu(__i)				\
-		memcpy((pcpudst)+__per_cpu_offset[__i],		\
+		memcpy((pcpudst)+per_cpu_offset(__i),		\
 		       (src), (size));				\
 } while (0)
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
+#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var)			per_cpu_var(var)
+#define __raw_get_cpu_var(var)			per_cpu_var(var)
 
 #endif	/* SMP */
 
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
+#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
+					__typeof__(type) per_cpu_var(name)
 
 #endif /* _ASM_GENERIC_PERCPU_H_ */

From 0af8a5ccc51ee2269712c90ab09c91b0150f4224 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:52 +0100
Subject: [PATCH 512/890] x86_32: use generic percpu.h

x86_32 only provides a special way to obtain the local per cpu area offset
via x86_read_percpu. Otherwise it can fully use the generic handling.

Cc: ak@suse.de
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/percpu_32.h | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h
index 77bd0045f331..e62ce2fe2c9c 100644
--- a/include/asm-x86/percpu_32.h
+++ b/include/asm-x86/percpu_32.h
@@ -42,26 +42,7 @@
  */
 #ifdef CONFIG_SMP
 
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
@@ -74,11 +55,18 @@ do {								\
 
 /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
 #define __percpu_seg "%%fs:"
+
 #else  /* !SMP */
-#include <asm-generic/percpu.h>
+
 #define __percpu_seg ""
+
 #endif	/* SMP */
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);

From 072a1e6953d2caffbad7d217ae51cdc61125960f Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:52 +0100
Subject: [PATCH 513/890] x86: use generic percpu on 64-bit

x86_64 provides an optimized way to determine the local per cpu area
offset through the pda and determines the base by accessing a remote
pda.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/percpu_64.h | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h
index 24fe7075248d..e7f3f6d21759 100644
--- a/include/asm-x86/percpu_64.h
+++ b/include/asm-x86/percpu_64.h
@@ -12,21 +12,10 @@
 #include <asm/pda.h>
 
 #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset() read_pda(data_offset)
+#define __my_cpu_offset read_pda(data_offset)
 
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
-#define __get_cpu_var(var) (*({				\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-#define __raw_get_cpu_var(var) (*({			\
-	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
 do {								\
@@ -36,16 +25,8 @@ do {								\
 		       (src), (size));				\
 } while (0)
 
-extern void setup_per_cpu_areas(void);
-
-#else /* ! SMP */
-
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)			per_cpu__##var
-#define __raw_get_cpu_var(var)			per_cpu__##var
-
 #endif	/* SMP */
 
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#include <asm-generic/percpu.h>
 
 #endif /* _ASM_X8664_PERCPU_H_ */

From 3334052a321aca0ffecb54244d666311f98f5487 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:32:53 +0100
Subject: [PATCH 514/890] x86: unify percpu.h

Form a single percpu.h from percpu_32.h and percpu_64.h. Both are now pretty
small so this is simply adding them together.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/percpu.h    | 145 +++++++++++++++++++++++++++++++++++-
 include/asm-x86/percpu_32.h | 128 -------------------------------
 include/asm-x86/percpu_64.h |  32 --------
 3 files changed, 141 insertions(+), 164 deletions(-)
 delete mode 100644 include/asm-x86/percpu_32.h
 delete mode 100644 include/asm-x86/percpu_64.h

diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index a1aaad274cca..0dec00f27eb4 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -1,5 +1,142 @@
-#ifdef CONFIG_X86_32
-# include "percpu_32.h"
-#else
-# include "percpu_64.h"
+#ifndef _ASM_X86_PERCPU_H_
+#define _ASM_X86_PERCPU_H_
+
+#ifdef CONFIG_X86_64
+#include <linux/compiler.h>
+
+/* Same as asm-generic/percpu.h, except that we store the per cpu offset
+   in the PDA. Longer term the PDA and every per cpu variable
+   should be just put into a single section and referenced directly
+   from %gs */
+
+#ifdef CONFIG_SMP
+#include <asm/pda.h>
+
+#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+#define __my_cpu_offset read_pda(data_offset)
+
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
 #endif
+#include <asm-generic/percpu.h>
+
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#else /* CONFIG_X86_64 */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    reg - 32bit register
+ *
+ * The resulting address is stored in the "reg" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+#define PER_CPU(var, reg)				\
+	movl %fs:per_cpu__##this_cpu_off, reg;		\
+	lea per_cpu__##var(reg), reg
+#define PER_CPU_VAR(var)	%fs:per_cpu__##var
+#else /* ! SMP */
+#define PER_CPU(var, reg)			\
+	movl $per_cpu__##var, reg
+#define PER_CPU_VAR(var)	per_cpu__##var
+#endif	/* SMP */
+
+#else /* ...!ASSEMBLY */
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    cpu - 32bit register containing the current CPU number
+ *
+ * The resulting address is stored in the "cpu" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
+
+/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
+#define __percpu_seg "%%fs:"
+
+#else  /* !SMP */
+
+#define __percpu_seg ""
+
+#endif	/* SMP */
+
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+/* For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though). */
+extern void __bad_percpu_size(void);
+
+#define percpu_to_op(op,var,val)				\
+	do {							\
+		typedef typeof(var) T__;			\
+		if (0) { T__ tmp__; tmp__ = (val); }		\
+		switch (sizeof(var)) {				\
+		case 1:						\
+			asm(op "b %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		case 2:						\
+			asm(op "w %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		case 4:						\
+			asm(op "l %1,"__percpu_seg"%0"		\
+			    : "+m" (var)			\
+			    :"ri" ((T__)val));			\
+			break;					\
+		default: __bad_percpu_size();			\
+		}						\
+	} while (0)
+
+#define percpu_from_op(op,var)					\
+	({							\
+		typeof(var) ret__;				\
+		switch (sizeof(var)) {				\
+		case 1:						\
+			asm(op "b "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		case 2:						\
+			asm(op "w "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		case 4:						\
+			asm(op "l "__percpu_seg"%1,%0"		\
+			    : "=r" (ret__)			\
+			    : "m" (var));			\
+			break;					\
+		default: __bad_percpu_size();			\
+		}						\
+		ret__; })
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
+#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
+#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
+#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#endif /* !__ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+#endif /* _ASM_X86_PERCPU_H_ */
diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h
deleted file mode 100644
index e62ce2fe2c9c..000000000000
--- a/include/asm-x86/percpu_32.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef __ARCH_I386_PERCPU__
-#define __ARCH_I386_PERCPU__
-
-#ifdef __ASSEMBLY__
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-#define PER_CPU(var, reg)				\
-	movl %fs:per_cpu__##this_cpu_off, reg;		\
-	lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var)	%fs:per_cpu__##var
-#else /* ! SMP */
-#define PER_CPU(var, reg)			\
-	movl $per_cpu__##var, reg
-#define PER_CPU_VAR(var)	per_cpu__##var
-#endif	/* SMP */
-
-#else /* ...!ASSEMBLY */
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-
-#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size)			\
-do {								\
-	unsigned int __i;					\
-	for_each_possible_cpu(__i)				\
-		memcpy((pcpudst)+__per_cpu_offset[__i],		\
-		       (src), (size));				\
-} while (0)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
-
-#else  /* !SMP */
-
-#define __percpu_seg ""
-
-#endif	/* SMP */
-
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op,var,val)				\
-	do {							\
-		typedef typeof(var) T__;			\
-		if (0) { T__ tmp__; tmp__ = (val); }		\
-		switch (sizeof(var)) {				\
-		case 1:						\
-			asm(op "b %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		case 2:						\
-			asm(op "w %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		case 4:						\
-			asm(op "l %1,"__percpu_seg"%0"		\
-			    : "+m" (var)			\
-			    :"ri" ((T__)val));			\
-			break;					\
-		default: __bad_percpu_size();			\
-		}						\
-	} while (0)
-
-#define percpu_from_op(op,var)					\
-	({							\
-		typeof(var) ret__;				\
-		switch (sizeof(var)) {				\
-		case 1:						\
-			asm(op "b "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		case 2:						\
-			asm(op "w "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		case 4:						\
-			asm(op "l "__percpu_seg"%1,%0"		\
-			    : "=r" (ret__)			\
-			    : "m" (var));			\
-			break;					\
-		default: __bad_percpu_size();			\
-		}						\
-		ret__; })
-
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ARCH_I386_PERCPU__ */
diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h
deleted file mode 100644
index e7f3f6d21759..000000000000
--- a/include/asm-x86/percpu_64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _ASM_X8664_PERCPU_H_
-#define _ASM_X8664_PERCPU_H_
-#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
-   in the PDA. Longer term the PDA and every per cpu variable
-   should be just put into a single section and referenced directly
-   from %gs */
-
-#ifdef CONFIG_SMP
-
-#include <asm/pda.h>
-
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size)			\
-do {								\
-	unsigned int __i;					\
-	for_each_possible_cpu(__i)				\
-		memcpy((pcpudst)+__per_cpu_offset(__i),		\
-		       (src), (size));				\
-} while (0)
-
-#endif	/* SMP */
-
-#include <asm-generic/percpu.h>
-
-#endif /* _ASM_X8664_PERCPU_H_ */

From 8c1c9356429741a82ff176d0f3400fb9e06b2a30 Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Wed, 30 Jan 2008 13:32:53 +0100
Subject: [PATCH 515/890] x86: kprobes: add kprobes smoke tests that run on
 boot

Here is a quick and naive smoke test for kprobes. This is intended to
just verify if some unrelated change broke the *probes subsystem. It is
self contained, architecture agnostic and isn't of any great use by itself.

This needs to be built in the kernel and runs a basic set of tests to
verify if kprobes, jprobes and kretprobes run fine on the kernel. In case
of an error, it'll print out a message with a "BUG" prefix.

This is a start; we intend to add more tests to this bucket over time.

Thanks to Jim Keniston and Masami Hiramatsu for comments and suggestions.

Tested on x86 (32/64) and powerpc.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kprobes.h |  10 ++
 kernel/Makefile         |   1 +
 kernel/kprobes.c        |   2 +
 kernel/test_kprobes.c   | 216 ++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug       |  12 +++
 5 files changed, 241 insertions(+)
 create mode 100644 kernel/test_kprobes.c

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 81891581e89b..6168c0a44172 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -182,6 +182,15 @@ static inline void kretprobe_assert(struct kretprobe_instance *ri,
 	}
 }
 
+#ifdef CONFIG_KPROBES_SANITY_TEST
+extern int init_test_probes(void);
+#else
+static inline int init_test_probes(void)
+{
+	return 0;
+}
+#endif /* CONFIG_KPROBES_SANITY_TEST */
+
 extern spinlock_t kretprobe_lock;
 extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
@@ -227,6 +236,7 @@ void unregister_kretprobe(struct kretprobe *rp);
 
 void kprobe_flush_task(struct task_struct *tk);
 void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
+
 #else /* CONFIG_KPROBES */
 
 #define __kprobes	/**/
diff --git a/kernel/Makefile b/kernel/Makefile
index 390d42146267..62015c3d8d91 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e3a5d817ac9b..d0493eafea3e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -824,6 +824,8 @@ static int __init init_kprobes(void)
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
 
+	if (!err)
+		init_test_probes();
 	return err;
 }
 
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
new file mode 100644
index 000000000000..88cdb109e13c
--- /dev/null
+++ b/kernel/test_kprobes.c
@@ -0,0 +1,216 @@
+/*
+ * test_kprobes.c - simple sanity test for *probes
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+
+#define div_factor 3
+
+static u32 rand1, preh_val, posth_val, jph_val;
+static int errors, handler_errors, num_tests;
+
+static noinline u32 kprobe_target(u32 value)
+{
+	/*
+	 * gcc ignores noinline on some architectures unless we stuff
+	 * sufficient lard into the function. The get_kprobe() here is
+	 * just for that.
+	 *
+	 * NOTE: We aren't concerned about the correctness of get_kprobe()
+	 * here; hence, this call is neither under !preempt nor with the
+	 * kprobe_mutex held. This is fine(tm)
+	 */
+	if (get_kprobe((void *)0xdeadbeef))
+		printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
+
+	return (value / div_factor);
+}
+
+static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	preh_val = (rand1 / div_factor);
+	return 0;
+}
+
+static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
+		unsigned long flags)
+{
+	if (preh_val != (rand1 / div_factor)) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in post_handler\n");
+	}
+	posth_val = preh_val + div_factor;
+}
+
+static struct kprobe kp = {
+	.symbol_name = "kprobe_target",
+	.pre_handler = kp_pre_handler,
+	.post_handler = kp_post_handler
+};
+
+static int test_kprobe(void)
+{
+	int ret;
+
+	ret = register_kprobe(&kp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_kprobe(&kp);
+
+	if (preh_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe pre_handler not called\n");
+		handler_errors++;
+	}
+
+	if (posth_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe post_handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+
+static u32 j_kprobe_target(u32 value)
+{
+	if (value != rand1) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in jprobe handler\n");
+	}
+
+	jph_val = rand1;
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe jp = {
+	.entry		= j_kprobe_target,
+	.kp.symbol_name = "kprobe_target"
+};
+
+static int test_jprobe(void)
+{
+	int ret;
+
+	ret = register_jprobe(&jp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_jprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_jprobe(&jp);
+	if (jph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"jprobe handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_KRETPROBES
+static u32 krph_val;
+
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	unsigned long ret = regs_return_value(regs);
+
+	if (ret != (rand1 / div_factor)) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in kretprobe handler\n");
+	}
+
+	krph_val = (rand1 / div_factor);
+	return 0;
+}
+
+static struct kretprobe rp = {
+	.handler	= return_handler,
+	.kp.symbol_name = "kprobe_target"
+};
+
+static int test_kretprobe(void)
+{
+	int ret;
+
+	ret = register_kretprobe(&rp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kretprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_kretprobe(&rp);
+	if (krph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kretprobe handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_KRETPROBES */
+
+int init_test_probes(void)
+{
+	int ret;
+
+	do {
+		rand1 = random32();
+	} while (rand1 <= div_factor);
+
+	printk(KERN_INFO "Kprobe smoke test started\n");
+	num_tests++;
+	ret = test_kprobe();
+	if (ret < 0)
+		errors++;
+
+	num_tests++;
+	ret = test_jprobe();
+	if (ret < 0)
+		errors++;
+
+#ifdef CONFIG_KRETPROBES
+	num_tests++;
+	ret = test_kretprobe();
+	if (ret < 0)
+		errors++;
+#endif /* CONFIG_KRETPROBES */
+
+	if (errors)
+		printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
+				"%d tests failed\n", errors, num_tests);
+	else if (handler_errors)
+		printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
+				"running handlers\n", handler_errors);
+	else
+		printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+
+	return 0;
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c4ecb2994ba3..f535b9b5eb00 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -494,6 +494,18 @@ config RCU_TORTURE_TEST
 	  Say M if you want the RCU torture tests to build as a module.
 	  Say N if you are unsure.
 
+config KPROBES_SANITY_TEST
+	bool "Kprobes sanity tests"
+	depends on DEBUG_KERNEL
+	depends on KPROBES
+	default n
+	help
+	  This option provides for testing basic kprobes functionality on
+	  boot. A sample kprobe, jprobe and kretprobe are inserted and
+	  verified for functionality.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL

From 8a19da7b7f16d8360e39035f6ab96686e835522b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:53 +0100
Subject: [PATCH 516/890] x86: clean up arch/x86/mm/fault_64.c

clean up arch/x86/mm/fault_64.c a bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index dcf430bb62ee..058b04cb96d4 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -39,10 +39,10 @@
  *	bit 3 == 1 means use of reserved bit detected
  *	bit 4 == 1 means fault was an instruction fetch
  */
-#define PF_PROT	(1<<0)
+#define PF_PROT		(1<<0)
 #define PF_WRITE	(1<<1)
-#define PF_USER	(1<<2)
-#define PF_RSVD	(1<<3)
+#define PF_USER		(1<<2)
+#define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
 static inline int notify_page_fault(struct pt_regs *regs)

From 4fc2fba804cae404d2665e23b8cbd46d5f63a07e Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 517/890] x86: unify arch/x86/kernel/acpi/sleep*.c

Unify arch/x86/kernel/acpi/sleep*.c

Pretty trivial unification; when two functions differed, it was
usually in error handling, and better of the two was picked up.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Looks-okay-to: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/Makefile   |   2 +-
 arch/x86/kernel/acpi/sleep.c    |  87 ++++++++++++++++++++++++
 arch/x86/kernel/acpi/sleep_32.c |  70 -------------------
 arch/x86/kernel/acpi/sleep_64.c | 117 --------------------------------
 4 files changed, 88 insertions(+), 188 deletions(-)
 create mode 100644 arch/x86/kernel/acpi/sleep.c
 delete mode 100644 arch/x86/kernel/acpi/sleep_64.c

diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 1351c3982ee4..19d3d6e9d09b 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ACPI)		+= boot.o
-obj-$(CONFIG_ACPI_SLEEP)	+= sleep_$(BITS).o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o processor.o
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
new file mode 100644
index 000000000000..6bc815cd8cb3
--- /dev/null
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -0,0 +1,87 @@
+/*
+ * sleep.c - x86-specific ACPI sleep support.
+ *
+ *  Copyright (C) 2001-2003 Patrick Mochel
+ *  Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
+ */
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/dmi.h>
+#include <linux/cpumask.h>
+
+#include <asm/smp.h>
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_realmode_flags;
+extern char wakeup_start, wakeup_end;
+
+extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+	if (!acpi_wakeup_address) {
+		printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
+		return -ENOMEM;
+	}
+	memcpy((void *)acpi_wakeup_address, &wakeup_start,
+	       &wakeup_end - &wakeup_start);
+	acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+	return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+}
+
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
+		printk(KERN_ERR
+		       "ACPI: Wakeup code way too big, S3 disabled.\n");
+		return;
+	}
+
+	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+	if (!acpi_wakeup_address)
+		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+}
+
+
+static int __init acpi_sleep_setup(char *str)
+{
+	while ((str != NULL) && (*str != '\0')) {
+		if (strncmp(str, "s3_bios", 7) == 0)
+			acpi_realmode_flags |= 1;
+		if (strncmp(str, "s3_mode", 7) == 0)
+			acpi_realmode_flags |= 2;
+		if (strncmp(str, "s3_beep", 7) == 0)
+			acpi_realmode_flags |= 4;
+		str = strchr(str, ',');
+		if (str != NULL)
+			str += strspn(str, ", \t");
+	}
+	return 1;
+}
+
+__setup("acpi_sleep=", acpi_sleep_setup);
diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c
index 09f820d920b1..63fe5525e026 100644
--- a/arch/x86/kernel/acpi/sleep_32.c
+++ b/arch/x86/kernel/acpi/sleep_32.c
@@ -12,76 +12,6 @@
 
 #include <asm/smp.h>
 
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
-	if (!acpi_wakeup_address)
-		return 1;
-	memcpy((void *)acpi_wakeup_address, &wakeup_start,
-	       &wakeup_end - &wakeup_start);
-	acpi_copy_wakeup_routine(acpi_wakeup_address);
-
-	return 0;
-}
-
-/*
- * acpi_restore_state - undo effects of acpi_save_state_mem
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page from the first 1MB of memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16MB pages, but not
- * <1MB pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
-	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
-		printk(KERN_ERR
-		       "ACPI: Wakeup code way too big, S3 disabled.\n");
-		return;
-	}
-
-	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
-	if (!acpi_wakeup_address)
-		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
-	while ((str != NULL) && (*str != '\0')) {
-		if (strncmp(str, "s3_bios", 7) == 0)
-			acpi_realmode_flags |= 1;
-		if (strncmp(str, "s3_mode", 7) == 0)
-			acpi_realmode_flags |= 2;
-		if (strncmp(str, "s3_beep", 7) == 0)
-			acpi_realmode_flags |= 4;
-		str = strchr(str, ',');
-		if (str != NULL)
-			str += strspn(str, ", \t");
-	}
-	return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-
 /* Ouch, we want to delete this. We already have better version in userspace, in
    s2ram from suspend.sf.net project */
 static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
diff --git a/arch/x86/kernel/acpi/sleep_64.c b/arch/x86/kernel/acpi/sleep_64.c
deleted file mode 100644
index da42de261ba8..000000000000
--- a/arch/x86/kernel/acpi/sleep_64.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- *  acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *  Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port)
- *  Copyright (C) 2003 Pavel Machek, SuSE Labs
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/bootmem.h>
-#include <linux/acpi.h>
-#include <linux/cpumask.h>
-
-#include <asm/mpspec.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-
-/* --------------------------------------------------------------------------
-                              Low-Level Sleep Support
-   -------------------------------------------------------------------------- */
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
-	memcpy((void *)acpi_wakeup_address, &wakeup_start,
-	       &wakeup_end - &wakeup_start);
-	acpi_copy_wakeup_routine(acpi_wakeup_address);
-
-	return 0;
-}
-
-/*
- * acpi_restore_state
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page in low memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16M pages, but not
- * <1M pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
-	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
-	if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
-		printk(KERN_CRIT
-		       "ACPI: Wakeup code way too big, will crash on attempt"
-		       " to suspend\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
-	while ((str != NULL) && (*str != '\0')) {
-		if (strncmp(str, "s3_bios", 7) == 0)
-			acpi_realmode_flags |= 1;
-		if (strncmp(str, "s3_mode", 7) == 0)
-			acpi_realmode_flags |= 2;
-		if (strncmp(str, "s3_beep", 7) == 0)
-			acpi_realmode_flags |= 4;
-		str = strchr(str, ',');
-		if (str != NULL)
-			str += strspn(str, ", \t");
-	}
-	return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-

From 58f6f6ea1d5031f998b5e7d640d7bc45ed86d0c8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 518/890] x86: migration helpers for KVM

migration helpers for KVM.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 41c002440de7..cc549bf819a4 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -3,6 +3,10 @@
 
 #include <asm/processor-flags.h>
 
+/* migration helpers, for KVM - will be removed in 2.6.25: */
+#include <asm/vm86.h>
+#define Xgt_desc_struct	desc_ptr
+
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;

From 87e8407f9ad2a2df901c4b690ab0a2bf0fb168c5 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 519/890] x86, ptrace: add bts_struct size to status command

Return the size of bts_struct in the PTRACE_BTS_STATUS command.
Change types to u32.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c     |  2 ++
 include/asm-x86/ptrace-abi.h | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e19a91db9b35..96286df1bb81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -787,6 +787,8 @@ static int ptrace_bts_status(struct task_struct *child,
 			cfg.flags |= PTRACE_BTS_O_SCHED;
 	}
 
+	cfg.bts_size = sizeof(struct bts_struct);
+
 	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
 		return -EFAULT;
 
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 08a12b790a77..81a8ee4c55fc 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -81,16 +81,21 @@
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
 #ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
 /* configuration/status structure used in PTRACE_BTS_CONFIG and
    PTRACE_BTS_STATUS commands.
 */
 struct ptrace_bts_config {
 	/* requested or actual size of BTS buffer in bytes */
-	unsigned int size;
+	u32 size;
 	/* bitmask of below flags */
-	unsigned int flags;
+	u32 flags;
 	/* buffer overflow signal */
-	unsigned int signal;
+	u32 signal;
+	/* actual size of bts_struct in bytes */
+	u32 bts_size;
 };
 #endif
 

From bac4894dfa9c75c297b905e2ea88caaa5768f1e2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 520/890] x86: make NUMA work on 32-bit again

On 32-bit NUMA, the memmap representing struct pages on each node is
allocated from node-local memory if possible. As only node-0 has memory from
ZONE_NORMAL, the memmap must be mapped into low memory. This is done by
reserving space in the Kernel Virtual Area (KVA) for the memmap belonging
to other nodes by taking pages from the end of ZONE_NORMAL and remapping
the other nodes memmap into those virtual addresses. The node boundaries
are then adjusted so that the region of pages is not used and it is marked
as reserved in the bootmem allocator.

This reserved portion of the KVA is PMD aligned althought
strictly speaking that requirement could be lifted (see thread at
http://lkml.org/lkml/2007/8/24/220). The problem is that when aligned, there
may be a portion of ZONE_NORMAL at the end that is not used for memmap and
does not have an initialised memmap nor is it marked reserved in the bootmem
allocator. Later in the boot process, these pages are freed and a storm of
Bad page state messages result.

This patch marks these pages reserved that are wasted due to alignment
in the bootmem allocator so they are not accidently freed. It is worth
noting that memory from node-0 is wasted where it could have been put into
ZONE_HIGHMEM on NUMA machines. Worse, the KVA is always reserved from the
location of real memory even when there is plenty of spare virtual address
space.

This patch also makes sure that reserve_bootmem() is not called with a
0-length size in numa_kva_reserve().  When this happens, it usually means
that a kernel built for Summit is being booted on a normal machine. The
resulting BUG_ON() is misleading so it is caught here.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/discontig_32.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 88a7499e8e48..9f1d02cfde37 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -268,6 +268,7 @@ unsigned long __init setup_memory(void)
 {
 	int nid;
 	unsigned long system_start_pfn, system_max_low_pfn;
+	unsigned long wasted_pages;
 
 	/*
 	 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -292,7 +293,14 @@ unsigned long __init setup_memory(void)
 		kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
 			- kva_pages;
 #endif
-	kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+	/*
+	 * We waste pages past at the end of the KVA for no good reason other
+	 * than how it is located. This is bad.
+	 */
+	wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
+	kva_start_pfn -= wasted_pages;
+	kva_pages += wasted_pages;
 
 	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
 	printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
@@ -345,7 +353,8 @@ unsigned long __init setup_memory(void)
 
 void __init numa_kva_reserve(void)
 {
-	reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
+	if (kva_pages)
+		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
 }
 
 void __init zone_sizes_init(void)

From 7375931a27bbe687e03ae3c28178920b0c66a87d Mon Sep 17 00:00:00 2001
From: Paolo Ciarrocchi <paolo.ciarrocchi@gmail.com>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 521/890] x86: coding style fixes in arch/x86/ia32/audit.c

Fix one error reported by checkpatch,
it now reports:

total: 0 errors, 0 warnings, 42 lines checked

Signed-off-by: Paolo Ciarrocchi <paolo.ciarrocchi@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/ia32/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index 91b7b5922dfa..5d7b381da692 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -27,7 +27,7 @@ unsigned ia32_signal_class[] = {
 
 int ia32_classify_syscall(unsigned syscall)
 {
-	switch(syscall) {
+	switch (syscall) {
 	case __NR_open:
 		return 2;
 	case __NR_openat:

From 82bc03fc158e28c90d7ed9919410776039cb4e14 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:54 +0100
Subject: [PATCH 522/890] x86: add PWT to NOCACHE flags

add PWT bit to NOCACHE flags. No real difference to CPUs, but needed
later for PAT.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_32.h | 2 +-
 include/asm-x86/pgtable_64.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index a8be1ee15a99..16da5d5cce40 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -156,7 +156,7 @@ void paging_init(void);
 extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 3f280105d7b3..9c9cddf5138b 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -189,13 +189,13 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define __PAGE_KERNEL_EXEC \
 	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_NOCACHE \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_PWT | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_RO \
 	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_VSYSCALL \
 	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE \
-	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
+	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_PWT)
 #define __PAGE_KERNEL_LARGE \
 	(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC \

From 6c3866558213ff706d8331053386915371ad63ec Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:55 +0100
Subject: [PATCH 523/890] x86: move all asm/pgtable constants into one place

32 and 64-bit use the same flags for pagetable entries, so make them all common.

[ mingo@elte.hu: fixes ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 119 +++++++++++++++++++++++++++++++++++
 include/asm-x86/pgtable_32.h | 110 --------------------------------
 include/asm-x86/pgtable_64.h |  91 ---------------------------
 3 files changed, 119 insertions(+), 201 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 1039140652af..5b858f0285a8 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -1,5 +1,124 @@
+#ifndef _ASM_X86_PGTABLE_H
+#define _ASM_X86_PGTABLE_H
+
+#define USER_PTRS_PER_PGD	((TASK_SIZE-1)/PGDIR_SIZE+1)
+#define FIRST_USER_ADDRESS	0
+
+#define _PAGE_BIT_PRESENT	0
+#define _PAGE_BIT_RW		1
+#define _PAGE_BIT_USER		2
+#define _PAGE_BIT_PWT		3
+#define _PAGE_BIT_PCD		4
+#define _PAGE_BIT_ACCESSED	5
+#define _PAGE_BIT_DIRTY		6
+#define _PAGE_BIT_FILE		6
+#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
+#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1	9	/* available for programmer */
+#define _PAGE_BIT_UNUSED2	10
+#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
+
+#define _PAGE_PRESENT	(_AC(1, UL)<<_PAGE_BIT_PRESENT)
+#define _PAGE_RW	(_AC(1, UL)<<_PAGE_BIT_RW)
+#define _PAGE_USER	(_AC(1, UL)<<_PAGE_BIT_USER)
+#define _PAGE_PWT	(_AC(1, UL)<<_PAGE_BIT_PWT)
+#define _PAGE_PCD	(_AC(1, UL)<<_PAGE_BIT_PCD)
+#define _PAGE_ACCESSED	(_AC(1, UL)<<_PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY	(_AC(1, UL)<<_PAGE_BIT_DIRTY)
+#define _PAGE_PSE	(_AC(1, UL)<<_PAGE_BIT_PSE)	/* 2MB page */
+#define _PAGE_GLOBAL	(_AC(1, UL)<<_PAGE_BIT_GLOBAL)	/* Global TLB entry */
+#define _PAGE_UNUSED1	(_AC(1, UL)<<_PAGE_BIT_UNUSED1)
+#define _PAGE_UNUSED2	(_AC(1, UL)<<_PAGE_BIT_UNUSED2)
+#define _PAGE_UNUSED3	(_AC(1, UL)<<_PAGE_BIT_UNUSED3)
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+#define _PAGE_NX	(_AC(1, ULL) << _PAGE_BIT_NX)
+#else
+#define _PAGE_NX	0
+#endif
+
+/* If _PAGE_PRESENT is clear, we use these: */
+#define _PAGE_FILE	_PAGE_DIRTY	/* nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_PROTNONE	_PAGE_PSE	/* if the user mapped it with PROT_NONE;
+					   pte_present gives true */
+
+#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+
+#define PAGE_SHARED_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC		__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY		PAGE_COPY_NOEXEC
+#define PAGE_READONLY		__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#ifdef CONFIG_X86_32
+#define _PAGE_KERNEL_EXEC \
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
+
+#ifndef __ASSEMBLY__
+extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+#endif	/* __ASSEMBLY__ */
+#else
+#define __PAGE_KERNEL_EXEC						\
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL		(__PAGE_KERNEL_EXEC | _PAGE_NX)
+#endif
+
+#define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE	(__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#ifdef CONFIG_X86_32
+# define MAKE_GLOBAL(x)			__pgprot((x))
+#else
+# define MAKE_GLOBAL(x)			__pgprot((x) | _PAGE_GLOBAL)
+#endif
+
+#define PAGE_KERNEL			MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO			MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX			MAKE_GLOBAL(__PAGE_KERNEL_RX)
+#define PAGE_KERNEL_NOCACHE		MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_LARGE		MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
+#define PAGE_KERNEL_VSYSCALL		MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE	MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+
+/*         xwr */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_EXEC
+#define __P101	PAGE_READONLY_EXEC
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_EXEC
+#define __S101	PAGE_READONLY_EXEC
+#define __S110	PAGE_SHARED_EXEC
+#define __S111	PAGE_SHARED_EXEC
+
 #ifdef CONFIG_X86_32
 # include "pgtable_32.h"
 #else
 # include "pgtable_64.h"
 #endif
+
+#endif	/* _ASM_X86_PGTABLE_H */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 16da5d5cce40..3b40cb6dea18 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -58,9 +58,6 @@ void paging_init(void);
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_ADDRESS	0
-
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
 
@@ -84,113 +81,6 @@ void paging_init(void);
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
-/*
- * _PAGE_PSE set in the page directory entry just means that
- * the page directory entry points directly to a 4MB-aligned block of
- * memory. 
- */
-#define _PAGE_BIT_PRESENT	0
-#define _PAGE_BIT_RW		1
-#define _PAGE_BIT_USER		2
-#define _PAGE_BIT_PWT		3
-#define _PAGE_BIT_PCD		4
-#define _PAGE_BIT_ACCESSED	5
-#define _PAGE_BIT_DIRTY		6
-#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page, Pentium+, if present.. */
-#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1	9	/* available for programmer */
-#define _PAGE_BIT_UNUSED2	10
-#define _PAGE_BIT_UNUSED3	11
-#define _PAGE_BIT_NX		63
-
-#define _PAGE_PRESENT	0x001
-#define _PAGE_RW	0x002
-#define _PAGE_USER	0x004
-#define _PAGE_PWT	0x008
-#define _PAGE_PCD	0x010
-#define _PAGE_ACCESSED	0x020
-#define _PAGE_DIRTY	0x040
-#define _PAGE_PSE	0x080	/* 4 MB (or 2MB) page, Pentium+, if present.. */
-#define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */
-#define _PAGE_UNUSED1	0x200	/* available for programmer */
-#define _PAGE_UNUSED2	0x400
-#define _PAGE_UNUSED3	0x800
-
-/* If _PAGE_PRESENT is clear, we use these: */
-#define _PAGE_FILE	0x040	/* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_PROTNONE	0x080	/* if the user mapped it with PROT_NONE;
-				   pte_present gives true */
-#ifdef CONFIG_X86_PAE
-#define _PAGE_NX	(1ULL<<_PAGE_BIT_NX)
-#else
-#define _PAGE_NX	0
-#endif
-
-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define PAGE_NONE \
-	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED \
-	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-
-#define PAGE_SHARED_EXEC \
-	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC \
-	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC \
-	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY \
-	PAGE_COPY_NOEXEC
-#define PAGE_READONLY \
-	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC \
-	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-
-#define _PAGE_KERNEL \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define _PAGE_KERNEL_EXEC \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-
-extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
-#define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
-
-#define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC)
-
-/*
- * The i386 can't do page protection for execute, and considers that
- * the same are read. Also, write permissions imply read permissions.
- * This is the closest we can get..
- */
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY_EXEC
-#define __P101	PAGE_READONLY_EXEC
-#define __P110	PAGE_COPY_EXEC
-#define __P111	PAGE_COPY_EXEC
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY_EXEC
-#define __S101	PAGE_READONLY_EXEC
-#define __S110	PAGE_SHARED_EXEC
-#define __S111	PAGE_SHARED_EXEC
-
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9c9cddf5138b..93eb4df2ec11 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -131,8 +131,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define USER_PTRS_PER_PGD	((TASK_SIZE-1)/PGDIR_SIZE+1)
-#define FIRST_USER_ADDRESS	0
 
 #define MAXMEM		 _AC(0x3fffffffffff, UL)
 #define VMALLOC_START    _AC(0xffffc20000000000, UL)
@@ -142,95 +140,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define MODULES_END      _AC(0xfffffffffff00000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
-#define _PAGE_BIT_PRESENT	0
-#define _PAGE_BIT_RW		1
-#define _PAGE_BIT_USER		2
-#define _PAGE_BIT_PWT		3
-#define _PAGE_BIT_PCD		4
-#define _PAGE_BIT_ACCESSED	5
-#define _PAGE_BIT_DIRTY		6
-#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
-#define _PAGE_BIT_FILE		6
-#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
-#define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
-
-#define _PAGE_PRESENT	(_AC(1, UL)<<_PAGE_BIT_PRESENT)
-#define _PAGE_RW	(_AC(1, UL)<<_PAGE_BIT_RW)
-#define _PAGE_USER	(_AC(1, UL)<<_PAGE_BIT_USER)
-#define _PAGE_PWT	(_AC(1, UL)<<_PAGE_BIT_PWT)
-#define _PAGE_PCD	(_AC(1, UL)<<_PAGE_BIT_PCD)
-#define _PAGE_ACCESSED	(_AC(1, UL)<<_PAGE_BIT_ACCESSED)
-#define _PAGE_DIRTY	(_AC(1, UL)<<_PAGE_BIT_DIRTY)
-/* 2MB page */
-#define _PAGE_PSE	(_AC(1, UL)<<_PAGE_BIT_PSE)
-/* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_FILE	(_AC(1, UL)<<_PAGE_BIT_FILE)
-/* Global TLB entry */
-#define _PAGE_GLOBAL	(_AC(1, UL)<<_PAGE_BIT_GLOBAL)
-
-#define _PAGE_PROTNONE	0x080	/* If not present */
-#define _PAGE_NX        (_AC(1, UL)<<_PAGE_BIT_NX)
-
-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define __PAGE_KERNEL \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_EXEC \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define __PAGE_KERNEL_NOCACHE \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_PWT | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_RO \
-	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_VSYSCALL \
-	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define __PAGE_KERNEL_VSYSCALL_NOCACHE \
-	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_LARGE \
-	(__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC \
-	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
-
-#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
-
-#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
-#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
-
-/*         xwr */
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY_EXEC
-#define __P101	PAGE_READONLY_EXEC
-#define __P110	PAGE_COPY_EXEC
-#define __P111	PAGE_COPY_EXEC
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY_EXEC
-#define __S101	PAGE_READONLY_EXEC
-#define __S110	PAGE_SHARED_EXEC
-#define __S111	PAGE_SHARED_EXEC
-
 #ifndef __ASSEMBLY__
 
 static inline unsigned long pgd_bad(pgd_t pgd)

From 925596a017bbd045ff711b778256f459e50a1192 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:55 +0100
Subject: [PATCH 524/890] x86: avoid name conflict for Voyager leave_mm

Avoid a conflict between Voyager's leave_mm and asm-x86/mmu.h's leave_mm.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mach-voyager/voyager_smp.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 751777241881..dffa786f61fe 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -808,7 +808,7 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-static inline void leave_mm(unsigned long cpu)
+static inline void voyager_leave_mm(unsigned long cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
@@ -838,7 +838,7 @@ static void smp_invalidate_interrupt(void)
 			else
 				__flush_tlb_one(flush_va);
 		} else
-			leave_mm(cpu);
+			voyager_leave_mm(cpu);
 	}
 	smp_mb__before_clear_bit();
 	clear_bit(cpu, &smp_invalidate_needed);
@@ -919,7 +919,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 		if (current->mm)
 			local_flush_tlb();
 		else
-			leave_mm(smp_processor_id());
+			voyager_leave_mm(smp_processor_id());
 	}
 	if (cpu_mask)
 		voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
@@ -939,7 +939,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 		if (current->mm)
 			__flush_tlb_one(va);
 		else
-			leave_mm(smp_processor_id());
+			voyager_leave_mm(smp_processor_id());
 	}
 
 	if (cpu_mask)
@@ -1155,7 +1155,7 @@ static void do_flush_tlb_all(void *info)
 
 	__flush_tlb_all();
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-		leave_mm(cpu);
+		voyager_leave_mm(cpu);
 }
 
 /* flush the TLB of every active CPU in the system */

From 61f38226def55d972cfd0e789971e952525ff8e5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:55 +0100
Subject: [PATCH 525/890] x86/pgtable: fix constant sign extension problem

based on:

 Subject: x86/pgtable: fix constant sign extension problem
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 5b858f0285a8..73e2b47c055e 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -19,18 +19,18 @@
 #define _PAGE_BIT_UNUSED3	11
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
-#define _PAGE_PRESENT	(_AC(1, UL)<<_PAGE_BIT_PRESENT)
-#define _PAGE_RW	(_AC(1, UL)<<_PAGE_BIT_RW)
-#define _PAGE_USER	(_AC(1, UL)<<_PAGE_BIT_USER)
-#define _PAGE_PWT	(_AC(1, UL)<<_PAGE_BIT_PWT)
-#define _PAGE_PCD	(_AC(1, UL)<<_PAGE_BIT_PCD)
-#define _PAGE_ACCESSED	(_AC(1, UL)<<_PAGE_BIT_ACCESSED)
-#define _PAGE_DIRTY	(_AC(1, UL)<<_PAGE_BIT_DIRTY)
-#define _PAGE_PSE	(_AC(1, UL)<<_PAGE_BIT_PSE)	/* 2MB page */
-#define _PAGE_GLOBAL	(_AC(1, UL)<<_PAGE_BIT_GLOBAL)	/* Global TLB entry */
-#define _PAGE_UNUSED1	(_AC(1, UL)<<_PAGE_BIT_UNUSED1)
-#define _PAGE_UNUSED2	(_AC(1, UL)<<_PAGE_BIT_UNUSED2)
-#define _PAGE_UNUSED3	(_AC(1, UL)<<_PAGE_BIT_UNUSED3)
+#define _PAGE_PRESENT	(_AC(1, L)<<_PAGE_BIT_PRESENT)
+#define _PAGE_RW	(_AC(1, L)<<_PAGE_BIT_RW)
+#define _PAGE_USER	(_AC(1, L)<<_PAGE_BIT_USER)
+#define _PAGE_PWT	(_AC(1, L)<<_PAGE_BIT_PWT)
+#define _PAGE_PCD	(_AC(1, L)<<_PAGE_BIT_PCD)
+#define _PAGE_ACCESSED	(_AC(1, L)<<_PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY	(_AC(1, L)<<_PAGE_BIT_DIRTY)
+#define _PAGE_PSE	(_AC(1, L)<<_PAGE_BIT_PSE)	/* 2MB page */
+#define _PAGE_GLOBAL	(_AC(1, L)<<_PAGE_BIT_GLOBAL)	/* Global TLB entry */
+#define _PAGE_UNUSED1	(_AC(1, L)<<_PAGE_BIT_UNUSED1)
+#define _PAGE_UNUSED2	(_AC(1, L)<<_PAGE_BIT_UNUSED2)
+#define _PAGE_UNUSED3	(_AC(1, L)<<_PAGE_BIT_UNUSED3)
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AC(1, ULL) << _PAGE_BIT_NX)

From dcbae6b377d78190954055ef2d8909ae83ff57de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:55 +0100
Subject: [PATCH 526/890] x86/pgtable: unify pagetable accessors, #1

based on:

 Subject: x86/pgtable: unify pagetable accessors
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_64.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 93eb4df2ec11..3a0588088480 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -179,21 +179,27 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
  * Undefined behaviour if not..
  */
 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
+
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
 
-static inline pte_t pte_mkclean(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; }
-static inline pte_t pte_mkold(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; }
-static inline pte_t pte_wrprotect(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; }
-static inline pte_t pte_mkexec(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_NX)); return pte; }
-static inline pte_t pte_mkdirty(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
-static inline pte_t pte_mkyoung(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
-static inline pte_t pte_clrhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_PSE)); return pte; }
+static inline int pmd_large(pmd_t pte) {
+	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+		(_PAGE_PSE|_PAGE_PRESENT);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
+static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
+static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
+static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
+static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
+static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
 
 struct vm_area_struct;
 

From 92ac166f3a776afc51766395076ffcfde112740a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:55 +0100
Subject: [PATCH 527/890] x86/pgtable: unify pagetable accessors, #2

change the pte_dirty/* inlines to the unified format. Non-NOP!

based on:

 Subject: x86/pgtable: unify pagetable accessors
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_32.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 3b40cb6dea18..57d4aab5594b 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -105,15 +105,15 @@ extern unsigned long pg0[];
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte)		{ return (pte).pte_low & _PAGE_PSE; }
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
+static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
 
 /*
  * The following only works if pte_present() is not true.
  */
-static inline int pte_file(pte_t pte)		{ return (pte).pte_low & _PAGE_FILE; }
+static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 
 static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }

From 1444d2da4468f30daff47cacfd98acbd7baf43a7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:56 +0100
Subject: [PATCH 528/890] x86/pgtable: unify pagetable accessors, #3

change the pte_mk inlines to the unified format. Non-NOP!

based on:

 Subject: x86/pgtable: unify pagetable accessors
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_32.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 57d4aab5594b..c1e855fdf60e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -115,13 +115,13 @@ static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
  */
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 
-static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PSE; return pte; }
+static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
+static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
+static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
+static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
+static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
 
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>

From 7d00a1ae54779db1bec03fa575cf53d99cb6cc48 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:56 +0100
Subject: [PATCH 529/890] x86/pgtable: unify pagetable accessors, #4

add new ops to 32-bit.

based on:

 Subject: x86/pgtable: unify pagetable accessors
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_32.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index c1e855fdf60e..b3e92124fc3f 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -123,6 +123,14 @@ static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_A
 static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
 static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
 
+static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
+static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+
+static inline int pmd_large(pmd_t pte) {
+	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+		(_PAGE_PSE|_PAGE_PRESENT);
+}
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else

From e8a4852453c5af27d2a9c606e76809afa3dedb64 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:56 +0100
Subject: [PATCH 530/890] x86/pgtable: unify pagetable accessors, #5

reorder. NOP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_32.h | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index b3e92124fc3f..1c43c302609c 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -108,29 +108,24 @@ extern unsigned long pg0[];
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
-static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
-
-/*
- * The following only works if pte_present() is not true.
- */
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-
-static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
-static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
-static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
-static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
-static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
-static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
-static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
-
-static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
-static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
 
 static inline int pmd_large(pmd_t pte) {
 	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
 		(_PAGE_PSE|_PAGE_PRESENT);
 }
 
+static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
+static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
+static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
+static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
+static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
+static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else

From 4614139c6a74fe02c85f702ba9c0e57f8e38647e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:56 +0100
Subject: [PATCH 531/890] x86/pgtable: unify pagetable accessors, #6

Unify functions to test and set bits in pagetable entries.

NOP: only moves existing code around, without any change to it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 28 ++++++++++++++++++++++++++++
 include/asm-x86/pgtable_32.h | 28 ----------------------------
 include/asm-x86/pgtable_64.h | 32 --------------------------------
 3 files changed, 28 insertions(+), 60 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 73e2b47c055e..4c03ff2246fd 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -115,6 +115,34 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __S110	PAGE_SHARED_EXEC
 #define __S111	PAGE_SHARED_EXEC
 
+#ifndef __ASSEMBLY__
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
+static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
+
+static inline int pmd_large(pmd_t pte) {
+	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+		(_PAGE_PSE|_PAGE_PRESENT);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
+static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
+static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
+static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
+static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
+static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
+
+#endif	/* __ASSEMBLY__ */
+
 #ifdef CONFIG_X86_32
 # include "pgtable_32.h"
 #else
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 1c43c302609c..0aaefdda5158 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -101,31 +101,6 @@ extern unsigned long pg0[];
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
-
-static inline int pmd_large(pmd_t pte) {
-	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
-		(_PAGE_PSE|_PAGE_PRESENT);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
-static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
-static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
-static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
-static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
-static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
-static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
-static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
-static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
-
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
@@ -276,9 +251,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return pte;
 }
 
-#define pmd_large(pmd) \
-((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
-
 /*
  * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
  *
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 3a0588088480..84f47f9d7896 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -173,34 +173,6 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 	pte_val(pte) &= __supported_pte_mask;
 	return pte;
 }
-
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
-
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
-
-static inline int pmd_large(pmd_t pte) {
-	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
-		(_PAGE_PSE|_PAGE_PRESENT);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
-static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
-static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
-static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
-static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
-static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
-static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
-static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
-static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
-
 struct vm_area_struct;
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
@@ -220,10 +192,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
  */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
 
-static inline int pmd_large(pmd_t pte) { 
-	return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; 
-} 	
-
 
 /*
  * Conversion functions: convert a page and protection to a page entry,

From c3bcfb57e1e64b9b2f8b2d90564826637e21c5ea Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:57 +0100
Subject: [PATCH 532/890] x86: mask NX from pte_pfn

In 32-bit PAE, mask NX from pte_pfn, since it isn't part of the PFN.
This code is due for unification anyway, but this fixes a latent bug.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable-3level.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 948a33414118..9816346f7df6 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -155,7 +155,7 @@ static inline int pte_none(pte_t pte)
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return pte_val(pte) >> PAGE_SHIFT;
+	return (pte_val(pte) & ~_PAGE_NX) >> PAGE_SHIFT;
 }
 
 extern unsigned long long __supported_pte_mask;

From 6fdc05d4794056e0b98901646c4b68c60a01d5eb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:57 +0100
Subject: [PATCH 533/890] x86: unify pgtable accessors which use

Make users of supported_pte_mask common.  This has the side-effect of
introducing the variable for 32-bit non-PAE, but I think its a pretty
small cost to simplify the code.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c            |  5 +++--
 include/asm-x86/pgtable-2level.h |  2 --
 include/asm-x86/pgtable-3level.h | 14 --------------
 include/asm-x86/pgtable.h        | 14 ++++++++++++++
 include/asm-x86/pgtable_64.h     | 18 ------------------
 5 files changed, 17 insertions(+), 36 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29130970c193..04332c09ad1d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -478,11 +478,12 @@ void zap_low_mappings (void)
 
 int nx_enabled = 0;
 
+pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
+
 #ifdef CONFIG_X86_PAE
 
 static int disable_nx __initdata = 0;
-u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
-EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /*
  * noexec = on|off
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 84b03cf56a79..d3171c0cb157 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -53,8 +53,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define pte_none(x)		(!(x).pte_low)
 #define pte_pfn(x)		(pte_val(x) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 /*
  * All present pages are kernel-executable:
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 9816346f7df6..61a7d0029f60 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -158,20 +158,6 @@ static inline unsigned long pte_pfn(pte_t pte)
 	return (pte_val(pte) & ~_PAGE_NX) >> PAGE_SHIFT;
 }
 
-extern unsigned long long __supported_pte_mask;
-
-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
-{
-	return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
-		      pgprot_val(pgprot)) & __supported_pte_mask);
-}
-
-static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
-{
-	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
-		      pgprot_val(pgprot)) & __supported_pte_mask);
-}
-
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 4c03ff2246fd..b29a122fc1ff 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -141,6 +141,20 @@ static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_R
 static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
 static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
 
+extern pteval_t __supported_pte_mask;
+
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+	return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) |
+		      pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+	return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) |
+		      pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 84f47f9d7896..57e4ebe0f7fc 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -17,7 +17,6 @@ extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
-extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
 
@@ -165,14 +164,6 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
 #define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
-{
-	pte_t pte;
-	pte_val(pte) = (page_nr << PAGE_SHIFT);
-	pte_val(pte) |= pgprot_val(pgprot);
-	pte_val(pte) &= __supported_pte_mask;
-	return pte;
-}
 struct vm_area_struct;
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
@@ -239,15 +230,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 /* page, protection -> pte */
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
  
-/* Change flags of a PTE */
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{ 
-	pte_val(pte) &= _PAGE_CHG_MASK;
-	pte_val(pte) |= pgprot_val(newprot);
-	pte_val(pte) &= __supported_pte_mask;
-       return pte; 
-}
-
 #define pte_index(address) \
 		(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \

From 38472311679d8784c2934d071ddda23d9b3fe264 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:57 +0100
Subject: [PATCH 534/890] x86: unify pgtable accessors which use, #2

based on:

 Subject: x86: unify pgtable accessors which use supported_pte_mask
 From: Jeremy Fitzhardinge <jeremy@goop.org>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 15 +++++++++++++++
 include/asm-x86/pgtable_32.h | 16 ----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index b29a122fc1ff..6452286e71a1 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -155,6 +155,21 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pteval_t val = pte_val(pte);
+
+	/*
+	 * Chop off the NX bit (if present), and add the NX portion of
+	 * the newprot (if present):
+	 */
+	val &= _PAGE_CHG_MASK & ~_PAGE_NX;
+	val |= pgprot_val(newprot) & __supported_pte_mask;
+
+	return __pte(val);
+}
+
+
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 0aaefdda5158..109dad5e16ee 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -235,22 +235,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	pte.pte_low &= _PAGE_CHG_MASK;
-	pte.pte_low |= pgprot_val(newprot);
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Chop off the NX bit (if present), and add the NX portion of
-	 * the newprot (if present):
-	 */
-	pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
-	pte.pte_high |= (pgprot_val(newprot) >> 32) & \
-					(__supported_pte_mask >> 32);
-#endif
-	return pte;
-}
-
 /*
  * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
  *

From b7fff536d0ad45c4810f9b99845c707ceadc3afc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:32:57 +0100
Subject: [PATCH 535/890] x86: fix pte_modify() bug

fix sign extension bug in PTE_MASK / _PTE_CHG_MASK.

this resolves the following bootup crash on PAE systems:

[   94.710726] init[1]: segfault at 00000004 ip 49471cbb sp bff0c6c0 error 4
[   94.717764] init[1]: segfault at 00000004 ip 49471cbb sp bff0c6c0 error 4
[   94.724772] init[1]: segfault at 00000004 ip 49471cbb sp bff0c6c0 error 4
[   94.731777] init[1]: segfault at 00000004 ip 49471cbb sp bff0c6c0 error 4

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 3a00d0d9dfd3..91920565a575 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -11,7 +11,7 @@
 #ifdef __KERNEL__
 
 #define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
-#define PTE_MASK		PHYSICAL_PAGE_MASK
+#define PTE_MASK		(_AT(long, PHYSICAL_PAGE_MASK))
 
 #define LARGE_PAGE_SIZE		(_AC(1,UL) << PMD_SHIFT)
 #define LARGE_PAGE_MASK		(~(LARGE_PAGE_SIZE-1))

From c8e5393ab38564d2f45b560a2f95bc8f9ff6f823 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:57 +0100
Subject: [PATCH 536/890] x86: page.h: make pte_t a union to always include

Make sure pte_t, whatever its definition, has a pte element with type
pteval_t.  This allows common code to access it without needing to be
specifically parameterised on what pagetable mode we're compiling for.
For 32-bit, this means that pte_t becomes a union with "pte" and "{
pte_low, pte_high }" (PAE) or just "pte_low" (non-PAE).

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c               |  4 +---
 include/asm-x86/page.h           | 10 ++++++++++
 include/asm-x86/page_32.h        | 30 +++++++-----------------------
 include/asm-x86/page_64.h        |  3 ---
 include/asm-x86/paravirt.h       |  2 +-
 include/asm-x86/pgtable-2level.h |  4 ++--
 include/asm-x86/pgtable-3level.h |  4 ++--
 include/asm-x86/pgtable_64.h     |  6 +++---
 include/asm-x86/processor.h      |  1 -
 include/xen/page.h               |  6 +++---
 10 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0ac6c5dc49ba..52f392893008 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -244,9 +244,7 @@ pte_t xen_make_pte(unsigned long long pte)
 	if (pte & 1)
 		pte = phys_to_machine(XPADDR(pte)).maddr;
 
-	pte &= ~_PAGE_PCD;
-
-	return (pte_t){ pte, pte >> 32 };
+	return (pte_t){ .pte = pte };
 }
 
 pmd_t xen_make_pmd(unsigned long long pmd)
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 91920565a575..a6495eb5c605 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -108,6 +108,16 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 #include <asm-generic/pgtable-nopmd.h>
 #endif	/* PAGETABLE_LEVELS >= 3 */
 
+static inline pte_t native_make_pte(pteval_t val)
+{
+	return (pte_t) { .pte = val };
+}
+
+static inline pteval_t native_pte_val(pte_t pte)
+{
+	return pte.pte;
+}
+
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index e424bef4395a..11c4b39cada1 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -26,18 +26,12 @@ typedef u64	pgdval_t;
 typedef u64	pgprotval_t;
 typedef u64	phys_addr_t;
 
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-
-static inline unsigned long long native_pte_val(pte_t pte)
-{
-	return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
-}
-
-static inline pte_t native_make_pte(unsigned long long val)
-{
-	return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
-}
-
+typedef union {
+	struct {
+		unsigned long pte_low, pte_high;
+	};
+	pteval_t pte;
+} pte_t;
 #endif	/* __ASSEMBLY__
  */
 #else  /* !CONFIG_X86_PAE */
@@ -53,19 +47,9 @@ typedef unsigned long	pgdval_t;
 typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
-typedef struct { pteval_t pte_low; } pte_t;
+typedef union { pteval_t pte, pte_low; } pte_t;
 typedef pte_t boot_pte_t;
 
-static inline unsigned long native_pte_val(pte_t pte)
-{
-	return pte.pte_low;
-}
-
-static inline pte_t native_make_pte(unsigned long val)
-{
-	return (pte_t) { .pte_low = val };
-}
-
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
 
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index ebf977a809a8..c1ac42d8707f 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -70,9 +70,6 @@ typedef unsigned long	phys_addr_t;
 
 typedef struct { pteval_t pte; } pte_t;
 
-#define native_pte_val(x)	((x).pte)
-#define native_make_pte(x) ((pte_t) { (x) } )
-
 #define vmemmap ((struct page *)VMEMMAP_START)
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index b35ed95166e4..24385decbd47 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -922,7 +922,7 @@ static inline pte_t __pte(unsigned long long val)
 	unsigned long long ret = PVOP_CALL2(unsigned long long,
 					    pv_mmu_ops.make_pte,
 					    val, val >> 32);
-	return (pte_t) { ret, ret >> 32 };
+	return (pte_t) { .pte = ret };
 }
 
 static inline pmd_t __pmd(unsigned long long val)
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index d3171c0cb157..f949bb083089 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -72,13 +72,13 @@ static inline int pte_exec_kernel(pte_t pte)
 	((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 ))
 
 #define pgoff_to_pte(off) \
-	((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
+	((pte_t) { .pte_low = (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
 
 /* Encode and de-code a swap entry */
 #define __swp_type(x)			(((x).val >> 1) & 0x1f)
 #define __swp_offset(x)			((x).val >> 8)
 #define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+#define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
 #endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 61a7d0029f60..3da96f792dd0 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -163,7 +163,7 @@ static inline unsigned long pte_pfn(pte_t pte)
  * put the 32 bits of offset into the high part.
  */
 #define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
+#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
 #define PTE_FILE_MAX_BITS       32
 
 /* Encode and de-code a swap entry */
@@ -171,7 +171,7 @@ static inline unsigned long pte_pfn(pte_t pte)
 #define __swp_offset(x)			((x).val >> 5)
 #define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
-#define __swp_entry_to_pte(x)		((pte_t){ 0, (x).val })
+#define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 57e4ebe0f7fc..77038d8e9bfd 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -72,7 +72,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 
 static inline void set_pte(pte_t *dst, pte_t val)
 {
-	pte_val(*dst) = pte_val(val);
+	*dst = val;
 } 
 #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
@@ -222,7 +222,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
+#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
 #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
 
 /* PTE - Level 1 access. */
@@ -264,7 +264,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 #define __swp_offset(x)			((x).val >> 8)
 #define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+#define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index cc549bf819a4..e701ac5487e5 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -14,7 +14,6 @@ struct mm_struct;
 #include <asm/vm86.h>
 #include <asm/math_emu.h>
 #include <asm/segment.h>
-#include <asm/page.h>
 #include <asm/types.h>
 #include <asm/sigcontext.h>
 #include <asm/current.h>
diff --git a/include/xen/page.h b/include/xen/page.h
index c0c8fcb27899..031ef22a971e 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -156,16 +156,16 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
 
 static inline unsigned long long pte_val_ma(pte_t x)
 {
-	return ((unsigned long long)x.pte_high << 32) | x.pte_low;
+	return x.pte;
 }
 #define pmd_val_ma(v) ((v).pmd)
 #define pud_val_ma(v) ((v).pgd.pgd)
-#define __pte_ma(x)	((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } )
+#define __pte_ma(x)	((pte_t) { .pte = (x) })
 #define __pmd_ma(x)	((pmd_t) { (x) } )
 #else  /* !X86_PAE */
 #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
 #define mfn_pte(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pte_val_ma(x)	((x).pte_low)
+#define pte_val_ma(x)	((x).pte)
 #define pmd_val_ma(v)	((v).pud.pgd.pgd)
 #define __pte_ma(x)	((pte_t) { (x) } )
 #endif	/* CONFIG_X86_PAE */

From e33287013585e96180c575288bf1db22bee47b52 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 537/890] x86/vmi: fix compilation as a result of pte_t changes

Fix various compilation problems as a result of changing pte_t.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmi_32.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 10c46419d35d..2ee5d8e0ada5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -475,7 +475,7 @@ static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep
 static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 #ifdef CONFIG_X86_PAE
-	const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
+	const pte_t pte = { .pte = pmdval.pmd };
 	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
 #else
 	const pte_t pte = { pmdval.pud.pgd.pgd };
@@ -508,21 +508,21 @@ static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t
 static void vmi_set_pud(pud_t *pudp, pud_t pudval)
 {
 	/* Um, eww */
-	const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
+	const pte_t pte = { .pte = pudval.pgd.pgd };
 	vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
 	vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
 }
 
 static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	const pte_t pte = { 0 };
+	const pte_t pte = { .pte = 0 };
 	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
 }
 
 static void vmi_pmd_clear(pmd_t *pmd)
 {
-	const pte_t pte = { 0 };
+	const pte_t pte = { .pte = 0 };
 	vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
 	vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
 }

From 195466dc4b9b8a4cc89d37ea1211746f3afbc941 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 538/890] x86: pgtable: unify pte accessors

Make various pte accessors common.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable-2level.h |   1 -
 include/asm-x86/pgtable-3level.h |   1 -
 include/asm-x86/pgtable.h        | 110 ++++++++++++++++++++++++++++++-
 include/asm-x86/pgtable_32.h     | 101 ----------------------------
 include/asm-x86/pgtable_64.h     |  43 ++----------
 5 files changed, 113 insertions(+), 143 deletions(-)

diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index f949bb083089..65cdc8f9e3b3 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -33,7 +33,6 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
 
-#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 3da96f792dd0..a123e687112f 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -101,7 +101,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
 #define set_pte_atomic(ptep, pte)		native_set_pte_atomic(ptep, pte)
 #define set_pmd(pmdp, pmd)			native_set_pmd(pmdp, pmd)
 #define set_pud(pudp, pud)			native_set_pud(pudp, pud)
-#define pte_clear(mm, addr, ptep)		native_pte_clear(mm, addr, ptep)
 #define pmd_clear(pmd)				native_pmd_clear(pmd)
 #endif
 
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 6452286e71a1..62fa856c2491 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -116,6 +116,7 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __S111	PAGE_SHARED_EXEC
 
 #ifndef __ASSEMBLY__
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -169,7 +170,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return __pte(val);
 }
 
-
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
@@ -178,4 +178,112 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 # include "pgtable_64.h"
 #endif
 
+#ifndef __ASSEMBLY__
+
+#ifndef CONFIG_PARAVIRT
+/*
+ * Rules for using pte_update - it must be called after any PTE update which
+ * has not been done using the set_pte / clear_pte interfaces.  It is used by
+ * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
+ * updates should either be sets, clears, or set_pte_atomic for P->P
+ * transitions, which means this hook should only be called for user PTEs.
+ * This hook implies a P->P protection or access change has taken place, which
+ * requires a subsequent TLB flush.  The notification can optionally be delayed
+ * until the TLB flush event by using the pte_update_defer form of the
+ * interface, but care must be taken to assure that the flush happens while
+ * still holding the same page table lock so that the shadow and primary pages
+ * do not become out of sync on SMP.
+ */
+#define pte_update(mm, addr, ptep)		do { } while (0)
+#define pte_update_defer(mm, addr, ptep)	do { } while (0)
+#endif
+
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+	pte_t res = *ptep;
+
+	/* Pure native function needs no input for mm, addr */
+	native_pte_clear(NULL, 0, ptep);
+	return res;
+}
+
+/*
+ * We only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPU's that might be updating the dirty
+ * bit at the same time.
+ */
+#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty)		\
+({									\
+	int __changed = !pte_same(*(ptep), entry);			\
+	if (__changed && dirty) {					\
+		*ptep = entry;						\
+		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
+		flush_tlb_page(vma, address);				\
+	}								\
+	__changed;							\
+})
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(vma, addr, ptep) ({			\
+	int __ret = 0;							\
+	if (pte_young(*(ptep)))						\
+		__ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,		\
+					   &(ptep)->pte);		\
+	if (__ret)							\
+		pte_update((vma)->vm_mm, addr, ptep);			\
+	__ret;								\
+})
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(vma, address, ptep)			\
+({									\
+	int __young;							\
+	__young = ptep_test_and_clear_young((vma), (address), (ptep));	\
+	if (__young)							\
+		flush_tlb_page(vma, address);				\
+	__young;							\
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = native_ptep_get_and_clear(ptep);
+	pte_update(mm, addr, ptep);
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+	pte_t pte;
+	if (full) {
+		/*
+		 * Full address destruction in progress; paravirt does not
+		 * care about updates and native needs no locking
+		 */
+		pte = native_local_ptep_get_and_clear(ptep);
+	} else {
+		pte = ptep_get_and_clear(mm, addr, ptep);
+	}
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	clear_bit(_PAGE_BIT_RW, &ptep->pte);
+	pte_update(mm, addr, ptep);
+}
+
+#ifndef CONFIG_PARAVIRT
+#define pte_clear(mm, addr, ptep)		native_pte_clear(mm, addr, ptep)
+#endif	/* !CONFIG_PARAVIRT */
+
+#include <asm-generic/pgtable.h>
+#endif	/* __ASSEMBLY__ */
+
 #endif	/* _ASM_X86_PGTABLE_H */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 109dad5e16ee..d4d238c10293 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -107,105 +107,6 @@ extern unsigned long pg0[];
 # include <asm/pgtable-2level.h>
 #endif
 
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces.  It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.  The notification can optionally be delayed
- * until the TLB flush event by using the pte_update_defer form of the
- * interface, but care must be taken to assure that the flush happens while
- * still holding the same page table lock so that the shadow and primary pages
- * do not become out of sync on SMP.
- */
-#define pte_update(mm, addr, ptep)		do { } while (0)
-#define pte_update_defer(mm, addr, ptep)	do { } while (0)
-#endif
-
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
-	pte_t res = *ptep;
-
-	/* Pure native function needs no input for mm, addr */
-	native_pte_clear(NULL, 0, ptep);
-	return res;
-}
-
-/*
- * We only update the dirty/accessed state if we set
- * the dirty bit by hand in the kernel, since the hardware
- * will do the accessed bit for us, and we don't want to
- * race with other CPU's that might be updating the dirty
- * bit at the same time.
- */
-#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(vma, address, ptep, entry, dirty)		\
-({									\
-	int __changed = !pte_same(*(ptep), entry);			\
-	if (__changed && dirty) {					\
-		(ptep)->pte_low = (entry).pte_low;			\
-		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
-		flush_tlb_page(vma, address);				\
-	}								\
-	__changed;							\
-})
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(vma, addr, ptep) ({			\
-	int __ret = 0;							\
-	if (pte_young(*(ptep)))						\
-		__ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,		\
-						&(ptep)->pte_low);	\
-	if (__ret)							\
-		pte_update((vma)->vm_mm, addr, ptep);			\
-	__ret;								\
-})
-
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(vma, address, ptep)			\
-({									\
-	int __young;							\
-	__young = ptep_test_and_clear_young((vma), (address), (ptep));	\
-	if (__young)							\
-		flush_tlb_page(vma, address);				\
-	__young;							\
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = native_ptep_get_and_clear(ptep);
-	pte_update(mm, addr, ptep);
-	return pte;
-}
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
-{
-	pte_t pte;
-	if (full) {
-		/*
-		 * Full address destruction in progress; paravirt does not
-		 * care about updates and native needs no locking
-		 */
-		pte = native_local_ptep_get_and_clear(ptep);
-	} else {
-		pte = ptep_get_and_clear(mm, addr, ptep);
-	}
-	return pte;
-}
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
-	pte_update(mm, addr, ptep);
-}
-
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
@@ -359,6 +260,4 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base)
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 
-#include <asm-generic/pgtable.h>
-
 #endif /* _I386_PGTABLE_H */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 77038d8e9bfd..987f51f684a5 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -101,18 +101,18 @@ static inline void pgd_clear (pgd_t * pgd)
 	set_pgd(pgd, __pgd(0));
 }
 
-#define ptep_get_and_clear(mm,addr,xp)	__pte(xchg(&(xp)->pte, 0))
+#define native_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0))
 
 struct mm_struct;
 
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+static inline pte_t native_ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
 {
 	pte_t pte;
 	if (full) {
 		pte = *ptep;
 		*ptep = __pte(0);
 	} else {
-		pte = ptep_get_and_clear(mm, addr, ptep);
+		pte = native_ptep_get_and_clear(ptep);
 	}
 	return pte;
 }
@@ -158,26 +158,12 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 
 #define pte_none(x)	(!pte_val(x))
 #define pte_present(x)	(pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define native_pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this right? */
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
 #define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
-struct vm_area_struct;
-
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	if (!pte_young(*ptep))
-		return 0;
-	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
-}
-
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	clear_bit(_PAGE_BIT_RW, &ptep->pte);
-}
-
 /*
  * Macro to mark a page protection value as "uncacheable".
  */
@@ -243,22 +229,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 
 #define update_mmu_cache(vma,address,pte) do { } while (0)
 
-/* We only update the dirty/accessed state if we set
- * the dirty bit by hand in the kernel, since the hardware
- * will do the accessed bit for us, and we don't want to
- * race with other CPU's that might be updating the dirty
- * bit at the same time. */
-#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-({									  \
-	int __changed = !pte_same(*(__ptep), __entry);			  \
-	if (__changed && __dirty) {					  \
-		set_pte(__ptep, __entry);			  	  \
-		flush_tlb_page(__vma, __address);		  	  \
-	}								  \
-	__changed;							  \
-})
-
 /* Encode and de-code a swap entry */
 #define __swp_type(x)			(((x).val >> 1) & 0x3f)
 #define __swp_offset(x)			((x).val >> 8)
@@ -290,12 +260,7 @@ pte_t *lookup_address(unsigned long addr);
 #define	kc_offset_to_vaddr(o) \
    (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
 
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
-#include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _X86_64_PGTABLE_H */

From d8d89827fc0c9c2ea6ac4c22e14e30586a856f58 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 539/890] x86: fix warning

&ptep->pte isn't always an unsigned long *, so cast it to avoid a warning.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 62fa856c2491..820db41dbe4c 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -275,7 +275,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	clear_bit(_PAGE_BIT_RW, &ptep->pte);
+	clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
 	pte_update(mm, addr, ptep);
 }
 

From 8405b122ad0dd75354b3bfed4de9a96514fd40cb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 540/890] x86: unify zero_page definition

Move ZERO_PAGE/empty_zero_page to common place.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 8 ++++++++
 include/asm-x86/pgtable_32.h | 6 ------
 include/asm-x86/pgtable_64.h | 7 -------
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 820db41dbe4c..eb14a70a4728 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -117,6 +117,14 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 
 #ifndef __ASSEMBLY__
 
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index d4d238c10293..eb8cccfa6a49 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -25,12 +25,6 @@
 struct mm_struct;
 struct vm_area_struct;
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-extern unsigned long empty_zero_page[1024];
 extern pgd_t swapper_pg_dir[1024];
 extern struct kmem_cache *pmd_cache;
 extern spinlock_t pgd_lock;
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 987f51f684a5..435e17187eb6 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -23,13 +23,6 @@ extern pgd_t init_level4_pgt[];
 extern void paging_init(void);
 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
 #endif /* !__ASSEMBLY__ */
 
 /*

From 4891645e764d2e181b834509a689fcd12e890c10 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 541/890] x86: unify paravirt pagetable accessors

Put all the defines for mapping pagetable operations to their native
versions (for the non-paravirt case) into one place.  Make the
corresponding changes to paravirt.h.

The tricky part here is that when a pagetable entry can't be updated
atomically (ie, 32-bit PAE), we need special handlers for pte_clear,
set_pte_atomic and set_pte_present.  However, the other two modes
don't need special handling for these, and can use a common
set_pte(_at) path.

[ mingo@elte.hu: fixes ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h       | 21 +++++++
 include/asm-x86/pgtable-2level.h | 30 +++++-----
 include/asm-x86/pgtable-3level.h | 15 -----
 include/asm-x86/pgtable.h        | 63 ++++++++++++++++-----
 include/asm-x86/pgtable_64.h     | 94 +++++++++++++++++---------------
 5 files changed, 136 insertions(+), 87 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 24385decbd47..4aa06b929e48 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1039,6 +1039,27 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
 }
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	set_pmd(pmdp, __pmd(0));
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	set_pte_at(mm, addr, ptep, __pte(0));
+}
+
+static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	set_pte(ptep, pte);
+}
+
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte(ptep, pte);
+}
 #endif	/* CONFIG_X86_PAE */
 
 /* Lazy mode for batching updates / context switch */
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 65cdc8f9e3b3..ce486bdbbbb7 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -15,29 +15,31 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
 {
 	*ptep = pte;
 }
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
-				     pte_t *ptep , pte_t pte)
-{
-	native_set_pte(ptep, pte);
-}
+
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
 }
-#ifndef CONFIG_PARAVIRT
-#define set_pte(pteptr, pteval)		native_set_pte(pteptr, pteval)
-#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval)
-#define set_pmd(pmdptr, pmdval)		native_set_pmd(pmdptr, pmdval)
-#endif
 
-#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
+static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	native_set_pte(ptep, pte);
+}
 
-#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr,
+					  pte_t *ptep, pte_t pte)
+{
+	native_set_pte(ptep, pte);
+}
+
+static inline void native_pmd_clear(pmd_t *pmdp)
+{
+	native_set_pmd(pmdp, __pmd(0));
+}
 
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
 {
-	*xp = __pte(0);
+	*xp = native_make_pte(0);
 }
 
 #ifdef CONFIG_SMP
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index a123e687112f..5af7a44ed902 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -39,11 +39,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
 }
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
-				     pte_t *ptep , pte_t pte)
-{
-	native_set_pte(ptep, pte);
-}
 
 /*
  * Since this is only called on user PTEs, and the page fault handler
@@ -94,16 +89,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-#ifndef CONFIG_PARAVIRT
-#define set_pte(ptep, pte)			native_set_pte(ptep, pte)
-#define set_pte_at(mm, addr, ptep, pte)		native_set_pte_at(mm, addr, ptep, pte)
-#define set_pte_present(mm, addr, ptep, pte)	native_set_pte_present(mm, addr, ptep, pte)
-#define set_pte_atomic(ptep, pte)		native_set_pte_atomic(ptep, pte)
-#define set_pmd(pmdp, pmd)			native_set_pmd(pmdp, pmd)
-#define set_pud(pudp, pud)			native_set_pud(pudp, pud)
-#define pmd_clear(pmd)				native_pmd_clear(pmd)
-#endif
-
 /*
  * Pentium-II erratum A13: in PAE mode we explicitly have to flush
  * the TLB via cr3 if the top-level pgd is changed...
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index eb14a70a4728..a1eeacdf9d8c 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -178,6 +178,39 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return __pte(val);
 }
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else  /* !CONFIG_PARAVIRT */
+#define set_pte(ptep, pte)		native_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte)	native_set_pte_at(mm, addr, ptep, pte)
+
+#define set_pte_present(mm, addr, ptep, pte)				\
+	native_set_pte_present(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte)					\
+	native_set_pte_atomic(ptep, pte)
+
+#define set_pmd(pmdp, pmd)		native_set_pmd(pmdp, pmd)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define set_pgd(pgdp, pgd)		native_set_pgd(pgdp, pgd)
+#define pgd_clear(pgd)			native_pgd_clear(pgd)
+#endif
+
+#ifndef set_pud
+# define set_pud(pudp, pud)		native_set_pud(pudp, pud)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_clear(pud)			native_pud_clear(pud)
+#endif
+
+#define pte_clear(mm, addr, ptep)	native_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd)			native_pmd_clear(pmd)
+
+#define pte_update(mm, addr, ptep)              do { } while (0)
+#define pte_update_defer(mm, addr, ptep)        do { } while (0)
+#endif	/* CONFIG_PARAVIRT */
+
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
@@ -188,6 +221,22 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #ifndef __ASSEMBLY__
 
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+	pte_t res = *ptep;
+
+	/* Pure native function needs no input for mm, addr */
+	native_pte_clear(NULL, 0, ptep);
+	return res;
+}
+
+static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
+				     pte_t *ptep , pte_t pte)
+{
+	native_set_pte(ptep, pte);
+}
+
 #ifndef CONFIG_PARAVIRT
 /*
  * Rules for using pte_update - it must be called after any PTE update which
@@ -206,16 +255,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pte_update_defer(mm, addr, ptep)	do { } while (0)
 #endif
 
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
-	pte_t res = *ptep;
-
-	/* Pure native function needs no input for mm, addr */
-	native_pte_clear(NULL, 0, ptep);
-	return res;
-}
-
 /*
  * We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
@@ -287,10 +326,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep);
 }
 
-#ifndef CONFIG_PARAVIRT
-#define pte_clear(mm, addr, ptep)		native_pte_clear(mm, addr, ptep)
-#endif	/* !CONFIG_PARAVIRT */
-
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 435e17187eb6..29fdeb8111bb 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -63,51 +63,59 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 #define pgd_none(x)	(!pgd_val(x))
 #define pud_none(x)	(!pud_val(x))
 
-static inline void set_pte(pte_t *dst, pte_t val)
-{
-	*dst = val;
-} 
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-
-static inline void set_pmd(pmd_t *dst, pmd_t val)
-{
-	*dst = val;
-} 
-
-static inline void set_pud(pud_t *dst, pud_t val)
-{
-	*dst = val;
-}
-
-static inline void pud_clear (pud_t *pud)
-{
-	set_pud(pud, __pud(0));
-}
-
-static inline void set_pgd(pgd_t *dst, pgd_t val)
-{
-	*dst = val;
-} 
-
-static inline void pgd_clear (pgd_t * pgd)
-{
-	set_pgd(pgd, __pgd(0));
-}
-
-#define native_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0))
-
 struct mm_struct;
 
-static inline pte_t native_ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep)
 {
-	pte_t pte;
-	if (full) {
-		pte = *ptep;
-		*ptep = __pte(0);
-	} else {
-		pte = native_ptep_get_and_clear(ptep);
-	}
-	return pte;
+	*ptep = native_make_pte(0);
+}
+
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+}
+
+static inline pte_t native_ptep_get_and_clear(pte_t *xp)
+{
+#ifdef CONFIG_SMP
+	return native_make_pte(xchg(&xp->pte, 0));
+#else
+	/* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */
+	pte_t ret = *xp;
+	native_pte_clear(NULL, 0, xp);
+	return ret;
+#endif
+}
+
+static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
+static inline void native_pmd_clear(pmd_t *pmd)
+{
+	native_set_pmd(pmd, native_make_pmd(0));
+}
+
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+	*pudp = pud;
+}
+
+static inline void native_pud_clear(pud_t *pud)
+{
+	native_set_pud(pud, native_make_pud(0));
+}
+
+static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	*pgdp = pgd;
+}
+
+static inline void native_pgd_clear(pgd_t * pgd)
+{
+	native_set_pgd(pgd, native_make_pgd(0));
 }
 
 #define pte_same(a, b)		((a).pte == (b).pte)
@@ -151,7 +159,6 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 
 #define pte_none(x)	(!pte_val(x))
 #define pte_present(x)	(pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define native_pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this right? */
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
@@ -196,7 +203,6 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 			pmd_index(address))
 #define pmd_none(x)	(!pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 

From 015c8dd0cb3b380cb4c3930968250c719d1dd303 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:58 +0100
Subject: [PATCH 542/890] xen: mask out PWT too

The hypervisor doesn't allow PCD or PWT to be set on guest ptes, so
make sure they're masked out.  Also, fix up some previous mispatching.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 52f392893008..3e9e095c295c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -291,7 +291,7 @@ pte_t xen_make_pte(unsigned long pte)
 	if (pte & _PAGE_PRESENT)
 		pte = phys_to_machine(XPADDR(pte)).maddr;
 
-	pte &= ~_PAGE_PCD;
+	pte &= ~(_PAGE_PCD | _PAGE_PWT);
 
 	return (pte_t){ pte };
 }

From f29192320f22f1617d50a9c790862603eeef64e6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:32:59 +0100
Subject: [PATCH 543/890] x86/pgtable: explain constant sign extension problem

When the _PAGE_FOO constants are defined as (1ul << _PAGE_BIT_FOO), they
become unsigned longs.  In 32-bit PAE mode, these end up being
implicitly cast to 64-bit types when used to manipulate a pte, and
because they're unsigned the top 32-bits are 0, destroying the upper
bits of the pte.

When _PAGE_FOO constants are given a signed integer type, the cast to
64-bits will sign-extend so that the upper bits are all ones,
preserving the upper pte bits in manipulations.

Explain this in a prominent place.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index a1eeacdf9d8c..75a656e6b3f8 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -19,6 +19,11 @@
 #define _PAGE_BIT_UNUSED3	11
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
+/*
+ * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
+ * sign-extended value on 32-bit with all 1's in the upper word,
+ * which preserves the upper pte values on 64-bit ptes:
+ */
 #define _PAGE_PRESENT	(_AC(1, L)<<_PAGE_BIT_PRESENT)
 #define _PAGE_RW	(_AC(1, L)<<_PAGE_BIT_RW)
 #define _PAGE_USER	(_AC(1, L)<<_PAGE_BIT_USER)

From c68461b67d97739707b3fc57618f22091791f2af Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Wed, 30 Jan 2008 13:32:59 +0100
Subject: [PATCH 544/890] x86: change x86 machine check handler to use
 unlocked_ioctl instead

The machine check handler registers ioctl handler that is called
with the BKL held. Changing to register unlocked_ioctl instead.
Also mce ioctl handler does not seem to need any lock protection.

To: Andi Kleen <andi@firstfloor.org>
Cc: linux-kernel@vger.kernel.org
Cc: kernel-janitors@vger.kernel.org

Change the Machine check handler to use unlocked_ioctl instead of
ioctl handler. Also the mce ioctl handler does not need any lock
protection.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index bc6e35153d83..98b23d55fe6e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -634,8 +634,7 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd,
-		     unsigned long arg)
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
 	int __user *p = (int __user *)arg;
 
@@ -664,7 +663,7 @@ static const struct file_operations mce_chrdev_ops = {
 	.release = mce_release,
 	.read = mce_read,
 	.poll = mce_poll,
-	.ioctl = mce_ioctl,
+	.unlocked_ioctl = mce_ioctl,
 };
 
 static struct miscdevice mce_log_device = {

From a604b38036bee1483fb98a520c69895d5d6276a6 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:59 +0100
Subject: [PATCH 545/890] x86: introduce __die helper to X86_32

Small step towards unifying traps_32|64.c.  No functional
changes.  Pull out a small helper from an if() statement
in die().

Marked as __kprobes as eventually we will want to call this
from do_page_fault similar to how X86_64 does it.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c | 76 +++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 2eb6ca0ef672..83df0f37ba75 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -352,6 +352,45 @@ int is_valid_bugaddr(unsigned long ip)
 	return ud2 == 0x0b0f;
 }
 
+static int die_counter;
+
+int __kprobes __die(const char * str, struct pt_regs * regs, long err)
+{
+	unsigned long sp;
+	unsigned short ss;
+
+	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+	printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+	printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC");
+#endif
+	printk("\n");
+
+	if (notify_die(DIE_OOPS, str, regs, err,
+				current->thread.trap_no, SIGSEGV) !=
+			NOTIFY_STOP) {
+		show_registers(regs);
+		/* Executive summary in case the oops scrolled away */
+		sp = (unsigned long) (&regs->sp);
+		savesegment(ss, ss);
+		if (user_mode(regs)) {
+			sp = regs->sp;
+			ss = regs->ss & 0xffff;
+		}
+		printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+		print_symbol("%s", regs->ip);
+		printk(" SS:ESP %04x:%08lx\n", ss, sp);
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
 /*
  * This is gone through when something in the kernel has done something bad and
  * is about to be terminated.
@@ -367,7 +406,6 @@ void die(const char * str, struct pt_regs * regs, long err)
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 	unsigned long flags;
 
 	oops_enter();
@@ -383,43 +421,13 @@ void die(const char * str, struct pt_regs * regs, long err)
 		raw_local_irq_save(flags);
 
 	if (++die.lock_owner_depth < 3) {
-		unsigned long sp;
-		unsigned short ss;
-
 		report_bug(regs->ip, regs);
 
-		printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
-		       ++die_counter);
-#ifdef CONFIG_PREEMPT
-		printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-		printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-		printk("DEBUG_PAGEALLOC");
-#endif
-		printk("\n");
-
-		if (notify_die(DIE_OOPS, str, regs, err,
-					current->thread.trap_no, SIGSEGV) !=
-				NOTIFY_STOP) {
-			show_registers(regs);
-			/* Executive summary in case the oops scrolled away */
-			sp = (unsigned long) (&regs->sp);
-			savesegment(ss, ss);
-			if (user_mode(regs)) {
-				sp = regs->sp;
-				ss = regs->ss & 0xffff;
-			}
-			printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-			print_symbol("%s", regs->ip);
-			printk(" SS:ESP %04x:%08lx\n", ss, sp);
-		}
-		else
+		if (__die(str, regs, err))
 			regs = NULL;
-  	} else
+	} else {
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
+	}
 
 	bust_spinlocks(0);
 	die.lock_owner = -1;

From 318aa296c329625916c47703eab906d6ebf13930 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:59 +0100
Subject: [PATCH 546/890] x86: more users of PF_ constants in fault_32|64.c

Should be the last of the error_code tests that could use
the PF_ defines.  Makes X86_32|64 a little closer.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 7 ++++---
 arch/x86/mm/fault_64.c | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b1893ebf6456..421e276770ad 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -404,7 +404,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * protection error (error_code & 9) == 0.
 	 */
 	if (unlikely(address >= TASK_SIZE)) {
-		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+		    vmalloc_fault(address) >= 0)
 			return;
 		if (notify_page_fault(regs))
 			return;
@@ -603,7 +604,7 @@ no_context:
 		__typeof__(pte_val(__pte(0))) page;
 
 #ifdef CONFIG_X86_PAE
-		if (error_code & 16) {
+		if (error_code & PF_INSTR) {
 			pte_t *pte = lookup_address(address);
 
 			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
@@ -674,7 +675,7 @@ out_of_memory:
 		goto survive;
 	}
 	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
+	if (error_code & PF_USER)
 		do_group_exit(SIGKILL);
 	goto no_context;
 
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 058b04cb96d4..95f142f5b5cc 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -677,7 +677,7 @@ out_of_memory:
 		goto again;
 	}
 	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
+	if (error_code & PF_USER)
 		do_group_exit(SIGKILL);
 	goto no_context;
 

From b3a5acc17c4ad9c28c00ee5e5271de1b1285d22b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:32:59 +0100
Subject: [PATCH 547/890] x86: use fixup_exception() in traps_64.c

Use the fixup_exception() helper instead of the open-coded
search_extable() users.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_64.c | 47 ++++++++++++--------------------------
 1 file changed, 15 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 7118fa0320ae..37b07d08704b 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -601,19 +601,12 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 	}
 
 
-	/* kernel trap */ 
-	{	     
-		const struct exception_table_entry *fixup;
-		fixup = search_exception_tables(regs->ip);
-		if (fixup)
-			regs->ip = fixup->fixup;
-		else {
-			tsk->thread.error_code = error_code;
-			tsk->thread.trap_no = trapnr;
-			die(str, regs, error_code);
-		}
-		return;
+	if (!fixup_exception(regs)) {
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+		die(str, regs, error_code);
 	}
+	return;
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
@@ -703,22 +696,15 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 		return;
 	} 
 
-	/* kernel gp */
-	{
-		const struct exception_table_entry *fixup;
-		fixup = search_exception_tables(regs->ip);
-		if (fixup) {
-			regs->ip = fixup->fixup;
-			return;
-		}
+	if (fixup_exception(regs))
+		return;
 
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = 13;
-		if (notify_die(DIE_GPF, "general protection fault", regs,
-					error_code, 13, SIGSEGV) == NOTIFY_STOP)
-			return;
-		die("general protection fault", regs, error_code);
-	}
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 13;
+	if (notify_die(DIE_GPF, "general protection fault", regs,
+				error_code, 13, SIGSEGV) == NOTIFY_STOP)
+		return;
+	die("general protection fault", regs, error_code);
 }
 
 static __kprobes void
@@ -910,12 +896,9 @@ clear_TF_reenable:
 
 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 {
-	const struct exception_table_entry *fixup;
-	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		regs->ip = fixup->fixup;
+	if (fixup_exception(regs))
 		return 1;
-	}
+
 	notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
 	/* Illegal floating point operation in the kernel */
 	current->thread.trap_no = trapnr;

From 938f667198179dc0c8424e2cfac9cd9fe405bee3 Mon Sep 17 00:00:00 2001
From: Paolo Ciarrocchi <paolo.ciarrocchi@gmail.com>
Date: Wed, 30 Jan 2008 13:33:00 +0100
Subject: [PATCH 548/890] x86: coding style fixes in arch/x86/pci/fixup.c

Simple coding style fixes.

no code changed:

   text    data     bss     dec     hex filename
   3139     576     194    3909     f45 fixup.o.before
   3139     576     194    3909     f45 fixup.o.after

  md5:
   9a3467057478b2d99962bdd448282eeb  fixup.o.before.asm
   9a3467057478b2d99962bdd448282eeb  fixup.o.after.asm

Signed-off-by: Paolo Ciarrocchi <paolo.ciarrocchi@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/fixup.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6cff66dd0c91..cb63007e20b2 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -19,7 +19,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 
 	printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
 	reg = 0xd0;
-	for(pxb=0; pxb<2; pxb++) {
+	for(pxb = 0; pxb < 2; pxb++) {
 		pci_read_config_byte(d, reg++, &busno);
 		pci_read_config_byte(d, reg++, &suba);
 		pci_read_config_byte(d, reg++, &subb);
@@ -56,7 +56,7 @@ static void __devinit  pci_fixup_umc_ide(struct pci_dev *d)
 	int i;
 
 	printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d));
-	for(i=0; i<4; i++)
+	for(i = 0; i < 4; i++)
 		d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
@@ -127,7 +127,7 @@ static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
 		   NB latency to zero */
 		pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
 
-		where = 0x95; /* the memory write queue timer register is 
+		where = 0x95; /* the memory write queue timer register is
 				different for the KT266x's: 0x95 not 0x55 */
 	} else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
 			(d->revision == VIA_8363_KL133_REVISION_ID ||
@@ -230,7 +230,7 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
 
 	if ((offset) && (where == offset))
 		value = value & 0xfffffffc;
-	
+
 	return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
 }
 
@@ -271,8 +271,8 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
 		 * after hot-remove, the pbus->devices is empty and this code
 		 * will set the offsets to zero and the bus ops to parent's bus
 		 * ops, which is unmodified.
-	 	 */
-		for (i= GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
+		 */
+		for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
 			quirk_aspm_offset[i] = 0;
 
 		pbus->ops = pbus->parent->ops;
@@ -286,17 +286,17 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
 		list_for_each_entry(dev, &pbus->devices, bus_list) {
 			/* There are 0 to 8 devices attached to this bus */
 			cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP);
-			quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)]= cap_base + 0x10;
+			quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = cap_base + 0x10;
 		}
 		pbus->ops = &quirk_pcie_aspm_ops;
 	}
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA,	pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA1,	pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB,	pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB1,	pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC,	pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC1,	pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA,	pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA1,	pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB,	pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PB1,	pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC,	pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC1,	pcie_rootport_aspm_quirk);
 
 /*
  * Fixup to mark boot BIOS video selected by BIOS before it changes
@@ -336,8 +336,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
 		 * PCI header type NORMAL.
 		 */
 		if (bridge
-		    &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		       ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
+		    && ((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		       || (bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
 			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
 						&config);
 			if (!(config & PCI_BRIDGE_CTL_VGA))

From 3a556b26a2718e48aa2b6ce06ea4875ddcd0778e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jan 2008 13:33:00 +0100
Subject: [PATCH 549/890] x86: big ticket locks

This implements ticket lock support for more than 255 CPUs on x86. The
code gets switched according to the configured NR_CPUS.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/spinlock.h | 126 ++++++++++++++++++++++++++++---------
 1 file changed, 98 insertions(+), 28 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 2076d5d62d83..23804c1890ff 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -35,10 +35,35 @@ typedef int _slock_t;
 # define LOCK_PTR_REG "D"
 #endif
 
-#if (NR_CPUS > 256)
-#error spinlock supports a maximum of 256 CPUs
+#if defined(CONFIG_X86_32) && \
+	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
+/*
+ * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
+ * (PPro errata 66, 92)
+ */
+# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
+#else
+# define UNLOCK_LOCK_PREFIX
 #endif
 
+/*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the the initial value of the tail.
+ *
+ * We use an xadd covering *both* parts of the lock, to increment the tail and
+ * also load the position of the head, which takes care of memory ordering
+ * issues and should be optimal for the uncontended case. Note the tail must be
+ * in the high part, because a wide xadd increment of the low part would carry
+ * up and contaminate the high part.
+ *
+ * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
+ * save some instructions and make the code more elegant. There really isn't
+ * much between them in performance though, especially as locks are out of line.
+ */
+#if (NR_CPUS < 256)
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = *(volatile signed int *)(&(lock)->slock);
@@ -57,21 +82,6 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
 	short inc = 0x0100;
 
-	/*
-	 * Ticket locks are conceptually two bytes, one indicating the current
-	 * head of the queue, and the other indicating the current tail. The
-	 * lock is acquired by atomically noting the tail and incrementing it
-	 * by one (thus adding ourself to the queue and noting our position),
-	 * then waiting until the head becomes equal to the the initial value
-	 * of the tail.
-	 *
-	 * This uses a 16-bit xadd to increment the tail and also load the
-	 * position of the head, which takes care of memory ordering issues
-	 * and should be optimal for the uncontended case. Note the tail must
-	 * be in the high byte, otherwise the 16-bit wide increment of the low
-	 * byte would carry up and contaminate the high byte.
-	 */
-
 	__asm__ __volatile__ (
 		LOCK_PREFIX "xaddw %w0, %1\n"
 		"1:\t"
@@ -111,17 +121,6 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-#if defined(CONFIG_X86_32) && \
-	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
-/*
- * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
- * (PPro errata 66, 92)
- */
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
-#else
-# define UNLOCK_LOCK_PREFIX
-#endif
-
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__asm__ __volatile__(
@@ -130,6 +129,77 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 		:
 		:"memory", "cc");
 }
+#else
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	int inc = 0x00010000;
+	int tmp;
+
+	__asm__ __volatile__ (
+		"lock ; xaddl %0, %1\n"
+		"movzwl %w0, %2\n\t"
+		"shrl $16, %0\n\t"
+		"1:\t"
+		"cmpl %0, %2\n\t"
+		"je 2f\n\t"
+		"rep ; nop\n\t"
+		"movzwl %1, %2\n\t"
+		/* don't need lfence here, because loads are in-order */
+		"jmp 1b\n"
+		"2:"
+		:"+Q" (inc), "+m" (lock->slock), "=r" (tmp)
+		:
+		:"memory", "cc");
+}
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	int tmp;
+	int new;
+
+	asm volatile(
+		"movl %2,%0\n\t"
+		"movl %0,%1\n\t"
+		"roll $16, %0\n\t"
+		"cmpl %0,%1\n\t"
+		"jne 1f\n\t"
+		"addl $0x00010000, %1\n\t"
+		"lock ; cmpxchgl %1,%2\n\t"
+		"1:"
+		"sete %b1\n\t"
+		"movzbl %b1,%0\n\t"
+		:"=&a" (tmp), "=r" (new), "+m" (lock->slock)
+		:
+		: "memory", "cc");
+
+	return tmp;
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__asm__ __volatile__(
+		UNLOCK_LOCK_PREFIX "incw %0"
+		:"+m" (lock->slock)
+		:
+		:"memory", "cc");
+}
+#endif
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {

From d50efc6c40620b2e11648cac64ebf4a824e40382 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:00 +0100
Subject: [PATCH 550/890] x86: fix UML and -regparm=3

introduce the "asmregparm" calling convention: for functions
implemented in assembly with a fixed regparm input parameters
calling convention.

mark the semaphore and rwsem slowpath functions with that.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/linkage.h      |  5 +++++
 include/asm-x86/rwsem.h        | 12 ++++++++----
 include/asm-x86/semaphore_32.h |  8 ++++----
 include/linux/linkage.h        |  4 ++++
 lib/rwsem.c                    |  8 ++++----
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 5a4c95905420..31739c7d66a9 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h
@@ -9,6 +9,11 @@
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
 #define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))
+/*
+ * For 32-bit UML - mark functions implemented in assembly that use
+ * regparm input parameters:
+ */
+#define asmregparm __attribute__((regparm(3)))
 #endif
 
 #ifdef CONFIG_X86_ALIGNMENT_16
diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index a7e7e14cb430..520a379f4b80 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h
@@ -44,10 +44,14 @@
 
 struct rwsem_waiter;
 
-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
-extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem));
+extern asmregparm struct rw_semaphore *
+ rwsem_down_read_failed(struct rw_semaphore *sem);
+extern asmregparm struct rw_semaphore *
+ rwsem_down_write_failed(struct rw_semaphore *sem);
+extern asmregparm struct rw_semaphore *
+ rwsem_wake(struct rw_semaphore *);
+extern asmregparm struct rw_semaphore *
+ rwsem_downgrade_wake(struct rw_semaphore *sem);
 
 /*
  * the semaphore definition
diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h
index cc826e85323f..ac96d3804d0c 100644
--- a/include/asm-x86/semaphore_32.h
+++ b/include/asm-x86/semaphore_32.h
@@ -83,10 +83,10 @@ static inline void init_MUTEX_LOCKED (struct semaphore *sem)
 	sema_init(sem, 0);
 }
 
-void __down_failed(void /* special register calling convention */);
-int  __down_failed_interruptible(void  /* params in registers */);
-int  __down_failed_trylock(void  /* params in registers */);
-void __up_wakeup(void /* special register calling convention */);
+extern asmregparm void __down_failed(atomic_t *count_ptr);
+extern asmregparm int  __down_failed_interruptible(atomic_t *count_ptr);
+extern asmregparm int  __down_failed_trylock(atomic_t *count_ptr);
+extern asmregparm void __up_wakeup(atomic_t *count_ptr);
 
 /*
  * This is ugly, but we want the default case to fall through.
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index ff203dd02919..ceeeb5ea5b15 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -13,6 +13,10 @@
 #define asmlinkage CPP_ASMLINKAGE
 #endif
 
+#ifndef asmregparm
+# define asmregparm
+#endif
+
 #ifndef prevent_tail_call
 # define prevent_tail_call(ret) do { } while (0)
 #endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 7d02700a4b0e..3e3365e5665e 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -187,7 +187,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 /*
  * wait for the read lock to be granted
  */
-struct rw_semaphore fastcall __sched *
+asmregparm struct rw_semaphore __sched *
 rwsem_down_read_failed(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
@@ -201,7 +201,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
 /*
  * wait for the write lock to be granted
  */
-struct rw_semaphore fastcall __sched *
+asmregparm struct rw_semaphore __sched *
 rwsem_down_write_failed(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
@@ -216,7 +216,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here
  */
-struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
+asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
@@ -236,7 +236,7 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
  * - caller incremented waiting part of count and discovered it still negative
  * - just wake up any readers at the front of the queue
  */
-struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
+asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 

From 3d97775a80a03013abe1fd681620925f884ad18a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:33:00 +0100
Subject: [PATCH 551/890] x86: move out tick_nohz_stop_sched_tick() call from
 the loop

Move out tick_nohz_stop_sched_tick() call from the loop in cpu_idle
same as 32-bit version.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 78d80067b7f9..a0130eb2fa50 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -170,14 +170,13 @@ void cpu_idle(void)
 	current_thread_info()->status |= TS_POLLING;
 	/* endless idle loop with no priority at all */
 	while (1) {
+		tick_nohz_stop_sched_tick();
 		while (!need_resched()) {
 			void (*idle)(void);
 
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
-			tick_nohz_stop_sched_tick();
-
 			rmb();
 			idle = pm_idle;
 			if (!idle)

From 1379a5ce3ffc549a7ff3daffc49c5e1c372717a3 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:00 +0100
Subject: [PATCH 552/890] x86: move get_segment_eip() to step.c

get_segment_eip has similarities to convert_rip_to_linear(),
and is used in a similar context.  Move get_segment_eip to
step.c to allow easier consolidation.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c   | 81 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/fault_32.c   | 77 --------------------------------------
 arch/x86/mm/fault_64.c   | 77 --------------------------------------
 include/asm-x86/ptrace.h |  2 +
 4 files changed, 83 insertions(+), 154 deletions(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 21ea22fda5fc..5884dd485db8 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,87 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 
+#ifdef CONFIG_X86_32
+#include <linux/uaccess.h>
+
+#include <asm/desc.h>
+
+/*
+ * Return EIP plus the CS segment base.  The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+ *
+ * The segment is checked, because it might have been changed by another
+ * task between the original faulting instruction and here.
+ *
+ * If CS is no longer a valid code segment, or if EIP is beyond the
+ * limit, or if it is a kernel address when CS is not a kernel segment,
+ * then the returned value will be greater than *eip_limit.
+ *
+ * This is slow, but is very rarely executed.
+ */
+unsigned long get_segment_eip(struct pt_regs *regs,
+					    unsigned long *eip_limit)
+{
+	unsigned long ip = regs->ip;
+	unsigned seg = regs->cs & 0xffff;
+	u32 seg_ar, seg_limit, base, *desc;
+
+	/* Unlikely, but must come before segment checks. */
+	if (unlikely(regs->flags & VM_MASK)) {
+		base = seg << 4;
+		*eip_limit = base + 0xffff;
+		return base + (ip & 0xffff);
+	}
+
+	/* The standard kernel/user address space limit. */
+	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+
+	/* By far the most common cases. */
+	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+		return ip;
+
+	/* Check the segment exists, is within the current LDT/GDT size,
+	   that kernel/user (ring 0..3) has the appropriate privilege,
+	   that it's a code segment, and get the limit. */
+	__asm__("larl %3,%0; lsll %3,%1"
+		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
+	if ((~seg_ar & 0x9800) || ip > seg_limit) {
+		*eip_limit = 0;
+		return 1;	 /* So that returned ip > *eip_limit. */
+	}
+
+	/* Get the GDT/LDT descriptor base.
+	   When you look for races in this code remember that
+	   LDT and other horrors are only used in user space. */
+	if (seg & (1<<2)) {
+		/* Must lock the LDT while reading it. */
+		mutex_lock(&current->mm->context.lock);
+		desc = current->mm->context.ldt;
+		desc = (void *)desc + (seg & ~7);
+	} else {
+		/* Must disable preemption while reading the GDT. */
+		desc = (u32 *)get_cpu_gdt_table(get_cpu());
+		desc = (void *)desc + (seg & ~7);
+	}
+
+	/* Decode the code segment base from the descriptor */
+	base = get_desc_base((struct desc_struct *)desc);
+
+	if (seg & (1<<2))
+		mutex_unlock(&current->mm->context.lock);
+	else
+		put_cpu();
+
+	/* Adjust EIP and segment limit, and clamp at the kernel limit.
+	   It's legitimate for segments to wrap at 0xffffffff. */
+	seg_limit += base;
+	if (seg_limit < *eip_limit && seg_limit >= base)
+		*eip_limit = seg_limit;
+	return ip + base;
+}
+#endif
+
 #ifdef CONFIG_X86_32
 static
 #endif
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 421e276770ad..b92922a1d65f 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -61,83 +61,6 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-/*
- * Return EIP plus the CS segment base.  The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-static inline unsigned long get_segment_eip(struct pt_regs *regs,
-					    unsigned long *eip_limit)
-{
-	unsigned long ip = regs->ip;
-	unsigned seg = regs->cs & 0xffff;
-	u32 seg_ar, seg_limit, base, *desc;
-
-	/* Unlikely, but must come before segment checks. */
-	if (unlikely(regs->flags & VM_MASK)) {
-		base = seg << 4;
-		*eip_limit = base + 0xffff;
-		return base + (ip & 0xffff);
-	}
-
-	/* The standard kernel/user address space limit. */
-	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
-	/* By far the most common cases. */
-	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-		return ip;
-
-	/* Check the segment exists, is within the current LDT/GDT size,
-	   that kernel/user (ring 0..3) has the appropriate privilege,
-	   that it's a code segment, and get the limit. */
-	__asm__ ("larl %3,%0; lsll %3,%1"
-		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-	if ((~seg_ar & 0x9800) || ip > seg_limit) {
-		*eip_limit = 0;
-		return 1;	 /* So that returned ip > *eip_limit. */
-	}
-
-	/* Get the GDT/LDT descriptor base.
-	   When you look for races in this code remember that
-	   LDT and other horrors are only used in user space. */
-	if (seg & (1<<2)) {
-		/* Must lock the LDT while reading it. */
-		mutex_lock(&current->mm->context.lock);
-		desc = current->mm->context.ldt;
-		desc = (void *)desc + (seg & ~7);
-	} else {
-		/* Must disable preemption while reading the GDT. */
-		desc = (u32 *)get_cpu_gdt_table(get_cpu());
-		desc = (void *)desc + (seg & ~7);
-	}
-
-	/* Decode the code segment base from the descriptor */
-	base = get_desc_base((struct desc_struct *)desc);
-
-	if (seg & (1<<2))
-		mutex_unlock(&current->mm->context.lock);
-	else
-		put_cpu();
-
-	/* Adjust EIP and segment limit, and clamp at the kernel limit.
-	   It's legitimate for segments to wrap at 0xffffffff. */
-	seg_limit += base;
-	if (seg_limit < *eip_limit && seg_limit >= base)
-		*eip_limit = seg_limit;
-	return ip + base;
-}
-#endif
-
 /*
  * X86_32
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 95f142f5b5cc..e82832961d72 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -64,83 +64,6 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-/*
- * Return EIP plus the CS segment base.  The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-static inline unsigned long get_segment_eip(struct pt_regs *regs,
-					    unsigned long *eip_limit)
-{
-	unsigned long ip = regs->ip;
-	unsigned seg = regs->cs & 0xffff;
-	u32 seg_ar, seg_limit, base, *desc;
-
-	/* Unlikely, but must come before segment checks. */
-	if (unlikely(regs->flags & VM_MASK)) {
-		base = seg << 4;
-		*eip_limit = base + 0xffff;
-		return base + (ip & 0xffff);
-	}
-
-	/* The standard kernel/user address space limit. */
-	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
-	/* By far the most common cases. */
-	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-		return ip;
-
-	/* Check the segment exists, is within the current LDT/GDT size,
-	   that kernel/user (ring 0..3) has the appropriate privilege,
-	   that it's a code segment, and get the limit. */
-	__asm__("larl %3,%0; lsll %3,%1"
-		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-	if ((~seg_ar & 0x9800) || ip > seg_limit) {
-		*eip_limit = 0;
-		return 1;	 /* So that returned ip > *eip_limit. */
-	}
-
-	/* Get the GDT/LDT descriptor base.
-	   When you look for races in this code remember that
-	   LDT and other horrors are only used in user space. */
-	if (seg & (1<<2)) {
-		/* Must lock the LDT while reading it. */
-		mutex_lock(&current->mm->context.lock);
-		desc = current->mm->context.ldt;
-		desc = (void *)desc + (seg & ~7);
-	} else {
-		/* Must disable preemption while reading the GDT. */
-		desc = (u32 *)get_cpu_gdt_table(get_cpu());
-		desc = (void *)desc + (seg & ~7);
-	}
-
-	/* Decode the code segment base from the descriptor */
-	base = get_desc_base((struct desc_struct *)desc);
-
-	if (seg & (1<<2))
-		mutex_unlock(&current->mm->context.lock);
-	else
-		put_cpu();
-
-	/* Adjust EIP and segment limit, and clamp at the kernel limit.
-	   It's legitimate for segments to wrap at 0xffffffff. */
-	seg_limit += base;
-	if (seg_limit < *eip_limit && seg_limit >= base)
-		*eip_limit = seg_limit;
-	return ip + base;
-}
-#endif
-
 /*
  * X86_32
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 61946fe8c085..cc4456667d89 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -184,6 +184,8 @@ convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #ifdef __KERNEL__
 
+unsigned long get_segment_eip(struct pt_regs *regs, unsigned long *eip_limit);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */

From 1017579a8c6ad6f32154e41ddfdf96d1efbb21b4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:01 +0100
Subject: [PATCH 553/890] x86: trivial whitespace in kprobes.c

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 53ba6a5b6550..93aff49798ee 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -583,8 +583,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
- void __kprobes kretprobe_trampoline_holder(void)
- {
+void __kprobes kretprobe_trampoline_holder(void)
+{
 	asm volatile (
 			".global kretprobe_trampoline\n"
 			"kretprobe_trampoline: \n"
@@ -670,7 +670,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			"	popf\n"
 #endif
 			"	ret\n");
- }
+}
 
 /*
  * Called from kretprobe_trampoline

From 2a6648e65a2939b80c44262975176a15bac3a75e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:01 +0100
Subject: [PATCH 554/890] x86: <asm/segment.h>: boot GDT entries are
 32/64-independent

The boot GDT entries are common between 32- and 64-bit mode, so move
them to common code instead of having two identical copies.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/segment.h    | 24 ++++++++----------------
 include/asm-x86/segment_32.h |  1 -
 2 files changed, 8 insertions(+), 17 deletions(-)
 delete mode 100644 include/asm-x86/segment_32.h

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index c7509df79141..1ff05a9f5513 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,6 +1,14 @@
 #ifndef _ASM_X86_SEGMENT_H_
 #define _ASM_X86_SEGMENT_H_
 
+/* Simple and small GDT entries for booting only */
+
+#define GDT_ENTRY_BOOT_CS	2
+#define __BOOT_CS		(GDT_ENTRY_BOOT_CS * 8)
+
+#define GDT_ENTRY_BOOT_DS	(GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS		(GDT_ENTRY_BOOT_DS * 8)
+
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
@@ -86,14 +94,6 @@
  */
 #define GDT_ENTRIES 32
 
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS		2
-#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
-
 /* The PnP BIOS entries in the GDT */
 #define GDT_ENTRY_PNPBIOS_CS32		(GDT_ENTRY_PNPBIOS_BASE + 0)
 #define GDT_ENTRY_PNPBIOS_CS16		(GDT_ENTRY_PNPBIOS_BASE + 1)
@@ -136,14 +136,6 @@
 #else
 #include <asm/cache.h>
 
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS		2
-#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS		(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
-
 #define __KERNEL_CS	0x10
 #define __KERNEL_DS	0x18
 
diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h
deleted file mode 100644
index 8b137891791f..000000000000
--- a/include/asm-x86/segment_32.h
+++ /dev/null
@@ -1 +0,0 @@
-

From c4d9ba6da9f050ebb7e0d70769e3dca0fd45334f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:01 +0100
Subject: [PATCH 555/890] x86 setup: make PM transition more paranoid; cleanup
 32-bit entry

Make the transition to protected mode more paranoid by having
back-to-back near jump (to synchronize the 386/486 prefetch queue) and
far jump (to set up the code segment.)

While we're at it, zero as many registers as practical (for future
expandability of the 32-bit entry interface) and enter 32-bit mode
with a valid stack.  Note that the 32-bit code cannot rely on this
stack, or we'll break all other existing users of the 32-bit
entrypoint, but it may make debugging hacks easier to write.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/pmjump.S | 44 ++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index fa6bed1fac14..ef0da1f2c7fd 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -29,12 +29,13 @@
  */
 protected_mode_jump:
 	movl	%edx, %esi		# Pointer to boot_params table
-	movl	%eax, 2f		# Patch ljmpl instruction
+
+	xorl	%ebx, %ebx
+	movw	%cs, %bx
+	shll	$4, %ebx
+	addl	%ebx, 2f
 
 	movw	$__BOOT_DS, %cx
-	xorl	%ebx, %ebx		# Per the 32-bit boot protocol
-	xorl	%ebp, %ebp		# Per the 32-bit boot protocol
-	xorl	%edi, %edi		# Per the 32-bit boot protocol
 
 	movl	%cr0, %edx
 	orb	$1, %dl			# Protected mode (PE) bit
@@ -42,15 +43,34 @@ protected_mode_jump:
 	jmp	1f			# Short jump to serialize on 386/486
 1:
 
-	movw	%cx, %ds
-	movw	%cx, %es
-	movw	%cx, %fs
-	movw	%cx, %gs
-	movw	%cx, %ss
-
-	# Jump to the 32-bit entrypoint
+	# Transition to 32-bit mode
 	.byte	0x66, 0xea		# ljmpl opcode
-2:	.long	0			# offset
+2:	.long	in_pm32			# offset
 	.word	__BOOT_CS		# segment
 
 	.size	protected_mode_jump, .-protected_mode_jump
+
+	.code32
+	.type	in_pm32, @function
+in_pm32:
+	# Set up data segments for flat 32-bit mode
+	movl	%ecx, %ds
+	movl	%ecx, %es
+	movl	%ecx, %fs
+	movl	%ecx, %gs
+	movl	%ecx, %ss
+	# The 32-bit code sets up its own stack, but this way we do have
+	# a valid stack if some debugging hack wants to use it.
+	addl	%ebx, %esp
+
+	# Clear registers to allow for future extensions to the
+	# 32-bit boot protocol
+	xorl	%ecx, %ecx
+	xorl	%edx, %edx
+	xorl	%ebx, %ebx
+	xorl	%ebp, %ebp
+	xorl	%edi, %edi
+
+	jmpl	*%eax			# Jump to the 32-bit entrypoint
+
+	.size	in_pm32, .-in_pm32

From 88089519f302f1296b4739be45699f06f728ec31 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:02 +0100
Subject: [PATCH 556/890] x86 setup: initialize LDTR and TR to make life easier
 to Intel VT

Intel VT doesn't like to engage when the protected-mode state isn't
fully initialized.  Make life easier for it by initializing LDTR (to
null) and TR (to a dummy hunk of low memory which will never actually
be touched.)

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/pm.c        | 4 ++++
 arch/x86/boot/pmjump.S    | 7 +++++++
 include/asm-x86/segment.h | 3 +++
 3 files changed, 14 insertions(+)

diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 09fb342cc62e..b23cbdc7d547 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -121,6 +121,10 @@ static void setup_gdt(void)
 		[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
 		/* DS: data, read/write, 4 GB, base 0 */
 		[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+		/* TSS: 32-bit tss, 104 bytes, base 4096 */
+		/* We only have a TSS here to keep Intel VT happy;
+		   we don't actually use it for anything. */
+		[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
 	};
 	/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
 	   of the gdt_ptr contents.  Thus, make it static so it will
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index ef0da1f2c7fd..f7153d0d476e 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -36,6 +36,7 @@ protected_mode_jump:
 	addl	%ebx, 2f
 
 	movw	$__BOOT_DS, %cx
+	movw	$__BOOT_TSS, %di
 
 	movl	%cr0, %edx
 	orb	$1, %dl			# Protected mode (PE) bit
@@ -63,6 +64,9 @@ in_pm32:
 	# a valid stack if some debugging hack wants to use it.
 	addl	%ebx, %esp
 
+	# Set up TR to make Intel VT happy
+	ltr	%di
+
 	# Clear registers to allow for future extensions to the
 	# 32-bit boot protocol
 	xorl	%ecx, %ecx
@@ -71,6 +75,9 @@ in_pm32:
 	xorl	%ebp, %ebp
 	xorl	%edi, %edi
 
+	# Set up LDTR to make Intel VT happy
+	lldt	%cx
+
 	jmpl	*%eax			# Jump to the 32-bit entrypoint
 
 	.size	in_pm32, .-in_pm32
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 1ff05a9f5513..57c8d3723836 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -9,6 +9,9 @@
 #define GDT_ENTRY_BOOT_DS	(GDT_ENTRY_BOOT_CS + 1)
 #define __BOOT_DS		(GDT_ENTRY_BOOT_DS * 8)
 
+#define GDT_ENTRY_BOOT_TSS	(GDT_ENTRY_BOOT_CS + 2)
+#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS * 8)
+
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:

From 02a7b425e82cd0052e5eaedbae81a522c6aae6c4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:02 +0100
Subject: [PATCH 557/890] x86 setup: use X86_CR0_PE macro instead of hard-coded
 constant

To set CR0.PE, use the X86_CR0_PE macro defined in
<asm/processor-flags.h> instead of hardcoding it as a constant (1).

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/pmjump.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index f7153d0d476e..f5402d51f7c3 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -15,6 +15,7 @@
  */
 
 #include <asm/boot.h>
+#include <asm/processor-flags.h>
 #include <asm/segment.h>
 
 	.text
@@ -39,7 +40,7 @@ protected_mode_jump:
 	movw	$__BOOT_TSS, %di
 
 	movl	%cr0, %edx
-	orb	$1, %dl			# Protected mode (PE) bit
+	orb	$X86_CR0_PE, %dl	# Protected mode
 	movl	%edx, %cr0
 	jmp	1f			# Short jump to serialize on 386/486
 1:

From 1cac5004e953506166e980da5776d5cc1c176d79 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:02 +0100
Subject: [PATCH 558/890] x86 setup: display VESA graphics modes in vga=ask
 menu

Display VESA graphics modes, with their mode IDs, in the vga=ask
menu.  Most VESA mode numbers are platform-dependent, so it helps to
have an easy way to display them.

Based in part on a patch by Petr Vandrovec <petr@vandrovec.name>.

Cc: Petr Vandrovec <petr@vandrovec.name>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/video-bios.c |  3 ++-
 arch/x86/boot/video-vesa.c | 26 +++++++++++++++++---------
 arch/x86/boot/video-vga.c  | 20 ++++++++++----------
 arch/x86/boot/video.c      | 33 ++++++++++++++++++++++++++++++---
 arch/x86/boot/video.h      |  3 ++-
 5 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index ed0672a81870..ff664a117096 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -104,6 +104,7 @@ static int bios_probe(void)
 
 		mi = GET_HEAP(struct mode_info, 1);
 		mi->mode = VIDEO_FIRST_BIOS+mode;
+		mi->depth = 0;	/* text */
 		mi->x = rdfs16(0x44a);
 		mi->y = rdfs8(0x484)+1;
 		nmodes++;
@@ -116,7 +117,7 @@ static int bios_probe(void)
 
 __videocard video_bios =
 {
-	.card_name	= "BIOS (scanned)",
+	.card_name	= "BIOS",
 	.probe		= bios_probe,
 	.set_mode	= bios_set_mode,
 	.unsafe		= 1,
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4716b9a96357..662dd2f13068 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -79,20 +79,28 @@ static int vesa_probe(void)
 			/* Text Mode, TTY BIOS supported,
 			   supported by hardware */
 			mi = GET_HEAP(struct mode_info, 1);
-			mi->mode = mode + VIDEO_FIRST_VESA;
-			mi->x    = vminfo.h_res;
-			mi->y    = vminfo.v_res;
+			mi->mode  = mode + VIDEO_FIRST_VESA;
+			mi->depth = 0; /* text */
+			mi->x     = vminfo.h_res;
+			mi->y     = vminfo.v_res;
 			nmodes++;
-		} else if ((vminfo.mode_attr & 0x99) == 0x99) {
+		} else if ((vminfo.mode_attr & 0x99) == 0x99 &&
+			   (vminfo.memory_layout == 4 ||
+			    vminfo.memory_layout == 6) &&
+			   vminfo.memory_planes == 1) {
 #ifdef CONFIG_FB
 			/* Graphics mode, color, linear frame buffer
-			   supported -- register the mode but hide from
-			   the menu.  Only do this if framebuffer is
-			   configured, however, otherwise the user will
-			   be left without a screen. */
+			   supported.  Only register the mode if
+			   if framebuffer is configured, however,
+			   otherwise the user will be left without a screen.
+			   We don't require CONFIG_FB_VESA, however, since
+			   some of the other framebuffer drivers can use
+			   this mode-setting, too. */
 			mi = GET_HEAP(struct mode_info, 1);
 			mi->mode = mode + VIDEO_FIRST_VESA;
-			mi->x = mi->y = 0;
+			mi->depth = vminfo.bpp;
+			mi->x = vminfo.h_res;
+			mi->y = vminfo.v_res;
 			nmodes++;
 #endif
 		}
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index aef02f9ec0c1..7259387b7d19 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -18,22 +18,22 @@
 #include "video.h"
 
 static struct mode_info vga_modes[] = {
-	{ VIDEO_80x25,  80, 25 },
-	{ VIDEO_8POINT, 80, 50 },
-	{ VIDEO_80x43,  80, 43 },
-	{ VIDEO_80x28,  80, 28 },
-	{ VIDEO_80x30,  80, 30 },
-	{ VIDEO_80x34,  80, 34 },
-	{ VIDEO_80x60,  80, 60 },
+	{ VIDEO_80x25,  80, 25, 0 },
+	{ VIDEO_8POINT, 80, 50, 0 },
+	{ VIDEO_80x43,  80, 43, 0 },
+	{ VIDEO_80x28,  80, 28, 0 },
+	{ VIDEO_80x30,  80, 30, 0 },
+	{ VIDEO_80x34,  80, 34, 0 },
+	{ VIDEO_80x60,  80, 60, 0 },
 };
 
 static struct mode_info ega_modes[] = {
-	{ VIDEO_80x25,  80, 25 },
-	{ VIDEO_8POINT, 80, 43 },
+	{ VIDEO_80x25,  80, 25, 0 },
+	{ VIDEO_8POINT, 80, 43, 0 },
 };
 
 static struct mode_info cga_modes[] = {
-	{ VIDEO_80x25,  80, 25 },
+	{ VIDEO_80x25,  80, 25, 0 },
 };
 
 __videocard video_vga;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index ad9712f01739..696d08f3843c 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -293,13 +293,28 @@ static void display_menu(void)
 	struct mode_info *mi;
 	char ch;
 	int i;
+	int nmodes;
+	int modes_per_line;
+	int col;
 
-	puts("Mode:    COLSxROWS:\n");
+	nmodes = 0;
+	for (card = video_cards; card < video_cards_end; card++)
+		nmodes += card->nmodes;
 
+	modes_per_line = 1;
+	if (nmodes >= 20)
+		modes_per_line = 3;
+
+	for (col = 0; col < modes_per_line; col++)
+		puts("Mode: Resolution:  Type: ");
+	putchar('\n');
+
+	col = 0;
 	ch = '0';
 	for (card = video_cards; card < video_cards_end; card++) {
 		mi = card->modes;
 		for (i = 0; i < card->nmodes; i++, mi++) {
+			char resbuf[32];
 			int visible = mi->x && mi->y;
 			u16 mode_id = mi->mode ? mi->mode :
 				(mi->y << 8)+mi->x;
@@ -307,8 +322,18 @@ static void display_menu(void)
 			if (!visible)
 				continue; /* Hidden mode */
 
-			printf("%c  %04X  %3dx%-3d  %s\n",
-			       ch, mode_id, mi->x, mi->y, card->card_name);
+			if (mi->depth)
+				sprintf(resbuf, "%dx%d", mi->y, mi->depth);
+			else
+				sprintf(resbuf, "%d", mi->y);
+
+			printf("%c %03X %4dx%-7s %-6s",
+			       ch, mode_id, mi->x, resbuf, card->card_name);
+			col++;
+			if (col >= modes_per_line) {
+				putchar('\n');
+				col = 0;
+			}
 
 			if (ch == '9')
 				ch = 'a';
@@ -318,6 +343,8 @@ static void display_menu(void)
 				ch++;
 		}
 	}
+	if (col)
+		putchar('\n');
 }
 
 #define H(x)	((x)-'a'+10)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index b92447d51213..d69347f79e8e 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -83,7 +83,8 @@ void store_screen(void);
 
 struct mode_info {
 	u16 mode;		/* Mode number (vga= style) */
-	u8  x, y;		/* Width, height */
+	u16 x, y;		/* Width, height */
+	u16 depth;		/* Bits per pixel, 0 for text mode */
 };
 
 struct card_info {

From 811a0fff5d6e80e18e06be88e0fb685f3924bf8f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:02 +0100
Subject: [PATCH 559/890] x86 setup: fix constraints in segment accessor
 functions

Fix the operand constraints for the segment accessor functions,
{rd,wr}{fs,gs}*.  In particular, the 8-bit functions used "r"
constraints instead of "q" constraints.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/boot.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index d2b5adf46512..8ec575249aa9 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -109,7 +109,7 @@ typedef unsigned int addr_t;
 static inline u8 rdfs8(addr_t addr)
 {
 	u8 v;
-	asm volatile("movb %%fs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
+	asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
 	return v;
 }
 static inline u16 rdfs16(addr_t addr)
@@ -127,21 +127,21 @@ static inline u32 rdfs32(addr_t addr)
 
 static inline void wrfs8(u8 v, addr_t addr)
 {
-	asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "r" (v));
+	asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
 }
 static inline void wrfs16(u16 v, addr_t addr)
 {
-	asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "r" (v));
+	asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
 }
 static inline void wrfs32(u32 v, addr_t addr)
 {
-	asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "r" (v));
+	asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
 }
 
 static inline u8 rdgs8(addr_t addr)
 {
 	u8 v;
-	asm volatile("movb %%gs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
+	asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
 	return v;
 }
 static inline u16 rdgs16(addr_t addr)
@@ -159,15 +159,15 @@ static inline u32 rdgs32(addr_t addr)
 
 static inline void wrgs8(u8 v, addr_t addr)
 {
-	asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "r" (v));
+	asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
 }
 static inline void wrgs16(u16 v, addr_t addr)
 {
-	asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "r" (v));
+	asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
 }
 static inline void wrgs32(u32 v, addr_t addr)
 {
-	asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "r" (v));
+	asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
 }
 
 /* Note: these only return true/false, not a signed return value! */

From 32d0b9898029b7b3c7f161d31f57c4831d9049eb Mon Sep 17 00:00:00 2001
From: "devzero@web.de" <devzero@web.de>
Date: Wed, 30 Jan 2008 13:33:02 +0100
Subject: [PATCH 560/890] x86 setup: early cmdline parser handle boolean
 options

This patch extends the early commandline parser to support boolean options.
The current version in mainline only supports parsing "option=arg" value pairs.

With this it should be easy making other messages like "Uncompressing kernel"
honour the "quiet" parameter, too.

Signed-off-by: Roland Kletzing <devzero@web.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/cmdline.c | 65 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 34bb778c4357..cc65a33bf65c 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -95,3 +95,68 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize)
 
 	return len;
 }
+
+/*
+ * Find a boolean option (like quiet,noapic,nosmp....)
+ *
+ * Returns the position of that option (starts counting with 1)
+ * or 0 on not found
+ */
+int cmdline_find_option_bool(const char *option)
+{
+	u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
+	addr_t cptr;
+	char c;
+	int pos =0 , wstart = 0;
+	const char *opptr = NULL;
+	enum {
+		st_wordstart,	/* Start of word/after whitespace */
+		st_wordcmp,	/* Comparing this word */
+		st_wordskip,	/* Miscompare, skip */
+	} state = st_wordstart;
+
+	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
+		return -1;	/* No command line, or inaccessible */
+
+	cptr = cmdline_ptr & 0xf;
+	set_fs(cmdline_ptr >> 4);
+
+	while (cptr < 0x10000) {
+		c = rdfs8(cptr++);
+		pos++;
+
+		switch (state) {
+		case st_wordstart:
+			if (!c)
+				return 0;
+			else if (myisspace(c))
+				break;
+
+			state = st_wordcmp;
+			opptr = option;
+			wstart = pos;
+			/* fall through */
+
+		case st_wordcmp:
+			if (!*opptr)
+				if (!c || myisspace(c))
+					return wstart;
+				else
+					state = st_wordskip;
+			else if (!c)
+				return 0;
+			else if (c != *opptr++)
+				state = st_wordskip;
+			break;
+
+		case st_wordskip:
+			if (!c)
+				return 0;
+			else if (myisspace(c))
+				state = st_wordstart;
+			break;
+		}
+	}
+
+	return 0;	/* Buffer overrun */
+}

From e479c8306f898fcdb9b36179071eae6338a17364 Mon Sep 17 00:00:00 2001
From: "devzero@web.de" <devzero@web.de>
Date: Wed, 30 Jan 2008 13:33:03 +0100
Subject: [PATCH 561/890] x86 setup: be more verbose when probing EDD

On early boot, probing the Bios for EDD happens without any message.

Enhanced Disk Drive Services (EDD) is a mechanism to match x86 BIOS device
names (int13 device 80h) to Linux device names (e.g. /dev/sda, /dev/hda)

There are buggy Bios out there having problems with EDD. This can be problems
with the Bios itself or with addon cards, too.

This patch is adds an informational message on early boot.

CONFIG_EDD is not set with defconfig, but with allmodconfig (i.e. CONFIG_EDD=m)
so the EDD probe may be active on early boot on many systems nowadays.

I can tell, that the probe is active on SuSE distro and with that I have seen
more than one system hanging endlessly with those "black screen with a blinking
cursor in the the upper left" on installation, making it difficult for the end-
user to find out, what`s the issue.
For sure I have seen this on FujitsuSiemens PCs with i810 and with i815 chipset.

This one also honours the "quiet" bootparam.

Also see:
http://marc.info/?l=linux-kernel&m=119781937207969&w=2
http://marc.info/?l=linux-kernel&m=119783934032326&w=2
http://marc.info/?l=linux-kernel&m=119783678529100&w=2

Signed-off-by: Roland Kletzing <devzero@web.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/edd.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index bd138e442ec2..b3504cb24de7 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -129,6 +129,7 @@ void query_edd(void)
 	char eddarg[8];
 	int do_mbr = 1;
 	int do_edd = 1;
+	int be_quiet;
 	int devno;
 	struct edd_info ei, *edp;
 	u32 *mbrptr;
@@ -140,12 +141,21 @@ void query_edd(void)
 			do_edd = 0;
 	}
 
+	be_quiet = cmdline_find_option_bool("quiet");
+
 	edp    = boot_params.eddbuf;
 	mbrptr = boot_params.edd_mbr_sig_buffer;
 
 	if (!do_edd)
 		return;
 
+	/* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
+	 * so give a hint if this happens.
+	 */
+
+	if (!be_quiet)
+		printf("Probing EDD...");
+
 	for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
 		/*
 		 * Scan the BIOS-supported hard disks and query EDD
@@ -162,6 +172,9 @@ void query_edd(void)
 		if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
 			boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
 	}
+
+	if (!be_quiet)
+		printf("OK\n");
 }
 
 #endif

From f7775016c66c2f45125f22968c351c88868ee7a3 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:03 +0100
Subject: [PATCH 562/890] x86 setup: OK -> ok (no need to scream)

Unnecessary capitals are shouting; no need for it here.
Thus, change "OK" to "ok" and add a space.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/edd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index b3504cb24de7..d3e978030c5c 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -154,7 +154,7 @@ void query_edd(void)
 	 */
 
 	if (!be_quiet)
-		printf("Probing EDD...");
+		printf("Probing EDD... ");
 
 	for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
 		/*
@@ -174,7 +174,7 @@ void query_edd(void)
 	}
 
 	if (!be_quiet)
-		printf("OK\n");
+		printf("ok\n");
 }
 
 #endif

From 70d8abf5df857ca7befe02e5d61fde807420a54c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:03 +0100
Subject: [PATCH 563/890] x86 setup: add missing prototype; formatting fix

Add prototype for cmdline_find_option_bool() missing from:

    x86 setup: early cmdline parser handle boolean options

Also, fix up a minor formatting error in that patch.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/boot.h    | 1 +
 arch/x86/boot/cmdline.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 8ec575249aa9..7822a4983da2 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -241,6 +241,7 @@ int query_apm_bios(void);
 
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
+int cmdline_find_option_bool(const char *option);
 
 /* cpu.c, cpucheck.c */
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index cc65a33bf65c..680408a0f463 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -107,7 +107,7 @@ int cmdline_find_option_bool(const char *option)
 	u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
 	addr_t cptr;
 	char c;
-	int pos =0 , wstart = 0;
+	int pos = 0, wstart = 0;
 	const char *opptr = NULL;
 	enum {
 		st_wordstart,	/* Start of word/after whitespace */

From b710763608f1bfc8d4cd346e508f3f4878af361e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:03 +0100
Subject: [PATCH 564/890] x86 setup: add note to use edd=off on EDD probing
 hangs

Tell the user to specify edd=off in the case of EDD probing hangs.
Per LKML discussion.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/edd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index d3e978030c5c..8721dc46a0b6 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -154,7 +154,7 @@ void query_edd(void)
 	 */
 
 	if (!be_quiet)
-		printf("Probing EDD... ");
+		printf("Probing EDD (edd=off to disable)... ");
 
 	for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
 		/*

From 1a8514e04e3f0249a75f66225e99cdf48d305be7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:03 +0100
Subject: [PATCH 565/890] x86 setup: push video mode setup as late as possible

Push video mode setup as late as possible; messages issued through the
BIOS interface after video mode setup will either not be seen (for
framebuffer modes) or will screw up the cursor (for text modes.)

In particular, this makes the EDD probing message show up correctly.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 1f95750ede28..22ca62ba40c8 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -131,9 +131,6 @@ void main(void)
 	/* Set keyboard repeat rate (why?) */
 	keyboard_set_repeat();
 
-	/* Set the video mode */
-	set_video();
-
 	/* Query MCA information */
 	query_mca();
 
@@ -154,6 +151,10 @@ void main(void)
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 	query_edd();
 #endif
+
+	/* Set the video mode */
+	set_video();
+
 	/* Do the last things and invoke protected mode */
 	go_to_protected_mode();
 }

From acd644bb4abb4d9f0ba6b9ec2b356263971ef9d0 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:04 +0100
Subject: [PATCH 566/890] x86 setup: guard the heap against invalid stack
 setups

If we use the bootloader-provided stack pointer, we might end up in a
situation where the bootloader (incorrectly) pointed the stack in the
middle of our heap.  Catch this by simply comparing the computed heap
end value to the stack pointer minus the defined stack size.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/main.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 22ca62ba40c8..7828da5cfd07 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -100,20 +100,32 @@ static void set_bios_mode(void)
 #endif
 }
 
+static void init_heap(void)
+{
+	char *stack_end;
+
+	if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
+		asm("leal %P1(%%esp),%0"
+		    : "=r" (stack_end) : "i" (-STACK_SIZE));
+
+		heap_end = (char *)
+			((size_t)boot_params.hdr.heap_end_ptr + 0x200);
+		if (heap_end > stack_end)
+			heap_end = stack_end;
+	} else {
+		/* Boot protocol 2.00 only, no heap available */
+		puts("WARNING: Ancient bootloader, some functionality "
+		     "may be limited!\n");
+	}
+}
+
 void main(void)
 {
 	/* First, copy the boot header into the "zeropage" */
 	copy_boot_params();
 
 	/* End of heap check */
-	if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
-		heap_end = (char *)(boot_params.hdr.heap_end_ptr
-				    +0x200-STACK_SIZE);
-	} else {
-		/* Boot protocol 2.00 only, no heap available */
-		puts("WARNING: Ancient bootloader, some functionality "
-		     "may be limited!\n");
-	}
+	init_heap();
 
 	/* Make sure we have all the proper CPU support */
 	if (validate_cpu()) {

From 387faedb1dee2e917811acd05902cc36142a4850 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:33:04 +0100
Subject: [PATCH 567/890] x86 setup: correct the base in the GDT_ENTRY() macro

The GDT_ENTRY() macro in pm.c would incorrectly cut the bottom 8 bits
off the base.  We didn't define any bases with the bottom 8 bits
nonzero, so it is a non-manifest bug, but it's still a bug.

Pointed out by John Smith <johnsmith9344@gmail.com>.
Cc: John Smith <johnsmith9344@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index b23cbdc7d547..1a0f936c160b 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -104,7 +104,7 @@ static void reset_coprocessor(void)
 	(((u64)(base & 0xff000000) << 32) |	\
 	 ((u64)flags << 40) |			\
 	 ((u64)(limit & 0x00ff0000) << 32) |	\
-	 ((u64)(base & 0x00ffff00) << 16) |	\
+	 ((u64)(base & 0x00ffffff) << 16) |	\
 	 ((u64)(limit & 0x0000ffff)))
 
 struct gdt_ptr {

From c2b84b30b8c8bbccf4d2e32f8a3a70ad09ba9ab8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:33:04 +0100
Subject: [PATCH 568/890] x86: sanity check APIC timer frequency

Check the APIC timer calibration result for sanity. When the frequency
is out of range, issue a warning and disable the local APIC timer.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 13 +++++++++++++
 arch/x86/kernel/apic_64.c | 12 ++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index cbcf72cde956..20d4dbc42e6e 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -470,6 +470,19 @@ void __init setup_boot_APIC_clock(void)
 
 	local_apic_timer_verify_ok = 1;
 
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		local_irq_enable();
+		printk(KERN_WARNING
+		       "APIC frequency too slow, disabling apic timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
+
 	/* We trust the pm timer based calibration */
 	if (!pm_referenced) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d1a696673d9d..731b48660fc4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -383,6 +383,18 @@ void __init setup_boot_APIC_clock(void)
 	printk(KERN_INFO "Using local APIC timer interrupts.\n");
 	calibrate_APIC_clock();
 
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		printk(KERN_WARNING
+		       "APIC frequency too slow, disabling apic timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
+
 	/*
 	 * If nmi_watchdog is set to IO_APIC, we need the
 	 * PIT/HPET going.  Otherwise register lapic as a dummy

From 9d09951da948c0ca30944df2a9b78e68d6e0744c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:33:04 +0100
Subject: [PATCH 569/890] x86: preset apic clockevents multiplicator

The check for an unitialized clock event device triggers, when the local
apic timer is registered as a dummy clock event device for broadcasting.
Preset the multiplicator to avoid a false positive.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 4 +++-
 arch/x86/kernel/apic_64.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 20d4dbc42e6e..d07a603807d1 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -377,8 +377,10 @@ void __init setup_boot_APIC_clock(void)
 	 */
 	if (local_apic_timer_disabled) {
 		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
+		if (num_possible_cpus() > 1) {
+			lapic_clockevent.mult = 1;
 			setup_APIC_timer();
+		}
 		return;
 	}
 
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 731b48660fc4..01d4ca27ecf0 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -375,8 +375,10 @@ void __init setup_boot_APIC_clock(void)
 	if (disable_apic_timer) {
 		printk(KERN_INFO "Disabling APIC timer\n");
 		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
+		if (num_possible_cpus() > 1) {
+			lapic_clockevent.mult = 1;
 			setup_APIC_timer();
+		}
 		return;
 	}
 

From d504e39efd4e64a1a6e01dc85fd8a33fdb196dce Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 30 Jan 2008 13:33:05 +0100
Subject: [PATCH 570/890] x86: discover_ebda section mismatch

Fix section mismatches.  discover_ebda() can be __init.

WARNING: vmlinux.o(.text+0x738a): Section mismatch: reference to .init.data:ebda_addr (between 'discover_ebda' and 'get_model_name')
WARNING: vmlinux.o(.text+0x73c4): Section mismatch: reference to .init.data:ebda_size (between 'discover_ebda' and 'get_model_name')

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index a7124bfb8578..07547febac7a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -248,7 +248,7 @@ static inline void __init reserve_crashkernel(void)
 unsigned __initdata ebda_addr;
 unsigned __initdata ebda_size;
 
-static void discover_ebda(void)
+static void __init discover_ebda(void)
 {
 	/*
 	 * there is a real-mode segmented pointer pointing to the

From 8866cd9dc9d0bbadcf361a14e0cdfecb66473087 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:33:06 +0100
Subject: [PATCH 571/890] x86: early_idt_handler improvements, 64-bit

It's not too pretty, but I found this made the "PANIC: early exception"
messages become much more reliably useful: 1. print the vector number,
2. print the %cs value, 3. handle error-code-pushing vs non-pushing vectors.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c  |  2 +-
 arch/x86/kernel/head_64.S | 34 ++++++++++++++++++++++++++++++----
 include/asm-x86/segment.h |  6 ++++++
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 4a1c1356c41a..85c1e6bf8022 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -59,7 +59,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	zap_identity_mappings();
 
 	for (i = 0; i < IDT_ENTRIES; i++)
-		set_intr_gate(i, early_idt_handler);
+		set_intr_gate(i, &early_idt_handlers[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	early_printk("Kernel alive\n");
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c31b1c96a9d3..8b4c35cb519a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -267,14 +267,40 @@ init_rsp:
 bad_address:
 	jmp bad_address
 
+.macro early_idt_tramp first, last
+	.ifgt \last-\first
+	early_idt_tramp \first, \last-1
+	.endif
+	movl $\last,%esi
+	jmp early_idt_handler
+.endm
+
+	.globl early_idt_handlers
+early_idt_handlers:
+	early_idt_tramp 0, 63
+	early_idt_tramp 64, 127
+	early_idt_tramp 128, 191
+	early_idt_tramp 192, 255
+
 ENTRY(early_idt_handler)
 	cmpl $2,early_recursion_flag(%rip)
 	jz  1f
 	incl early_recursion_flag(%rip)
-	xorl %eax,%eax
-	movq 8(%rsp),%rsi	# get rip
-	movq (%rsp),%rdx
 	GET_CR2_INTO_RCX
+	movq %rcx,%r9
+	xorl %r8d,%r8d		# zero for error code
+	movl %esi,%ecx		# get vector number
+	# Test %ecx against mask of vectors that push error code.
+	cmpl $31,%ecx
+	ja 0f
+	movl $1,%eax
+	salq %cl,%rax
+	testl $0x27d00,%eax
+	je 0f
+	popq %r8		# get error code
+0:	movq 0(%rsp),%rcx	# get ip
+	movq 8(%rsp),%rdx	# get cs
+	xorl %eax,%eax
 	leaq early_idt_msg(%rip),%rdi
 	call early_printk
 	cmpl $2,early_recursion_flag(%rip)
@@ -291,7 +317,7 @@ early_recursion_flag:
 	.long 0
 
 early_idt_msg:
-	.asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
+	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
 early_idt_ripmsg:
 	.asciz "RIP %s\n"
 
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 57c8d3723836..23f0535fec61 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -195,4 +195,10 @@
 #define GDT_ENTRY_TLS_ENTRIES 3
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+extern const char early_idt_handlers[IDT_ENTRIES][10];
+#endif
+#endif
+
 #endif

From 076f9776f5d8d131b36955db8641aba3893c2c1b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:06 +0100
Subject: [PATCH 572/890] x86: make early printk selectable on 64-bit as well

Enable CONFIG_EMBEDDED to select CONFIG_EARLY_PRINTK on 64-bit as well.

saves ~2K:

   text    data     bss     dec     hex filename
   7290283 3672091 1907848 12870222         c4624e vmlinux.before
   7288373 3671795 1907848 12868016         c459b0 vmlinux.after

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug    | 2 +-
 arch/x86/kernel/head64.c  | 7 ++++++-
 arch/x86/kernel/head_64.S | 7 +++++++
 include/asm-x86/kdebug.h  | 1 -
 include/linux/kernel.h    | 3 +++
 kernel/printk.c           | 7 +++++++
 6 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 15854b53badc..88420af98140 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -6,7 +6,7 @@ config TRACE_IRQFLAGS_SUPPORT
 source "lib/Kconfig.debug"
 
 config EARLY_PRINTK
-	bool "Early printk" if EMBEDDED && DEBUG_KERNEL && X86_32
+	bool "Early printk" if EMBEDDED
 	default y
 	help
 	  Write kernel log output directly into the VGA buffer or to a serial
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 85c1e6bf8022..87e031d4abf1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -58,8 +58,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* Make NULL pointers segfault */
 	zap_identity_mappings();
 
-	for (i = 0; i < IDT_ENTRIES; i++)
+	for (i = 0; i < IDT_ENTRIES; i++) {
+#ifdef CONFIG_EARLY_PRINTK
 		set_intr_gate(i, &early_idt_handlers[i]);
+#else
+		set_intr_gate(i, early_idt_handler);
+#endif
+	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	early_printk("Kernel alive\n");
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 8b4c35cb519a..1d5a7a361200 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -267,6 +267,7 @@ init_rsp:
 bad_address:
 	jmp bad_address
 
+#ifdef CONFIG_EARLY_PRINTK
 .macro early_idt_tramp first, last
 	.ifgt \last-\first
 	early_idt_tramp \first, \last-1
@@ -281,8 +282,10 @@ early_idt_handlers:
 	early_idt_tramp 64, 127
 	early_idt_tramp 128, 191
 	early_idt_tramp 192, 255
+#endif
 
 ENTRY(early_idt_handler)
+#ifdef CONFIG_EARLY_PRINTK
 	cmpl $2,early_recursion_flag(%rip)
 	jz  1f
 	incl early_recursion_flag(%rip)
@@ -311,8 +314,11 @@ ENTRY(early_idt_handler)
 	movq 8(%rsp),%rsi	# get rip again
 	call __print_symbol
 #endif
+#endif /* EARLY_PRINTK */
 1:	hlt
 	jmp 1b
+
+#ifdef CONFIG_EARLY_PRINTK
 early_recursion_flag:
 	.long 0
 
@@ -320,6 +326,7 @@ early_idt_msg:
 	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
 early_idt_ripmsg:
 	.asciz "RIP %s\n"
+#endif /* CONFIG_EARLY_PRINTK */
 
 .balign PAGE_SIZE
 
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index a5e5e3b7eb23..e9f42d1ac38f 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -22,7 +22,6 @@ enum die_val {
 	DIE_PAGE_FAULT,
 };
 
-extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
 extern void printk_address(unsigned long address);
 extern void die(const char *,struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a7283c9beadf..ff356b2ee478 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -194,6 +194,9 @@ static inline int log_buf_read(int idx) { return 0; }
 static inline int log_buf_copy(char *dest, int idx, int len) { return 0; }
 #endif
 
+extern void __attribute__((format(printf, 1, 2)))
+	early_printk(const char *fmt, ...);
+
 unsigned long int_sqrt(unsigned long);
 
 extern int printk_ratelimit(void);
diff --git a/kernel/printk.c b/kernel/printk.c
index 3b7c968d0ef9..58bbec684119 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -36,6 +36,13 @@
 
 #include <asm/uaccess.h>
 
+/*
+ * Architectures can override it:
+ */
+void __attribute__((weak)) early_printk(const char *fmt, ...)
+{
+}
+
 #define __LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
 
 /* printk's without a loglevel use this.. */

From 3d1f7cae883ce4aac99c661562111a25d52effe0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:06 +0100
Subject: [PATCH 573/890] x86: fix 32-bit FRAME_POINTER chasing code

The current x86 32 bit FRAME_POINTER chasing code has a nasty bug in
that the EBP tracer doesn't actually update the value of EBP it is
tracing, so that the code doesn't actually switch to the irq stack
properly.

The result is a truncated backtrace:

 WARNING: at timeroops.c:8 kerneloops_regression_test() (Not tainted)
 Pid: 0, comm: swapper Not tainted 2.6.24-0.77.rc4.git4.fc9 #1
  [<c040649a>] show_trace_log_lvl+0x1a/0x2f
  [<c0406d41>] show_trace+0x12/0x14
  [<c0407061>] dump_stack+0x6c/0x72
  [<e0258049>] kerneloops_regression_test+0x44/0x46 [timeroops]
  [<c04371ac>] run_timer_softirq+0x127/0x18f
  [<c0434685>] __do_softirq+0x78/0xff
  [<c0407759>] do_softirq+0x74/0xf7
  =======================

This patch fixes the code to update EBP properly, and to check the EIP
before printing (as the non-framepointer backtracer does) so that
the same test backtrace now looks like this:

 WARNING: at timeroops.c:8 kerneloops_regression_test()
 Pid: 0, comm: swapper Not tainted 2.6.24-rc7 #4
  [<c0405d17>] show_trace_log_lvl+0x1a/0x2f
  [<c0406681>] show_trace+0x12/0x14
  [<c0406ef2>] dump_stack+0x6a/0x70
  [<e01f6040>] kerneloops_regression_test+0x3b/0x3d [timeroops]
  [<c0426f07>] run_timer_softirq+0x11b/0x17c
  [<c04243ac>] __do_softirq+0x42/0x94
  [<c040704c>] do_softirq+0x50/0xb6
  [<c04242a9>] irq_exit+0x37/0x67
  [<c040714c>] do_IRQ+0x9a/0xaf
  [<c04057da>] common_interrupt+0x2e/0x34
  [<c05807fe>] cpuidle_idle_call+0x52/0x78
  [<c04034f3>] cpu_idle+0x46/0x60
  [<c05fbbd3>] rest_init+0x43/0x45
  [<c070aa3d>] start_kernel+0x279/0x27f
  =======================

This shows that the backtrace goes all the way down to user context now.
This bug was found during the port to 64 bit of the frame pointer backtracer.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 83df0f37ba75..acc9af260fac 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -125,7 +125,8 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 		unsigned long addr;
 
 		addr = frame->return_address;
-		ops->address(data, addr);
+		if (__kernel_text_address(addr))
+			ops->address(data, addr);
 		/*
 		 * break out of recursive entries (such as
 		 * end_of_stack_stop_unwind_function). Also,
@@ -133,6 +134,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 		 * move downwards!
 		 */
 		next = frame->next_frame;
+		bp = (unsigned long) next;
 		if (next <= frame)
 			break;
 		frame = next;

From bc850d6b374fffd08336996f4b4d3bbd6bf427f6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:07 +0100
Subject: [PATCH 574/890] x86: add the capability to print fuzzy backtraces

For enhancing the 32 bit EBP based backtracer, I need the capability
for the backtracer to tell it's customer that an entry is either
reliable or unreliable, and the backtrace printing code then needs to
print the unreliable ones slightly different.

This patch adds the basic capability, the next patch will add a user
of this capability.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-gart_64.c |  2 +-
 arch/x86/kernel/process_64.c  |  4 ++--
 arch/x86/kernel/stacktrace.c  |  2 +-
 arch/x86/kernel/traps_32.c    |  8 ++++---
 arch/x86/kernel/traps_64.c    | 39 +++++++++++++++++++++--------------
 arch/x86/mm/fault_64.c        |  2 +-
 arch/x86/oprofile/backtrace.c |  2 +-
 include/asm-x86/kdebug.h      |  5 +++--
 include/asm-x86/stacktrace.h  |  5 +++--
 9 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d2b46b489412..04ca5c5221d7 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -167,7 +167,7 @@ static void dump_leak(void)
 	       iommu_leak_pages);
 	for (i = 0; i < iommu_leak_pages; i += 2) {
 		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
+		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
 		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
 	}
 	printk(KERN_DEBUG "\n");
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a0130eb2fa50..383760bfd283 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -329,7 +329,7 @@ void __show_regs(struct pt_regs * regs)
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-	printk_address(regs->ip);
+	printk_address(regs->ip, regs->bp);
 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
 		regs->flags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -377,7 +377,7 @@ void show_regs(struct pt_regs *regs)
 {
 	printk("CPU %d:", smp_processor_id());
 	__show_regs(regs);
-	show_trace(NULL, regs, (void *)(regs + 1));
+	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
 
 /*
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c571edd11878..8c4e4f5bf040 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -22,7 +22,7 @@ static int save_stack_stack(void *data, char *name)
 	return -1;
 }
 
-static void save_stack_address(void *data, unsigned long addr)
+static void save_stack_address(void *data, unsigned long addr, int reliable)
 {
 	struct stack_trace *trace = (struct stack_trace *)data;
 	if (trace->skip > 0) {
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index acc9af260fac..8ef8a9ddfec6 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -126,7 +126,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 
 		addr = frame->return_address;
 		if (__kernel_text_address(addr))
-			ops->address(data, addr);
+			ops->address(data, addr, 1);
 		/*
 		 * break out of recursive entries (such as
 		 * end_of_stack_stop_unwind_function). Also,
@@ -145,7 +145,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 
 		addr = *stack++;
 		if (__kernel_text_address(addr))
-			ops->address(data, addr);
+			ops->address(data, addr, 1);
 	}
 #endif
 	return bp;
@@ -220,9 +220,11 @@ static int print_trace_stack(void *data, char *name)
 /*
  * Print one address/symbol entries per line.
  */
-static void print_trace_address(void *data, unsigned long addr)
+static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
 	printk("%s [<%08lx>] ", (char *)data, addr);
+	if (!reliable)
+		printk("? ");
 	print_symbol("%s\n", addr);
 	touch_nmi_watchdog();
 }
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 37b07d08704b..62c4d8f46ee9 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -99,13 +99,14 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 int kstack_depth_to_print = 12;
 
 #ifdef CONFIG_KALLSYMS
-void printk_address(unsigned long address)
+void printk_address(unsigned long address, int reliable)
 {
 	unsigned long offset = 0, symsize;
 	const char *symname;
 	char *modname;
 	char *delim = ":";
 	char namebuf[128];
+	char reliab[4] = "";;
 
 	symname = kallsyms_lookup(address, &symsize, &offset,
 					&modname, namebuf);
@@ -113,13 +114,16 @@ void printk_address(unsigned long address)
 		printk(" [<%016lx>]\n", address);
 		return;
 	}
+	if (!reliable)
+		strcpy(reliab, "? ");
+
 	if (!modname)
 		modname = delim = ""; 		
-	printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
-		address, delim, modname, delim, symname, offset, symsize);
+	printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
+		address, reliab, delim, modname, delim, symname, offset, symsize);
 }
 #else
-void printk_address(unsigned long address)
+void printk_address(unsigned long address, int reliable)
 {
 	printk(" [<%016lx>]\n", address);
 }
@@ -215,7 +219,7 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 }
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack,
+		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
@@ -252,7 +256,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			 * down the cause of the crash will be able to figure \
 			 * out the call path that was taken. \
 			 */ \
-			ops->address(data, addr);   \
+			ops->address(data, addr, 1);   \
 		} \
 	} while (0)
 
@@ -331,10 +335,10 @@ static int print_trace_stack(void *data, char *name)
 	return 0;
 }
 
-static void print_trace_address(void *data, unsigned long addr)
+static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
 	touch_nmi_watchdog();
-	printk_address(addr);
+	printk_address(addr, reliable);
 }
 
 static const struct stacktrace_ops print_trace_ops = {
@@ -345,15 +349,17 @@ static const struct stacktrace_ops print_trace_ops = {
 };
 
 void
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
+show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
+		unsigned long bp)
 {
 	printk("\nCall Trace:\n");
-	dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
+	dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
 	printk("\n");
 }
 
 static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp)
+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
+							unsigned long bp)
 {
 	unsigned long *stack;
 	int i;
@@ -387,12 +393,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp)
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	show_trace(tsk, regs, sp);
+	show_trace(tsk, regs, sp, bp);
 }
 
 void show_stack(struct task_struct *tsk, unsigned long * sp)
 {
-	_show_stack(tsk, NULL, sp);
+	_show_stack(tsk, NULL, sp, 0);
 }
 
 /*
@@ -401,13 +407,14 @@ void show_stack(struct task_struct *tsk, unsigned long * sp)
 void dump_stack(void)
 {
 	unsigned long dummy;
+	unsigned long bp = 0;
 
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(NULL, NULL, &dummy);
+	show_trace(NULL, NULL, &dummy, bp);
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -432,7 +439,7 @@ void show_registers(struct pt_regs *regs)
 	 */
 	if (in_kernel) {
 		printk("Stack: ");
-		_show_stack(NULL, regs, (unsigned long*)sp);
+		_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
 
 		printk("\nCode: ");
 		if (regs->ip < PAGE_OFFSET)
@@ -527,7 +534,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
 	printk(KERN_ALERT "RIP ");
-	printk_address(regs->ip);
+	printk_address(regs->ip, regs->bp);
 	printk(" RSP <%016lx>\n", regs->sp);
 	if (kexec_should_crash(current))
 		crash_kexec(regs);
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index e82832961d72..cf7e99895b91 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -578,7 +578,7 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT, address);
-	printk_address(regs->ip);
+	printk_address(regs->ip, regs->bp);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index cc353a0b183e..671a7ecf11aa 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -32,7 +32,7 @@ static int backtrace_stack(void *data, char *name)
 	return 0;
 }
 
-static void backtrace_address(void *data, unsigned long addr)
+static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	unsigned int *depth = data;
 
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index e9f42d1ac38f..dd442a1632c0 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -22,12 +22,13 @@ enum die_val {
 	DIE_PAGE_FAULT,
 };
 
-extern void printk_address(unsigned long address);
+extern void printk_address(unsigned long address, int reliable);
 extern void die(const char *,struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void __show_registers(struct pt_regs *, int all);
-extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long *);
+extern void show_trace(struct task_struct *t, struct pt_regs *regs,
+			unsigned long *sp, unsigned long bp);
 extern void __show_regs(struct pt_regs *regs);
 extern void show_regs(struct pt_regs *regs);
 extern void dump_pagetable(unsigned long);
diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h
index 70dd5bae3235..30f82526a8e2 100644
--- a/include/asm-x86/stacktrace.h
+++ b/include/asm-x86/stacktrace.h
@@ -9,12 +9,13 @@ struct stacktrace_ops {
 	void (*warning)(void *data, char *msg);
 	/* msg must contain %s for the symbol */
 	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
-	void (*address)(void *data, unsigned long address);
+	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
 };
 
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
 #endif

From e9d4efddbec3d852d435b370b9c40ff7ac24afe6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:07 +0100
Subject: [PATCH 575/890] x86: improve the 32 bit Frame Pointer backtracer to
 also use the traditional backtrace

The 32 bit Frame Pointer backtracer code checks if the EBP is valid
to do a backtrace; however currently on a failure it just gives up
and prints nothing. That's not very nice; we can do better and still
print a decent backtrace.

This patch changes the backtracer to use the regular backtracing algorithm
at the same time as the EBP backtracer; the EBP backtracer is basically
used to figure out which part of the backtrace are reliable vs those
which are likely to be noise.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c | 44 +++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8ef8a9ddfec6..959d40edecd5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -118,36 +118,32 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				unsigned long *stack, unsigned long bp,
 				const struct stacktrace_ops *ops, void *data)
 {
-#ifdef	CONFIG_FRAME_POINTER
 	struct stack_frame *frame = (struct stack_frame *)bp;
-	while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
-		struct stack_frame *next;
-		unsigned long addr;
 
-		addr = frame->return_address;
-		if (__kernel_text_address(addr))
-			ops->address(data, addr, 1);
-		/*
-		 * break out of recursive entries (such as
-		 * end_of_stack_stop_unwind_function). Also,
-		 * we can never allow a frame pointer to
-		 * move downwards!
-		 */
-		next = frame->next_frame;
-		bp = (unsigned long) next;
-		if (next <= frame)
-			break;
-		frame = next;
-	}
-#else
+	/*
+	 * if EBP is "deeper" into the stack than the actual stack pointer,
+	 * we need to rewind the stack pointer a little to start at the
+	 * first stack frame, but only if EBP is in this stack frame.
+	 */
+	if (stack > (unsigned long *) bp
+			&& valid_stack_ptr(tinfo, frame, sizeof(*frame)))
+		stack = (unsigned long *) bp;
+
 	while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
 		unsigned long addr;
 
-		addr = *stack++;
-		if (__kernel_text_address(addr))
-			ops->address(data, addr, 1);
+		addr = *stack;
+		if (__kernel_text_address(addr)) {
+			if ((unsigned long) stack == bp + 4) {
+				ops->address(data, addr, 1);
+				frame = frame->next_frame;
+				bp = (unsigned long) frame;
+			} else {
+				ops->address(data, addr, 0);
+			}
+		}
+		stack++;
 	}
-#endif
 	return bp;
 }
 

From 5bc27dc2f55fd3043597b5a8de6536183f28a449 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:07 +0100
Subject: [PATCH 576/890] x86: pull bp calculation earlier into the backtrace
 path

Right now, we take the stack pointer early during the backtrace path, but
only calculate bp several functions deep later, making it hard to reconcile
the stack and bp backtraces (as well as showing several internal backtrace
functions on the stack with bp based backtracing).

This patch moves the bp taking to the same place we take the stack pointer;
sadly this ripples through several layers of the back tracing stack,
but it's not all that bad in the end I hope.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c  |  2 +-
 arch/x86/kernel/stacktrace.c  |  7 ++++---
 arch/x86/kernel/traps_32.c    | 39 +++++++++++++++--------------------
 arch/x86/oprofile/backtrace.c |  2 +-
 4 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 35a6f318c541..7a61b54649de 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -379,7 +379,7 @@ void __show_registers(struct pt_regs *regs, int all)
 void show_regs(struct pt_regs *regs)
 {
 	__show_registers(regs, 1);
-	show_trace(NULL, regs, &regs->sp);
+	show_trace(NULL, regs, &regs->sp, regs->bp);
 }
 
 /*
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8c4e4f5bf040..4f4021b5bfb5 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -33,7 +33,8 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
 		trace->entries[trace->nr_entries++] = addr;
 }
 
-static void save_stack_address_nosched(void *data, unsigned long addr)
+static void
+save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 {
 	struct stack_trace *trace = (struct stack_trace *)data;
 	if (in_sched_functions(addr))
@@ -65,14 +66,14 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, &save_stack_ops, trace);
+	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
+	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 959d40edecd5..6f3bb287c702 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -120,15 +120,6 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
-	/*
-	 * if EBP is "deeper" into the stack than the actual stack pointer,
-	 * we need to rewind the stack pointer a little to start at the
-	 * first stack frame, but only if EBP is in this stack frame.
-	 */
-	if (stack > (unsigned long *) bp
-			&& valid_stack_ptr(tinfo, frame, sizeof(*frame)))
-		stack = (unsigned long *) bp;
-
 	while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
 		unsigned long addr;
 
@@ -139,7 +130,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				frame = frame->next_frame;
 				bp = (unsigned long) frame;
 			} else {
-				ops->address(data, addr, 0);
+				ops->address(data, addr, bp == 0);
 			}
 		}
 		stack++;
@@ -150,11 +141,9 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 #define MSG(msg) ops->warning(data, msg)
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
-	        unsigned long *stack,
+		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
 {
-	unsigned long bp = 0;
-
 	if (!task)
 		task = current;
 
@@ -234,20 +223,20 @@ static const struct stacktrace_ops print_trace_ops = {
 
 static void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long * stack, char *log_lvl)
+		unsigned long *stack, unsigned long bp, char *log_lvl)
 {
-	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
+	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
 	printk("%s =======================\n", log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long * stack)
+		unsigned long *stack, unsigned long bp)
 {
-	show_trace_log_lvl(task, regs, stack, "");
+	show_trace_log_lvl(task, regs, stack, bp, "");
 }
 
 static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			       unsigned long *sp, char *log_lvl)
+		       unsigned long *sp, unsigned long bp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
@@ -268,13 +257,13 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		printk("%08lx ", *stack++);
 	}
 	printk("\n%sCall Trace:\n", log_lvl);
-	show_trace_log_lvl(task, regs, sp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
 	printk("       ");
-	show_stack_log_lvl(task, NULL, sp, "");
+	show_stack_log_lvl(task, NULL, sp, 0, "");
 }
 
 /*
@@ -283,13 +272,19 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 void dump_stack(void)
 {
 	unsigned long stack;
+	unsigned long bp = 0;
+
+#ifdef CONFIG_FRAME_POINTER
+	if (!bp)
+		asm("movl %%ebp, %0" : "=r" (bp):);
+#endif
 
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(current, NULL, &stack);
+	show_trace(current, NULL, &stack, bp);
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -314,7 +309,7 @@ void show_registers(struct pt_regs *regs)
 		unsigned char c;
 
 		printk("\n" KERN_EMERG "Stack: ");
-		show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 671a7ecf11aa..0ca4815a2938 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -81,7 +81,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 
 	if (!user_mode_vm(regs)) {
 		if (depth)
-			dump_trace(NULL, regs, (unsigned long *)stack,
+			dump_trace(NULL, regs, (unsigned long *)stack, 0,
 				   &backtrace_ops, &depth);
 		return;
 	}

From e4a94568b18c5d7d72741ebde5736d77d235743c Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:07 +0100
Subject: [PATCH 577/890] x86: turn 64 bit x86 HANDLE_STACK into
 print_context_stack like 32 bit has

This patch turns the x86 64 bit HANDLE_STACK macro in the backtrace code
into a function, just like 32 bit has. This is needed pre work in order to
get exact backtraces for CONFIG_FRAME_POINTER to work.

The function and it's arguments are not the same as 32 bit; due to the
exception/interrupt stack way of x86-64 there are a few differences.

This patch should not have any behavior changes, only code movement.

Due to the fragility and importance of the backtrace code, this needs to be
well reviewed and well tested before merging into mainlne.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_64.c | 74 +++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 62c4d8f46ee9..b8303ed95057 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -212,10 +212,46 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+			void *p, unsigned int size, void *end)
 {
 	void *t = (void *)tinfo;
-        return p > t && p < t + THREAD_SIZE - 3;
+	if (end) {
+		if (p < end && p >= (end-THREAD_SIZE))
+			return 1;
+		else
+			return 0;
+	}
+	return p > t && p < t + THREAD_SIZE - size;
+}
+
+static inline unsigned long print_context_stack(struct thread_info *tinfo,
+				unsigned long *stack, unsigned long bp,
+				const struct stacktrace_ops *ops, void *data,
+				unsigned long *end)
+{
+	/*
+	 * Print function call entries within a stack. 'cond' is the
+	 * "end of stackframe" condition, that the 'stack++'
+	 * iteration will eventually trigger.
+	 */
+	while (valid_stack_ptr(tinfo, stack, 3, end)) {
+		unsigned long addr = *stack++;
+		/* Use unlocked access here because except for NMIs
+		   we should be already protected against module unloads */
+		if (__kernel_text_address(addr)) {
+			/*
+			 * If the address is either in the text segment of the
+			 * kernel, or in the region which contains vmalloc'ed
+			 * memory, it *may* be the address of a calling
+			 * routine; if so, print it so that someone tracing
+			 * down the cause of the crash will be able to figure
+			 * out the call path that was taken.
+			 */
+			ops->address(data, addr, 1);
+		}
+	}
+	return bp;
 }
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
@@ -229,6 +265,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 
 	if (!tsk)
 		tsk = current;
+	tinfo = task_thread_info(tsk);
 
 	if (!stack) {
 		unsigned long dummy;
@@ -237,28 +274,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			stack = (unsigned long *)tsk->thread.sp;
 	}
 
-	/*
-	 * Print function call entries within a stack. 'cond' is the
-	 * "end of stackframe" condition, that the 'stack++'
-	 * iteration will eventually trigger.
-	 */
-#define HANDLE_STACK(cond) \
-	do while (cond) { \
-		unsigned long addr = *stack++; \
-		/* Use unlocked access here because except for NMIs	\
-		   we should be already protected against module unloads */ \
-		if (__kernel_text_address(addr)) { \
-			/* \
-			 * If the address is either in the text segment of the \
-			 * kernel, or in the region which contains vmalloc'ed \
-			 * memory, it *may* be the address of a calling \
-			 * routine; if so, print it so that someone tracing \
-			 * down the cause of the crash will be able to figure \
-			 * out the call path that was taken. \
-			 */ \
-			ops->address(data, addr, 1);   \
-		} \
-	} while (0)
 
 	/*
 	 * Print function call entries in all stacks, starting at the
@@ -274,7 +289,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		if (estack_end) {
 			if (ops->stack(data, id) < 0)
 				break;
-			HANDLE_STACK (stack < estack_end);
+
+			print_context_stack(tinfo, stack, 0, ops,
+						data, estack_end);
 			ops->stack(data, "<EOE>");
 			/*
 			 * We link to the next stack via the
@@ -292,7 +309,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			if (stack >= irqstack && stack < irqstack_end) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
-				HANDLE_STACK (stack < irqstack_end);
+				print_context_stack(tinfo, stack, 0, ops,
+							 data, irqstack_end);
 				/*
 				 * We link to the next stack (which would be
 				 * the process stack normally) the last
@@ -310,9 +328,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	/*
 	 * This handles the process stack:
 	 */
-	tinfo = task_thread_info(tsk);
-	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
-#undef HANDLE_STACK
+	print_context_stack(tinfo, stack, 0, ops, data, NULL);
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);

From 80b51f310b6f55006a265d087b8f48744e65663d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:07 +0100
Subject: [PATCH 578/890] x86: use the stack frames to get exact stack-traces
 for CONFIG_FRAMEPOINTER on x86-64

x86 32 bit already has this feature: This patch uses the stack frames with
frame pointer into an exact stack trace, by following the frame pointer.
This only affects kernels built with the CONFIG_FRAME_POINTER config option
enabled, and greatly reduces the amount of noise in oopses.

This code uses the traditional method of doing backtraces, but if it
finds a valid frame pointer chain, will use that to show which parts
of the backtrace are reliable and which parts are not

Due to the fragility and importance of the backtrace code, this needs to
be well reviewed and well tested before merging into mainlne.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_64.c | 67 +++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index b8303ed95057..304ca6b4a1ca 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -225,31 +225,34 @@ static inline int valid_stack_ptr(struct thread_info *tinfo,
 	return p > t && p < t + THREAD_SIZE - size;
 }
 
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	struct stack_frame *next_frame;
+	unsigned long return_address;
+};
+
+
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				unsigned long *stack, unsigned long bp,
 				const struct stacktrace_ops *ops, void *data,
 				unsigned long *end)
 {
-	/*
-	 * Print function call entries within a stack. 'cond' is the
-	 * "end of stackframe" condition, that the 'stack++'
-	 * iteration will eventually trigger.
-	 */
-	while (valid_stack_ptr(tinfo, stack, 3, end)) {
-		unsigned long addr = *stack++;
-		/* Use unlocked access here because except for NMIs
-		   we should be already protected against module unloads */
+	struct stack_frame *frame = (struct stack_frame *)bp;
+
+	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+		unsigned long addr;
+
+		addr = *stack;
 		if (__kernel_text_address(addr)) {
-			/*
-			 * If the address is either in the text segment of the
-			 * kernel, or in the region which contains vmalloc'ed
-			 * memory, it *may* be the address of a calling
-			 * routine; if so, print it so that someone tracing
-			 * down the cause of the crash will be able to figure
-			 * out the call path that was taken.
-			 */
-			ops->address(data, addr, 1);
+			if ((unsigned long) stack == bp + 8) {
+				ops->address(data, addr, 1);
+				frame = frame->next_frame;
+				bp = (unsigned long) frame;
+			} else {
+				ops->address(data, addr, bp == 0);
+			}
 		}
+		stack++;
 	}
 	return bp;
 }
@@ -274,6 +277,19 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			stack = (unsigned long *)tsk->thread.sp;
 	}
 
+#ifdef CONFIG_FRAME_POINTER
+	if (!bp) {
+		if (tsk == current) {
+			/* Grab bp right from our regs */
+			asm("movq %%rbp, %0" : "=r" (bp):);
+		} else {
+			/* bp is the last reg pushed by switch_to */
+			bp = *(unsigned long *) tsk->thread.sp;
+		}
+	}
+#endif
+
+
 
 	/*
 	 * Print function call entries in all stacks, starting at the
@@ -290,8 +306,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			if (ops->stack(data, id) < 0)
 				break;
 
-			print_context_stack(tinfo, stack, 0, ops,
-						data, estack_end);
+			bp = print_context_stack(tinfo, stack, bp, ops,
+							data, estack_end);
 			ops->stack(data, "<EOE>");
 			/*
 			 * We link to the next stack via the
@@ -309,8 +325,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 			if (stack >= irqstack && stack < irqstack_end) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
-				print_context_stack(tinfo, stack, 0, ops,
-							 data, irqstack_end);
+				bp = print_context_stack(tinfo, stack, bp,
+						ops, data, irqstack_end);
 				/*
 				 * We link to the next stack (which would be
 				 * the process stack normally) the last
@@ -328,7 +344,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	/*
 	 * This handles the process stack:
 	 */
-	print_context_stack(tinfo, stack, 0, ops, data, NULL);
+	bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
@@ -425,6 +441,11 @@ void dump_stack(void)
 	unsigned long dummy;
 	unsigned long bp = 0;
 
+#ifdef CONFIG_FRAME_POINTER
+	if (!bp)
+		asm("movq %%rbp, %0" : "=r" (bp):);
+#endif
+
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,

From 6dab27784b2a97823b522e1cb88e40be40a93d45 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: [PATCH 579/890] x86: add a simple backtrace test module

During the work on the x86 32 and 64 bit backtrace code I found it useful
to have a simple test module to test a process and irq context backtrace.
Since the existing backtrace code was buggy, I figure it might be useful
to have such a test module in the kernel so that maybe we can even
detect such bugs earlier..

[ mingo@elte.hu: build fix ]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/Makefile        |  1 +
 kernel/backtracetest.c | 48 ++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug      | 12 +++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 kernel/backtracetest.c

diff --git a/kernel/Makefile b/kernel/Makefile
index 62015c3d8d91..8885627ea021 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
new file mode 100644
index 000000000000..d1a7605c5b8f
--- /dev/null
+++ b/kernel/backtracetest.c
@@ -0,0 +1,48 @@
+/*
+ * Simple stack backtrace regression test module
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+static struct timer_list backtrace_timer;
+
+static void backtrace_test_timer(unsigned long data)
+{
+	printk("Testing a backtrace from irq context.\n");
+	printk("The following trace is a kernel self test and not a bug!\n");
+	dump_stack();
+}
+static int backtrace_regression_test(void)
+{
+	printk("====[ backtrace testing ]===========\n");
+	printk("Testing a backtrace from process context.\n");
+	printk("The following trace is a kernel self test and not a bug!\n");
+	dump_stack();
+
+	init_timer(&backtrace_timer);
+	backtrace_timer.function = backtrace_test_timer;
+	mod_timer(&backtrace_timer, jiffies + 10);
+
+	msleep(10);
+	printk("====[ end of backtrace testing ]====\n");
+	return 0;
+}
+
+static void exitf(void)
+{
+}
+
+module_init(backtrace_regression_test);
+module_exit(exitf);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f535b9b5eb00..aa56e631580d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -506,6 +506,18 @@ config KPROBES_SANITY_TEST
 
 	  Say N if you are unsure.
 
+config BACKTRACE_SELF_TEST
+	tristate "Self test for the backtrace code"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  This option provides a kernel module that can be used to test
+	  the kernel stack backtrace code. This option is not useful
+	  for distributions or general kernels, but only for kernel
+	  developers working on architecture code.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL

From a25bd94964e87b1b93903a822fba5025d995d4da Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: [PATCH 580/890] x86: add the "print code before the trapping
 instruction" feature to 64 bit

The 32 bit x86 tree has a very useful feature that prints the Code: line
for the code even before the trapping instrution (and the start of the
trapping instruction is then denoted with a <>). Unfortunately, the 64 bit
x86 tree does not yet have this feature, making diagnosing backtraces harder
than needed.

This patch adds this feature in the same was as the 32 bit tree has
(including the same kernel boot parameter), and including a bugfix
to make the code use probe_kernel_address() rarther than a buggy (deadlocking)
__get_user.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |  4 +--
 arch/x86/kernel/traps_64.c          | 44 +++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e7910f8b4fcc..40db7dd1d92a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -416,8 +416,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			[SPARC64] tick
 			[X86-64] hpet,tsc
 
-	code_bytes	[IA32] How many bytes of object code to print in an
-			oops report.
+	code_bytes	[IA32/X86_64] How many bytes of object code to print
+			in an oops report.
 			Range: 0 - 8192
 			Default: 64
 
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 304ca6b4a1ca..0bba7924604e 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -74,6 +74,8 @@ asmlinkage void alignment_check(void);
 asmlinkage void machine_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 
+static unsigned int code_bytes = 64;
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -459,12 +461,15 @@ EXPORT_SYMBOL(dump_stack);
 void show_registers(struct pt_regs *regs)
 {
 	int i;
-	int in_kernel = !user_mode(regs);
 	unsigned long sp;
 	const int cpu = smp_processor_id();
 	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+	u8 *ip;
+	unsigned int code_prologue = code_bytes * 43 / 64;
+	unsigned int code_len = code_bytes;
 
 	sp = regs->sp;
+	ip = (u8 *) regs->ip - code_prologue;
 	printk("CPU %d ", cpu);
 	__show_regs(regs);
 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -474,22 +479,28 @@ void show_registers(struct pt_regs *regs)
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (in_kernel) {
+	if (!user_mode(regs)) {
+		unsigned char c;
 		printk("Stack: ");
 		_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
+		printk("\n");
 
-		printk("\nCode: ");
-		if (regs->ip < PAGE_OFFSET)
-			goto bad;
-
-		for (i=0; i<20; i++) {
-			unsigned char c;
-			if (__get_user(c, &((unsigned char*)regs->ip)[i])) {
-bad:
+		printk(KERN_EMERG "Code: ");
+		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+			/* try starting at RIP */
+			ip = (u8 *) regs->ip;
+			code_len = code_len - code_prologue + 1;
+		}
+		for (i = 0; i < code_len; i++, ip++) {
+			if (ip < (u8 *)PAGE_OFFSET ||
+					probe_kernel_address(ip, c)) {
 				printk(" Bad RIP value.");
 				break;
 			}
-			printk("%02x ", c);
+			if (ip == (u8 *)regs->ip)
+				printk("<%02x> ", c);
+			else
+				printk("%02x ", c);
 		}
 	}
 	printk("\n");
@@ -1164,3 +1175,14 @@ static int __init kstack_setup(char *s)
 	return 0;
 }
 early_param("kstack", kstack_setup);
+
+
+static int __init code_bytes_setup(char *s)
+{
+	code_bytes = simple_strtoul(s, NULL, 0);
+	if (code_bytes > 8192)
+		code_bytes = 8192;
+
+	return 1;
+}
+__setup("code_bytes=", code_bytes_setup);

From aafbd7eb2057edfc9a17b258e3f0258a4e6d8198 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: [PATCH 581/890] x86: make printk_address regs->ip always reliable

printk_address()'s second parameter is the reliability indication,
not the ebp. If we're printing regs->ip we're reliable by definition,
so pass a 1 here.

Signed-off-by: Arjan van de Ven
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c | 2 +-
 arch/x86/kernel/traps_64.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 383760bfd283..4e65ae8a54bf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -329,7 +329,7 @@ void __show_regs(struct pt_regs * regs)
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-	printk_address(regs->ip, regs->bp);
+	printk_address(regs->ip, 1);
 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
 		regs->flags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 0bba7924604e..814801f4eb9e 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -582,7 +582,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
 	printk(KERN_ALERT "RIP ");
-	printk_address(regs->ip, regs->bp);
+	printk_address(regs->ip, 1);
 	printk(" RSP <%016lx>\n", regs->sp);
 	if (kexec_should_crash(current))
 		crash_kexec(regs);

From e91a3b4353577c7d38b77dd1293fc3d0a173e8e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: [PATCH 582/890] x86: smp_scan_config() debugging printouts

These are useful in figuring out early-mapping problems.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/mpparse_32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index a4c05372626b..cc06eae1b037 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -721,7 +721,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 	unsigned long *bp = phys_to_virt(base);
 	struct intel_mp_floating *mpf;
 
-	Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+	printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
 	if (sizeof(*mpf) != 16)
 		printk("Error: MPF size\n");
 
@@ -734,8 +734,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 				|| (mpf->mpf_specification == 4)) ) {
 
 			smp_found_config = 1;
-			printk(KERN_INFO "found SMP MP-table at %08lx\n",
-						virt_to_phys(mpf));
+			printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+				mpf, virt_to_phys(mpf));
 			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
 			if (mpf->mpf_physptr) {
 				/*

From 306c142f99caef5a30a0d61a39a956c069941b63 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: [PATCH 583/890] x86: pat: e820 cleanup

NOP change.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_64.c  | 4 +---
 include/asm-x86/e820.h    | 3 +++
 include/asm-x86/e820_64.h | 5 +++++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index b03db4ca9cad..6fbd3c7932e0 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -19,6 +19,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/proto.h>
+#include <asm/e820.h>
 
 unsigned long __phys_addr(unsigned long x)
 {
@@ -28,9 +29,6 @@ unsigned long __phys_addr(unsigned long x)
 }
 EXPORT_SYMBOL(__phys_addr);
 
-#define ISA_START_ADDRESS      0xa0000
-#define ISA_END_ADDRESS                0x100000
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 3e214f39fad3..f96f1853bc41 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -22,6 +22,9 @@ struct e820map {
 };
 #endif /* __ASSEMBLY__ */
 
+#define ISA_START_ADDRESS	0xa0000
+#define ISA_END_ADDRESS		0x100000
+
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
 # include "e820_32.h"
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 1c7ba8804176..8cba49da4795 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -26,6 +26,10 @@ extern void e820_reserve_resources(struct resource *code_resource,
 extern void e820_mark_nosave_regions(void);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_any_valid(unsigned long start, unsigned long end);
+extern int e820_all_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_all_valid(unsigned long start, unsigned long end);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
 
 extern void e820_setup_gap(void);
@@ -38,6 +42,7 @@ extern struct e820map e820;
 
 extern unsigned ebda_addr, ebda_size;
 extern unsigned long nodemap_addr, nodemap_size;
+
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/

From 0d64484f7ea12ca04211b497e94634c3d27cf3fb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:09 +0100
Subject: [PATCH 584/890] x86: fix DMI ioremap leak

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/firmware/dmi_scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 0b24a1141009..5f9d0bc839ba 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -349,6 +349,7 @@ void __init dmi_scan_machine(void)
 			rc = dmi_present(q);
 			if (!rc) {
 				dmi_available = 1;
+				dmi_iounmap(p, 0x10000);
 				return;
 			}
 		}

From 94878efdd0815fe3a4159007b1454b25c7696d53 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:09 +0100
Subject: [PATCH 585/890] x86: early fault debugging improvement

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_32.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fbad51fce672..f409fe2a52e4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -502,6 +502,7 @@ early_fault:
 	call printk
 #endif
 #endif
+	call dump_stack
 hlt_loop:
 	hlt
 	jmp hlt_loop

From a261670aed2b5b77a20ce9b15fed57abeb126c0e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:09 +0100
Subject: [PATCH 586/890] x86: cleanup setup_node_zones called by paging_init()

setup_node_zones() calcuates some variables but only use them when
FLAT_NODE_MEM_MAP is set

so change the MACRO postion to avoid calculating.

also change it to static, and rename it to flat_setup_node_zones().

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 848231481619..551e3590e5c5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -233,8 +233,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	node_set_online(nodeid);
 }
 
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
 /* Initialize final allocator for a zone */
-void __init setup_node_zones(int nodeid)
+static void __init flat_setup_node_zones(int nodeid)
 {
 	unsigned long start_pfn, end_pfn, memmapsize, limit;
 
@@ -250,14 +251,16 @@ void __init setup_node_zones(int nodeid)
 	 */
 	memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
 	limit = end_pfn << PAGE_SHIFT;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+
 	NODE_DATA(nodeid)->node_mem_map =
 		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
 				     memmapsize, SMP_CACHE_BYTES,
 				     round_down(limit - memmapsize, PAGE_SIZE),
 				     limit);
-#endif
 }
+#else
+#define flat_setup_node_zones(i) do {} while (0)
+#endif
 
 /*
  * There are unfortunately some poorly designed mainboards around that
@@ -581,7 +584,7 @@ void __init paging_init(void)
 	sparse_init();
 
 	for_each_online_node(i)
-		setup_node_zones(i);
+		flat_setup_node_zones(i);
 
 	free_area_init_nodes(max_zone_pfns);
 }

From aaf230424204864e2833dcc1da23e2cb0b9f39cd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:09 +0100
Subject: [PATCH 587/890] x86: disable the GART early, 64-bit

For K8 system: 4G RAM with memory hole remapping enabled, or more than
4G RAM installed.

when try to use kexec second kernel, and the first doesn't include
gart_shutdown. the second kernel could have different aper position than
the first kernel. and second kernel could use that hole as RAM that is
still used by GART set by the first kernel. esp. when try to kexec
2.6.24 with sparse mem enable from previous kernel (from RHEL 5 or SLES
10). the new kernel will use aper by GART (set by first kernel) for
vmemmap. and after new kernel setting one new GART. the position will be
real RAM. the _mapcount set is lost.

Bad page state in process 'swapper'
page:ffffe2000e600020 flags:0x0000000000000000 mapping:0000000000000000 mapcount:1 count:0
Trying to fix it up, but a reboot is needed
Backtrace:
Pid: 0, comm: swapper Not tainted 2.6.24-rc7-smp-gcdf71a10-dirty #13

Call Trace:
 [<ffffffff8026401f>] bad_page+0x63/0x8d
 [<ffffffff80264169>] __free_pages_ok+0x7c/0x2a5
 [<ffffffff80ba75d1>] free_all_bootmem_core+0xd0/0x198
 [<ffffffff80ba3a42>] numa_free_all_bootmem+0x3b/0x76
 [<ffffffff80ba3461>] mem_init+0x3b/0x152
 [<ffffffff80b959d3>] start_kernel+0x236/0x2c2
 [<ffffffff80b9511a>] _sinittext+0x11a/0x121

and
 [ffffe2000e600000-ffffe2000e7fffff] PMD ->ffff81001c200000 on node 0
phys addr is : 0x1c200000

RHEL 5.1 kernel -53 said:
PCI-DMA: aperture base @ 1c000000 size 65536 KB

new kernel said:
Mapping aperture over 65536 KB of RAM @ 3c000000

So could try to disable that GART if possible.

According to Ingo

> hm, i'm wondering, instead of modifying the GART, why dont we simply
> _detect_ whatever GART settings we have inherited, and propagate that
> into our e820 maps? I.e. if there's inconsistency, then punch that out
> from the memory maps and just dont use that memory.
>
> that way it would not matter whether the GART settings came from a [old
> or crashing] Linux kernel that has not called gart_iommu_shutdown(), or
> whether it's a BIOS that has set up an aperture hole inconsistent with
> the memory map it passed. (or the memory map we _think_ i tried to pass
> us)
>
> it would also be more robust to only read and do a memory map quirk
> based on that, than actively trying to change the GART so early in the
> bootup. Later on we have to re-enable the GART _anyway_ and have to
> punch a hole for it.
>
> and as a bonus, we would have shored up our defenses against crappy
> BIOSes as well.

add e820 modification for gart inconsistent setting.

gart_fix_e820=off could be used to disable e820 fix.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |  4 ++
 arch/x86/kernel/aperture_64.c       | 89 +++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c           | 12 ++++
 arch/x86/kernel/setup_64.c          |  3 +
 include/asm-x86/e820_64.h           |  1 +
 include/asm-x86/gart.h              |  5 ++
 6 files changed, 114 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 40db7dd1d92a..860a90875491 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -660,6 +660,10 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	gamma=		[HW,DRM]
 
+	gart_fix_e820=  [X86_64] disable the fix e820 for K8 GART
+			Format: off | on
+			default: on
+
 	gdth=		[HW,SCSI]
 			See header of drivers/scsi/gdth.c.
 
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 52d2beac4556..bf1b469d5847 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -218,6 +218,95 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 	return 0;
 }
 
+static int gart_fix_e820 __initdata = 1;
+
+static int __init parse_gart_mem(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	if (!strncmp(p, "off", 3))
+		gart_fix_e820 = 0;
+	else if (!strncmp(p, "on", 2))
+		gart_fix_e820 = 1;
+
+	return 0;
+}
+early_param("gart_fix_e820", parse_gart_mem);
+
+void __init early_gart_iommu_check(void)
+{
+	/*
+	 * in case it is enabled before, esp for kexec/kdump,
+	 * previous kernel already enable that. memset called
+	 * by allocate_aperture/__alloc_bootmem_nopanic cause restart.
+	 * or second kernel have different position for GART hole. and new
+	 * kernel could use hole as RAM that is still used by GART set by
+	 * first kernel
+	 * or BIOS forget to put that in reserved.
+	 * try to update e820 to make that region as reserved.
+	 */
+	int fix, num;
+	u32 ctl;
+	u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
+	u64 aper_base = 0, last_aper_base = 0;
+	int aper_enabled = 0, last_aper_enabled = 0;
+
+	if (!early_pci_allowed())
+		return;
+
+	fix = 0;
+	for (num = 24; num < 32; num++) {
+		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+			continue;
+
+		ctl = read_pci_config(0, num, 3, 0x90);
+		aper_enabled = ctl & 1;
+		aper_order = (ctl >> 1) & 7;
+		aper_size = (32 * 1024 * 1024) << aper_order;
+		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
+		aper_base <<= 25;
+
+		if ((last_aper_order && aper_order != last_aper_order) ||
+		    (last_aper_base && aper_base != last_aper_base) ||
+		    (last_aper_enabled && aper_enabled != last_aper_enabled)) {
+			fix = 1;
+			break;
+		}
+		last_aper_order = aper_order;
+		last_aper_base = aper_base;
+		last_aper_enabled = aper_enabled;
+	}
+
+	if (!fix && !aper_enabled)
+		return;
+
+	if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL)
+		fix = 1;
+
+	if (gart_fix_e820 && !fix && aper_enabled) {
+		if (e820_any_mapped(aper_base, aper_base + aper_size,
+				    E820_RAM)) {
+			/* reserved it, so we can resuse it in second kernel */
+			printk(KERN_INFO "update e820 for GART\n");
+			add_memory_region(aper_base, aper_size, E820_RESERVED);
+			update_e820();
+		}
+		return;
+	}
+
+	/* different nodes have different setting, disable them all at first*/
+	for (num = 24; num < 32; num++) {
+		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+			continue;
+
+		ctl = read_pci_config(0, num, 3, 0x90);
+		ctl &= ~1;
+		write_pci_config(0, num, 3, 0x90, ctl);
+	}
+
+}
+
 void __init gart_iommu_hole_init(void)
 {
 	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index abc473bcabe8..07cfaae7ab07 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -728,6 +728,18 @@ void __init finish_e820_parsing(void)
 	}
 }
 
+void __init update_e820(void)
+{
+	u8 nr_map;
+
+	nr_map = e820.nr_map;
+	if (sanitize_e820_map(e820.map, &nr_map))
+		return;
+	e820.nr_map = nr_map;
+	printk(KERN_INFO "modified physical RAM map:\n");
+	e820_print_map("modified");
+}
+
 unsigned long pci_mem_start = 0xaeedbabe;
 EXPORT_SYMBOL(pci_mem_start);
 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 07547febac7a..12bad27d66f8 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -53,6 +53,7 @@
 #include <video/edid.h>
 #include <asm/e820.h>
 #include <asm/dma.h>
+#include <asm/gart.h>
 #include <asm/mpspec.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
@@ -335,6 +336,8 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	early_gart_iommu_check();
+
 	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 8cba49da4795..ff36f434f00b 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -39,6 +39,7 @@ extern void e820_register_active_regions(int nid,
 extern void finish_e820_parsing(void);
 
 extern struct e820map e820;
+extern void update_e820(void);
 
 extern unsigned ebda_addr, ebda_size;
 extern unsigned long nodemap_addr, nodemap_size;
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index f704c50519b8..90958ed993fa 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -9,6 +9,7 @@ extern int iommu_detected;
 extern void gart_iommu_init(void);
 extern void gart_iommu_shutdown(void);
 extern void __init gart_parse_options(char *);
+extern void early_gart_iommu_check(void);
 extern void gart_iommu_hole_init(void);
 extern int fallback_aper_order;
 extern int fallback_aper_force;
@@ -20,6 +21,10 @@ extern int fix_aperture;
 #define gart_iommu_aperture 0
 #define gart_iommu_aperture_allowed 0
 
+static inline void early_gart_iommu_check(void)
+{
+}
+
 static inline void gart_iommu_shutdown(void)
 {
 }

From ccafa59a0061d7c44d15d02403120fd02b52c667 Mon Sep 17 00:00:00 2001
From: "mboton@gmail.com" <mboton@gmail.com>
Date: Wed, 30 Jan 2008 13:33:10 +0100
Subject: [PATCH 588/890] x86: ioport_{32|64}.c unification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ioport_{32|64}.c unification.

This patch unifies the code from the ioport_32.c and ioport_64.c files.

Tested and working fine with i386 and x86_64 kernels.

Signed-off-by: Miguel Botón <mboton@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile                  |   2 +-
 arch/x86/kernel/{ioport_32.c => ioport.c} |  39 ++++++--
 arch/x86/kernel/ioport_64.c               | 117 ----------------------
 3 files changed, 31 insertions(+), 127 deletions(-)
 rename arch/x86/kernel/{ioport_32.c => ioport.c} (79%)
 delete mode 100644 arch/x86/kernel/ioport_64.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3d23ccd366ea..c414c45a0f13 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
-obj-y			+= time_$(BITS).o ioport_$(BITS).o ldt.o
+obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup_$(BITS).o i8259_$(BITS).o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport.c
similarity index 79%
rename from arch/x86/kernel/ioport_32.c
rename to arch/x86/kernel/ioport.c
index 9295e01ff49c..e723ff3b1d53 100644
--- a/arch/x86/kernel/ioport_32.c
+++ b/arch/x86/kernel/ioport.c
@@ -1,6 +1,6 @@
 /*
  * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
+ * by Linus. 32/64 bits code unification by Miguel Botón.
  */
 
 #include <linux/sched.h>
@@ -36,7 +36,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct * t = &current->thread;
 	struct tss_struct * tss;
-	unsigned long i, max_long;
+	unsigned int i, max_long, bytes, bytes_updated;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -79,8 +79,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 		if (t->io_bitmap_ptr[i] != ~0UL)
 			max_long = i;
 
-	t->io_bitmap_max = (max_long + 1) * sizeof(unsigned long);
+	bytes = (max_long + 1) * sizeof(unsigned long);
+	bytes_updated = max(bytes, t->io_bitmap_max);
 
+	t->io_bitmap_max = bytes;
+
+#ifdef CONFIG_X86_32
 	/*
 	 * Sets the lazy trigger so that the next I/O operation will
 	 * reload the correct bitmap.
@@ -89,6 +93,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 */
 	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 	tss->io_bitmap_owner = NULL;
+#else
+	/* Update the TSS: */
+	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+#endif
 
 	put_cpu();
 
@@ -105,13 +113,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
  * on system-call entry - see also fork() and the signal handling
  * code.
  */
-
+#ifdef CONFIG_X86_32
 asmlinkage long sys_iopl(unsigned long regsp)
 {
 	volatile struct pt_regs *regs = (struct pt_regs *)&regsp;
 	unsigned int level = regs->bx;
 	unsigned int old = (regs->flags >> 12) & 3;
-	struct thread_struct *t = &current->thread;
 
 	if (level > 3)
 		return -EINVAL;
@@ -120,10 +127,24 @@ asmlinkage long sys_iopl(unsigned long regsp)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-
-	t->iopl = level << 12;
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | t->iopl;
-	set_iopl_mask(t->iopl);
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
 
 	return 0;
 }
+#else
+asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
+{
+	unsigned int old = (regs->flags >> 12) & 3;
+
+	if (level > 3)
+		return -EINVAL;
+	/* Trying to gain more privileges? */
+	if (level > old) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+	}
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
+
+	return 0;
+}
+#endif
diff --git a/arch/x86/kernel/ioport_64.c b/arch/x86/kernel/ioport_64.c
deleted file mode 100644
index ff7514b757e5..000000000000
--- a/arch/x86/kernel/ioport_64.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
-#include <linux/syscalls.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
-{
-	int i;
-		if (new_value)
-		for (i = base; i < base + extent; i++) 
-			__set_bit(i, bitmap); 
-		else
-		for (i = base; i < base + extent; i++) 
-			clear_bit(i, bitmap); 
-}
-
-/*
- * this changes the io permissions bitmap in the current task.
- */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
-	unsigned int i, max_long, bytes, bytes_updated;
-	struct thread_struct * t = &current->thread;
-	struct tss_struct * tss;
-	unsigned long *bitmap;
-
-	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
-		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	/*
-	 * If it's the first ioperm() call in this thread's lifetime, set the
-	 * IO bitmap up. ioperm() is much less timing critical than clone(),
-	 * this is why we delay this operation until now:
-	 */
-	if (!t->io_bitmap_ptr) {
-		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!bitmap)
-			return -ENOMEM;
-
-		memset(bitmap, 0xff, IO_BITMAP_BYTES);
-		t->io_bitmap_ptr = bitmap;
-		set_thread_flag(TIF_IO_BITMAP);
-	}
-
-	/*
-	 * do it in the per-thread copy and in the TSS ...
-	 *
-	 * Disable preemption via get_cpu() - we must not switch away
-	 * because the ->io_bitmap_max value must match the bitmap
-	 * contents:
-	 */
-	tss = &per_cpu(init_tss, get_cpu());
-
-	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
-	/*
-	 * Search for a (possibly new) maximum. This is simple and stupid,
-	 * to keep it obviously correct:
-	 */
-	max_long = 0;
-	for (i = 0; i < IO_BITMAP_LONGS; i++)
-		if (t->io_bitmap_ptr[i] != ~0UL)
-			max_long = i;
-
-	bytes = (max_long + 1) * sizeof(long);
-	bytes_updated = max(bytes, t->io_bitmap_max);
-
-	t->io_bitmap_max = bytes;
-
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-
-	put_cpu();
-
-	return 0;
-}
-
-/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
- *
- * Here we just change the flags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
- */
-
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
-	unsigned int old = (regs->flags >> 12) & 3;
-
-	if (level > 3)
-		return -EINVAL;
-	/* Trying to gain more privileges? */
-	if (level > old) {
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-	}
-	regs->flags = (regs->flags &~ X86_EFLAGS_IOPL) | (level << 12);
-	return 0;
-}

From 9718769d298f8642d5ef41eb5f55669d7c5b9fd6 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Wed, 30 Jan 2008 13:33:10 +0100
Subject: [PATCH 589/890] x86: fix ioport unification on 32-bit

ioport unification was broken for 32-bit; it was missing
the acutal pushf/popf EFLAGS manipulation (set_iopl_mask()).
Also, use of volatile looks like leftover cruft.

Cc: mboton@gmail.com
Cc: Kevin Winchester <kjwinchester@gmail.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ioport.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e723ff3b1d53..be72d809bce7 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -116,9 +116,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 #ifdef CONFIG_X86_32
 asmlinkage long sys_iopl(unsigned long regsp)
 {
-	volatile struct pt_regs *regs = (struct pt_regs *)&regsp;
+	struct pt_regs *regs = (struct pt_regs *)&regsp;
 	unsigned int level = regs->bx;
 	unsigned int old = (regs->flags >> 12) & 3;
+	struct thread_struct *t = &current->thread;
 
 	if (level > 3)
 		return -EINVAL;
@@ -127,8 +128,9 @@ asmlinkage long sys_iopl(unsigned long regsp)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
+	t->iopl = level << 12;
 	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-
+	set_iopl_mask(t->iopl);
 	return 0;
 }
 #else

From a1bf250a6f31afb8caac166ae50dc7b89c38084c Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Wed, 30 Jan 2008 13:33:10 +0100
Subject: [PATCH 590/890] x86: refactor ioport unification

Refactor ioport unification to pull out common code.

Cc: mboton@gmail.com
Cc: Kevin Winchester <kjwinchester@gmail.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ioport.c | 46 +++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index be72d809bce7..50e5e4a31c85 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -113,40 +113,42 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
  * on system-call entry - see also fork() and the signal handling
  * code.
  */
+static int do_iopl(unsigned int level, struct pt_regs *regs)
+{
+	unsigned int old = (regs->flags >> 12) & 3;
+
+	if (level > 3)
+		return -EINVAL;
+	/* Trying to gain more privileges? */
+	if (level > old) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+	}
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
+
+	return 0;
+}
+
 #ifdef CONFIG_X86_32
 asmlinkage long sys_iopl(unsigned long regsp)
 {
 	struct pt_regs *regs = (struct pt_regs *)&regsp;
 	unsigned int level = regs->bx;
-	unsigned int old = (regs->flags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
+	int rc;
+
+	rc = do_iopl(level, regs);
+	if (rc < 0)
+		goto out;
 
-	if (level > 3)
-		return -EINVAL;
-	/* Trying to gain more privileges? */
-	if (level > old) {
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-	}
 	t->iopl = level << 12;
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
 	set_iopl_mask(t->iopl);
-	return 0;
+out:
+	return rc;
 }
 #else
 asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
 {
-	unsigned int old = (regs->flags >> 12) & 3;
-
-	if (level > 3)
-		return -EINVAL;
-	/* Trying to gain more privileges? */
-	if (level > old) {
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
-	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-
-	return 0;
+	return do_iopl(level, regs);
 }
 #endif

From ef97001f3d869d7cc1956e0cc0d89e514e3f7db0 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:10 +0100
Subject: [PATCH 591/890] x86: change size of APICIDs from u8 to u16

Change the size of APICIDs from u8 to u16.  This partially
supports the new x2apic mode that will be present on future
processor chips. (Chips actually support 32-bit APICIDs, but that
change is more intrusive. Supporting 16-bit is sufficient for now).

Signed-off-by: Jack Steiner <steiner@sgi.com>

I've included just the partial change from u8 to u16 apicids.  The
remaining x2apic changes will be in a separate patch.

In addition, the fake_node_to_pxm_map[] and fake_apicid_to_node[]
tables have been moved from local data to the __initdata section
reducing stack pressure when MAX_NUMNODES and MAX_LOCAL_APIC are
increased in size.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/genapic_64.c |  4 ++--
 arch/x86/kernel/mpparse_64.c |  4 ++--
 arch/x86/kernel/smpboot_64.c |  2 +-
 arch/x86/mm/numa_64.c        |  2 +-
 arch/x86/mm/srat_64.c        | 22 +++++++++++++---------
 include/asm-x86/processor.h  | 14 +++++++-------
 include/asm-x86/smp_64.h     |  8 ++++----
 7 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index ce703e21c912..ac2b78f24074 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,10 +32,10 @@
  * array during this time.  Is it zeroed when the per_cpu
  * data area is removed.
  */
-u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
+u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
 					= { [0 ... NR_CPUS-1] = BAD_APICID };
 void *x86_cpu_to_apicid_ptr;
-DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 struct genapic __read_mostly *genapic = &apic_flat;
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index ef4aab123581..17d21e5b22d6 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -67,7 +67,7 @@ unsigned disabled_cpus __cpuinitdata;
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
 
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u16 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
 
 
 /*
@@ -132,7 +132,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 	 * area is created.
 	 */
 	if (x86_cpu_to_apicid_ptr) {
-		u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
+		u16 *x86_cpu_to_apicid = (u16 *)x86_cpu_to_apicid_ptr;
 		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
 	} else {
 		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 5bd42ce144da..1fea185c9dca 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -65,7 +65,7 @@ int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
+DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 551e3590e5c5..650001a87c8f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -626,7 +626,7 @@ void __init init_cpu_to_node(void)
 	int i;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		u8 apicid = x86_cpu_to_apicid_init[i];
+		u16 apicid = x86_cpu_to_apicid_init[i];
 
 		if (apicid == BAD_APICID)
 			continue;
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 5c0637e4c2f4..b367bc342fe0 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -130,6 +130,9 @@ void __init
 acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 {
 	int pxm, node;
+	int apic_id;
+
+	apic_id = pa->apic_id;
 	if (srat_disabled())
 		return;
 	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
@@ -145,10 +148,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		bad_srat();
 		return;
 	}
-	apicid_to_node[pa->apic_id] = node;
+	apicid_to_node[apic_id] = node;
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
-	       pxm, pa->apic_id, node);
+	       pxm, apic_id, node);
 }
 
 int update_end_of_memory(unsigned long end) {return -1;}
@@ -343,7 +346,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	/* First clean up the node list */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cutoff_node(i, start, end);
-		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
+		/* ZZZ why was this needed. At least add a comment */
+		if (nodes[i].end && (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
 			unparse_node(i);
 			node_set_offline(i);
 		}
@@ -384,6 +388,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 }
 
 #ifdef CONFIG_NUMA_EMU
+static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
+	[0 ... MAX_NUMNODES-1] = PXM_INVAL
+};
+static unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
 static int __init find_node_by_addr(unsigned long addr)
 {
 	int ret = NUMA_NO_NODE;
@@ -414,12 +424,6 @@ static int __init find_node_by_addr(unsigned long addr)
 void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 {
 	int i, j;
-	int fake_node_to_pxm_map[MAX_NUMNODES] = {
-		[0 ... MAX_NUMNODES-1] = PXM_INVAL
-	};
-	unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
-		[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-	};
 
 	printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
 			 "topology.\n");
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index e701ac5487e5..81ecfed83e47 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -90,14 +90,14 @@ struct cpuinfo_x86 {
 #ifdef CONFIG_SMP
 	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
 #endif
-	unsigned char x86_max_cores;	/* cpuid returned max cores value */
-	unsigned char apicid;
-	unsigned short x86_clflush_size;
+	u16 x86_max_cores;		/* cpuid returned max cores value */
+	u16 apicid;
+	u16 x86_clflush_size;
 #ifdef CONFIG_SMP
-	unsigned char booted_cores;	/* number of cores as seen by OS */
-	__u8 phys_proc_id; 		/* Physical processor id. */
-	__u8 cpu_core_id;  		/* Core id */
-	__u8 cpu_index;			/* index into per_cpu list */
+	u16 booted_cores;		/* number of cores as seen by OS */
+	u16 phys_proc_id; 		/* Physical processor id. */
+	u16 cpu_core_id;  		/* Core id */
+	u16 cpu_index;			/* index into per_cpu list */
 #endif
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index 2feddda91e12..b1d5381aa760 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -26,14 +26,14 @@ extern void unlock_ipi_call_lock(void);
 extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 				  void *info, int wait);
 
-extern u8 __initdata x86_cpu_to_apicid_init[];
+extern u16 __initdata x86_cpu_to_apicid_init[];
 extern void *x86_cpu_to_apicid_ptr;
-extern u8 bios_cpu_apicid[];
+extern u16 bios_cpu_apicid[];
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
-DECLARE_PER_CPU(u8, cpu_llc_id);
-DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
+DECLARE_PER_CPU(u16, cpu_llc_id);
+DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {

From 3cc87e3f4042a099fb649c5df33d3ccfae36173f Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 592/890] x86: change size of node ids from u8 to u16

Change the size of node ids from 8 bits to 16 bits to
accomodate more than 256 nodes.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c      | 9 ++++++---
 arch/x86/mm/srat_64.c      | 2 +-
 include/asm-x86/numa_64.h  | 4 ++--
 include/asm-x86/topology.h | 2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 650001a87c8f..bc6dd5051d60 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -11,6 +11,7 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/nodemask.h>
+#include <linux/sched.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -30,12 +31,12 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-int cpu_to_node_map[NR_CPUS] __read_mostly = {
+u16 cpu_to_node_map[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 EXPORT_SYMBOL(cpu_to_node_map);
 
-unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
@@ -543,7 +544,9 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	node_to_cpumask_map[0] = cpumask_of_cpu(0);
+	/* we can't use cpumask_of_cpu() yet */
+	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
+	cpu_set(0, node_to_cpumask_map[0]);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index b367bc342fe0..bccbdc7be434 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -391,7 +391,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
 	[0 ... MAX_NUMNODES-1] = PXM_INVAL
 };
-static unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+static u16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 static int __init find_node_by_addr(unsigned long addr)
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 4449889bb0c0..941889471b52 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -20,7 +20,7 @@ extern void numa_set_node(int cpu, int node);
 extern void srat_reserve_add_area(int nodeid);
 extern int hotadd_percent;
 
-extern unsigned char apicid_to_node[MAX_LOCAL_APIC];
+extern u16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 extern unsigned long numa_free_all_bootmem(void);
@@ -40,6 +40,6 @@ static inline void clear_node_cpumask(int cpu)
 #define clear_node_cpumask(cpu) do {} while (0)
 #endif
 
-#define NUMA_NO_NODE 0xff
+#define NUMA_NO_NODE 0xffff
 
 #endif
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 9c251604ecc4..f8706b2a3d88 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -30,7 +30,7 @@
 #include <asm/mpspec.h>
 
 /* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
+extern u16 cpu_to_node_map[];
 extern cpumask_t node_to_cpumask_map[];
 
 /* Returns the number of the node containing CPU 'cpu' */

From 2c6b8c030cfca334c3d700ee504036c585c4c6a3 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 593/890] x86: change NR_CPUS arrays in powernow-k8

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	powernow_k8_data *powernow_data[NR_CPUS];

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 99e1ef9939be..a0522735dd9d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -52,7 +52,7 @@
 /* serialize freq changes  */
 static DEFINE_MUTEX(fidvid_mutex);
 
-static struct powernow_k8_data *powernow_data[NR_CPUS];
+static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
 
 static int cpu_family = CPU_OPTERON;
 
@@ -1018,7 +1018,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
 	cpumask_t oldmask = CPU_MASK_ALL;
-	struct powernow_k8_data *data = powernow_data[pol->cpu];
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
 	unsigned int newstate;
@@ -1094,7 +1094,7 @@ err_out:
 /* Driver entry point to verify the policy and range of frequencies */
 static int powernowk8_verify(struct cpufreq_policy *pol)
 {
-	struct powernow_k8_data *data = powernow_data[pol->cpu];
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 
 	if (!data)
 		return -EINVAL;
@@ -1202,7 +1202,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
 			data->currfid, data->currvid);
 
-	powernow_data[pol->cpu] = data;
+	per_cpu(powernow_data, pol->cpu) = data;
 
 	return 0;
 
@@ -1216,7 +1216,7 @@ err_out:
 
 static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
 {
-	struct powernow_k8_data *data = powernow_data[pol->cpu];
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 
 	if (!data)
 		return -EINVAL;
@@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	cpumask_t oldmask = current->cpus_allowed;
 	unsigned int khz = 0;
 
-	data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))];
+	data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu)));
 
 	if (!data)
 		return -EINVAL;

From 24b0d22b7b63bfc71853faf64e76cd45701ecf2a Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 594/890] x86: change NR_CPUS arrays in smpboot_64

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	task_struct *idle_thread_array[NR_CPUS];

This is only done if CONFIG_HOTPLUG_CPU is defined
as otherwise, the array is removed after initialization
anyways.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot_64.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 1fea185c9dca..50e207a8261f 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -111,10 +111,20 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
  * a new thread. Also avoids complicated thread destroy functionality
  * for idle threads.
  */
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
+ * removed after init for !CONFIG_HOTPLUG_CPU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
+#define get_idle_for_cpu(x)     (per_cpu(idle_thread_array, x))
+#define set_idle_for_cpu(x,p)   (per_cpu(idle_thread_array, x) = (p))
+#else
 struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-
 #define get_idle_for_cpu(x)     (idle_thread_array[(x)])
 #define set_idle_for_cpu(x,p)   (idle_thread_array[(x)] = (p))
+#endif
+
 
 /*
  * Currently trivial. Write the real->protected mode

From 30964d54e94229f567a7312a0e6666f9deb6a488 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 595/890] x86: change NR_CPUS arrays in topology

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	i386_cpu cpu_devices[NR_CPUS];

(And change the struct name to x86_cpu.)

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/topology.c | 8 ++++----
 include/asm-x86/cpu.h      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 7e16d675eb85..a0d1719bda79 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -31,7 +31,7 @@
 #include <linux/mmzone.h>
 #include <asm/cpu.h>
 
-static struct i386_cpu cpu_devices[NR_CPUS];
+static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
 int __cpuinit arch_register_cpu(int num)
 {
@@ -46,16 +46,16 @@ int __cpuinit arch_register_cpu(int num)
 	 */
 #ifdef CONFIG_HOTPLUG_CPU
 	if (num)
-		cpu_devices[num].cpu.hotpluggable = 1;
+		per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
 #endif
 
-	return register_cpu(&cpu_devices[num].cpu, num);
+	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 void arch_unregister_cpu(int num)
 {
-	return unregister_cpu(&cpu_devices[num].cpu);
+	return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
 }
 EXPORT_SYMBOL(arch_register_cpu);
 EXPORT_SYMBOL(arch_unregister_cpu);
diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h
index b1bc7b1b64b0..85ece5f10e9e 100644
--- a/include/asm-x86/cpu.h
+++ b/include/asm-x86/cpu.h
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <linux/percpu.h>
 
-struct i386_cpu {
+struct x86_cpu {
 	struct cpu cpu;
 };
 extern int arch_register_cpu(int num);

From 3b41908902df1dba141cd7de1a727bb03718a654 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 596/890] x86: cleanup x86_cpu_to_apicid references

Clean up references to x86_cpu_to_apicid.  Removes extraneous
comments and standardizes on "x86_*_early_ptr" for the early
kernel init references.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/genapic_64.c | 11 ++---------
 arch/x86/kernel/mpparse_64.c | 11 +++--------
 arch/x86/kernel/setup_64.c   |  2 +-
 arch/x86/kernel/smpboot_32.c |  9 ++-------
 arch/x86/kernel/smpboot_64.c | 16 +++++++++-------
 include/asm-x86/smp_32.h     |  2 +-
 include/asm-x86/smp_64.h     |  2 +-
 7 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index ac2b78f24074..4ae7b6440260 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -24,17 +24,10 @@
 #include <acpi/acpi_bus.h>
 #endif
 
-/*
- * which logical CPU number maps to which CPU (physical APIC ID)
- *
- * The following static array is used during kernel startup
- * and the x86_cpu_to_apicid_ptr contains the address of the
- * array during this time.  Is it zeroed when the per_cpu
- * data area is removed.
- */
+/* which logical CPU number maps to which CPU (physical APIC ID) */
 u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
 					= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_ptr;
+void *x86_cpu_to_apicid_early_ptr;
 DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index 17d21e5b22d6..528ad9696d96 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -125,14 +125,9 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 		cpu = 0;
  	}
 	bios_cpu_apicid[cpu] = m->mpc_apicid;
-	/*
-	 * We get called early in the the start_kernel initialization
-	 * process when the per_cpu data area is not yet setup, so we
-	 * use a static array that is removed after the per_cpu data
-	 * area is created.
-	 */
-	if (x86_cpu_to_apicid_ptr) {
-		u16 *x86_cpu_to_apicid = (u16 *)x86_cpu_to_apicid_ptr;
+	/* are we being called early in kernel startup? */
+	if (x86_cpu_to_apicid_early_ptr) {
+		u16 *x86_cpu_to_apicid = (u16 *)x86_cpu_to_apicid_early_ptr;
 		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
 	} else {
 		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 12bad27d66f8..e2beb4cba15f 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -360,7 +360,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SMP
 	/* setup to use the static apicid table during kernel startup */
-	x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 2034332ad080..915ec6267326 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -91,15 +91,10 @@ static cpumask_t smp_commenced_mask;
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-/*
- * The following static array is used during kernel startup
- * and the x86_cpu_to_apicid_ptr contains the address of the
- * array during this time.  Is it zeroed when the per_cpu
- * data area is removed.
- */
+/* which logical CPU number maps to which CPU (physical APIC ID) */
 u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
 			{ [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_ptr;
+void *x86_cpu_to_apicid_early_ptr;
 DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 50e207a8261f..a2076b5f12af 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -852,23 +852,25 @@ static int __init smp_sanity_check(unsigned max_cpus)
 }
 
 /*
- * Copy apicid's found by MP_processor_info from initial array to the per cpu
- * data area.  The x86_cpu_to_apicid_init array is then expendable and the
- * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no
- * longer available.
+ * Copy data used in early init routines from the initial arrays to the
+ * per cpu data areas.  These arrays then become expendable and the
+ * *_ptrs are zeroed indicating that the static arrays are gone.
  */
 void __init smp_set_apicids(void)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, cpu_possible_map) {
+	for_each_possible_cpu(cpu) {
 		if (per_cpu_offset(cpu))
 			per_cpu(x86_cpu_to_apicid, cpu) =
 						x86_cpu_to_apicid_init[cpu];
+		else
+			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+									cpu);
 	}
 
-	/* indicate the static array will be going away soon */
-	x86_cpu_to_apicid_ptr = NULL;
+	/* indicate the early static arrays are gone */
+	x86_cpu_to_apicid_early_ptr = NULL;
 }
 
 static void __init smp_cpu_index_default(void)
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index c69e960429cc..56152e312287 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -30,7 +30,7 @@ extern void (*mtrr_hook) (void);
 extern void zap_low_mappings (void);
 
 extern u8 __initdata x86_cpu_to_apicid_init[];
-extern void *x86_cpu_to_apicid_ptr;
+extern void *x86_cpu_to_apicid_early_ptr;
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index b1d5381aa760..6fa332db29cc 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -27,7 +27,7 @@ extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 				  void *info, int wait);
 
 extern u16 __initdata x86_cpu_to_apicid_init[];
-extern void *x86_cpu_to_apicid_ptr;
+extern void *x86_cpu_to_apicid_early_ptr;
 extern u16 bios_cpu_apicid[];
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);

From df3825c56dd70a4d7796041388f3cfe51c1db832 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:11 +0100
Subject: [PATCH 597/890] x86: change NR_CPUS arrays in numa_64

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	char cpu_to_node_map[NR_CPUS];

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c   |  4 +++-
 arch/x86/kernel/smpboot_64.c |  6 +++++-
 arch/x86/mm/numa_64.c        | 20 ++++++++++++++++----
 include/asm-x86/numa_64.h    |  2 --
 include/asm-x86/topology.h   | 15 +++++++++++++--
 net/sunrpc/svc.c             |  1 +
 6 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index e2beb4cba15f..529e45c37b1c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -65,6 +65,7 @@
 #include <asm/cacheflush.h>
 #include <asm/mce.h>
 #include <asm/ds.h>
+#include <asm/topology.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
@@ -359,8 +360,9 @@ void __init setup_arch(char **cmdline_p)
 	io_delay_init();
 
 #ifdef CONFIG_SMP
-	/* setup to use the static apicid table during kernel startup */
+	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
+	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index a2076b5f12af..a8bc2bcdb74a 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -861,9 +861,12 @@ void __init smp_set_apicids(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		if (per_cpu_offset(cpu))
+		if (per_cpu_offset(cpu)) {
 			per_cpu(x86_cpu_to_apicid, cpu) =
 						x86_cpu_to_apicid_init[cpu];
+			per_cpu(x86_cpu_to_node_map, cpu) =
+						x86_cpu_to_node_map_init[cpu];
+		}
 		else
 			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
 									cpu);
@@ -871,6 +874,7 @@ void __init smp_set_apicids(void)
 
 	/* indicate the early static arrays are gone */
 	x86_cpu_to_apicid_early_ptr = NULL;
+	x86_cpu_to_node_map_early_ptr = NULL;
 }
 
 static void __init smp_cpu_index_default(void)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index bc6dd5051d60..382377d6421d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,10 +31,14 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-u16 cpu_to_node_map[NR_CPUS] __read_mostly = {
+u16 x86_cpu_to_node_map_init[NR_CPUS] __initdata = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
-EXPORT_SYMBOL(cpu_to_node_map);
+void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_init);
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
@@ -544,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	/* we can't use cpumask_of_cpu() yet */
+	/* cpumask_of_cpu() may not be available during early startup */
 	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
 	cpu_set(0, node_to_cpumask_map[0]);
 	e820_register_active_regions(0, start_pfn, end_pfn);
@@ -558,8 +562,16 @@ __cpuinit void numa_add_cpu(int cpu)
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
+	u16 *cpu_to_node_map = (u16 *)x86_cpu_to_node_map_early_ptr;
+
 	cpu_pda(cpu)->nodenumber = node;
-	cpu_to_node_map[cpu] = node;
+
+	if(cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+	else if(per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
 }
 
 unsigned long __init numa_free_all_bootmem(void)
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 941889471b52..c797cd523d2b 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -40,6 +40,4 @@ static inline void clear_node_cpumask(int cpu)
 #define clear_node_cpumask(cpu) do {} while (0)
 #endif
 
-#define NUMA_NO_NODE 0xffff
-
 #endif
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index f8706b2a3d88..e612ed8d2bc7 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -30,13 +30,24 @@
 #include <asm/mpspec.h>
 
 /* Mappings between logical cpu number and node number */
-extern u16 cpu_to_node_map[];
+DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
+extern u16 __initdata x86_cpu_to_node_map_init[];
+extern void *x86_cpu_to_node_map_early_ptr;
 extern cpumask_t node_to_cpumask_map[];
 
+#define NUMA_NO_NODE	((u16)(~0))
+
 /* Returns the number of the node containing CPU 'cpu' */
 static inline int cpu_to_node(int cpu)
 {
-	return cpu_to_node_map[cpu];
+	u16 *cpu_to_node_map = (u16 *)x86_cpu_to_node_map_early_ptr;
+
+	if (cpu_to_node_map)
+		return cpu_to_node_map[cpu];
+	else if (per_cpu_offset(cpu))
+		return per_cpu(x86_cpu_to_node_map, cpu);
+	else
+		return NUMA_NO_NODE;
 }
 
 /*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a4a6bf7deaa4..4ad5fbbb18b4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>

From ea348f3e58f43a27c8ac414dd3a14ee59528b86a Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 598/890] x86: change NR_CPUS arrays in acpi-cpufreq

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	acpi_cpufreq_data *drv_data[NR_CPUS]

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 25 +++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index fea0af0476b9..a962dcb9c408 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -67,7 +67,8 @@ struct acpi_cpufreq_data {
 	unsigned int cpu_feature;
 };
 
-static struct acpi_cpufreq_data *drv_data[NR_CPUS];
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
 
@@ -218,14 +219,14 @@ static u32 get_cur_val(cpumask_t mask)
 	if (unlikely(cpus_empty(mask)))
 		return 0;
 
-	switch (drv_data[first_cpu(mask)]->cpu_feature) {
+	switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = drv_data[first_cpu(mask)]->acpi_data;
+		perf = per_cpu(drv_data, first_cpu(mask))->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -325,7 +326,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 #endif
 
-	retval = drv_data[cpu]->max_freq * perf_percent / 100;
+	retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
 
 	put_cpu();
 	set_cpus_allowed(current, saved_mask);
@@ -336,7 +337,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	struct acpi_cpufreq_data *data = drv_data[cpu];
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
 	unsigned int freq;
 
 	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
@@ -370,7 +371,7 @@ static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 			       unsigned int target_freq, unsigned int relation)
 {
-	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
 	cpumask_t online_policy_cpus;
@@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_verify\n");
 
@@ -570,7 +571,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return -ENOMEM;
 
 	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
-	drv_data[cpu] = data;
+	per_cpu(drv_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -714,20 +715,20 @@ err_unreg:
 	acpi_processor_unregister_performance(perf, cpu);
 err_free:
 	kfree(data);
-	drv_data[cpu] = NULL;
+	per_cpu(drv_data, cpu) = NULL;
 
 	return result;
 }
 
 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
-		drv_data[policy->cpu] = NULL;
+		per_cpu(drv_data, policy->cpu) = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
 		kfree(data);
@@ -738,7 +739,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_resume\n");
 

From e8c10ef9dde3ab7b7d7db6804859d9daf38f01c4 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 599/890] x86: change bios_cpu_apicid to percpu data variable

Change static bios_cpu_apicid array to a per_cpu data variable.
This includes using a static array used during initialization
similar to the way x86_cpu_to_apicid[] is handled.

There is one early use of bios_cpu_apicid in apic_is_clustered_box().
The other reference in cpu_present_to_apicid() is called after
smp_set_apicids() has setup the percpu version of bios_cpu_apicid.

[ mingo@elte.hu: build fix ]

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c    | 16 ++++++++++++++--
 arch/x86/kernel/mpparse_64.c | 17 ++++++++++++-----
 arch/x86/kernel/setup_64.c   |  3 +++
 arch/x86/kernel/smpboot_64.c |  7 +++++++
 include/asm-x86/smp_64.h     |  8 +++++---
 5 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 01d4ca27ecf0..f9919c492699 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1180,14 +1180,26 @@ __cpuinit int apic_is_clustered_box(void)
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
-		id = bios_cpu_apicid[i];
+		/* are we being called early in kernel startup? */
+		if (x86_bios_cpu_apicid_early_ptr) {
+			id = ((u16 *)x86_bios_cpu_apicid_early_ptr)[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
+
 		if (id != BAD_APICID)
 			__set_bit(APIC_CLUSTERID(id), clustermap);
 	}
 
 	/* Problem:  Partially populated chassis may not have CPUs in some of
 	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * bios_cpu_apicid entries, thus causing zeroes in the bitmap.  Since
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.  Since
 	 * clusters are allocated sequentially, count zeros only if they are
 	 * bounded by ones.
 	 */
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index 528ad9696d96..fd671754dcb5 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -67,7 +67,11 @@ unsigned disabled_cpus __cpuinitdata;
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
 
-u16 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
+				= { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_bios_cpu_apicid_early_ptr;
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 
 /*
@@ -118,19 +122,22 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 	physid_set(m->mpc_apicid, phys_cpu_present_map);
  	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
  		/*
- 		 * bios_cpu_apicid is required to have processors listed
+ 		 * x86_bios_cpu_apicid is required to have processors listed
  		 * in same order as logical cpu numbers. Hence the first
  		 * entry is BSP, and so on.
  		 */
 		cpu = 0;
  	}
-	bios_cpu_apicid[cpu] = m->mpc_apicid;
 	/* are we being called early in kernel startup? */
 	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *x86_cpu_to_apicid = (u16 *)x86_cpu_to_apicid_early_ptr;
-		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+		u16 *cpu_to_apicid = (u16 *)x86_cpu_to_apicid_early_ptr;
+		u16 *bios_cpu_apicid = (u16 *)x86_bios_cpu_apicid_early_ptr;
+
+		cpu_to_apicid[cpu] = m->mpc_apicid;
+		bios_cpu_apicid[cpu] = m->mpc_apicid;
 	} else {
 		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+		per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
 	}
 
 	cpu_set(cpu, cpu_possible_map);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 529e45c37b1c..71a420c7fee7 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -362,8 +362,11 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_SMP
 	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
+#ifdef CONFIG_NUMA
 	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
 #endif
+	x86_bios_cpu_apicid_early_ptr = (void *)&x86_bios_cpu_apicid_init;
+#endif
 
 #ifdef CONFIG_ACPI
 	/*
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index a8bc2bcdb74a..93071cdf0849 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -864,8 +864,12 @@ void __init smp_set_apicids(void)
 		if (per_cpu_offset(cpu)) {
 			per_cpu(x86_cpu_to_apicid, cpu) =
 						x86_cpu_to_apicid_init[cpu];
+#ifdef CONFIG_NUMA
 			per_cpu(x86_cpu_to_node_map, cpu) =
 						x86_cpu_to_node_map_init[cpu];
+#endif
+			per_cpu(x86_bios_cpu_apicid, cpu) =
+						x86_bios_cpu_apicid_init[cpu];
 		}
 		else
 			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
@@ -874,7 +878,10 @@ void __init smp_set_apicids(void)
 
 	/* indicate the early static arrays are gone */
 	x86_cpu_to_apicid_early_ptr = NULL;
+#ifdef CONFIG_NUMA
 	x86_cpu_to_node_map_early_ptr = NULL;
+#endif
+	x86_bios_cpu_apicid_early_ptr = NULL;
 }
 
 static void __init smp_cpu_index_default(void)
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index 6fa332db29cc..e0a75519ad21 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -27,18 +27,20 @@ extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 				  void *info, int wait);
 
 extern u16 __initdata x86_cpu_to_apicid_init[];
+extern u16 __initdata x86_bios_cpu_apicid_init[];
 extern void *x86_cpu_to_apicid_early_ptr;
-extern u16 bios_cpu_apicid[];
+extern void *x86_bios_cpu_apicid_early_ptr;
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
 DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
+DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
-		return (int)bios_cpu_apicid[mps_cpu];
+	if (cpu_present(mps_cpu))
+		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
 }

From f13bd3e7935f7020f7c622bf3f8cae8eee757a53 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 600/890] x86: use wrmsrl in kprobes.c, step.c

Where x86_32 passed zero in the high 32 bits, use wrmsrl which
will zero extend for us.  This allows ifdefs for 32/64 bit to
be eliminated.

Eliminate ifdef in step.c.  Similar cleanup was done when unifying
kprobes_32|64.c and wrmsr() was chosen there over wrmsrl().  This
patch changes these to wrmsrl.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 4 ++--
 arch/x86/kernel/step.c    | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 93aff49798ee..edc0a8e59247 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -410,13 +410,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 static void __kprobes clear_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0);
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
 }
 
 static void __kprobes restore_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0);
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 5884dd485db8..b801e76cebf7 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -229,11 +229,7 @@ static void write_debugctlmsr(struct task_struct *child, unsigned long val)
 	if (child != current)
 		return;
 
-#ifdef CONFIG_X86_64
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
-#else
-	wrmsr(MSR_IA32_DEBUGCTLMSR, val, 0);
-#endif
 }
 
 /*

From 37cd9cf3dafed82f7cf905785883300f6ff7c818 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 601/890] x86: common x86_32|64 naming

Rename convert_rip_to_linear to convert_ip_to_linear for shared
X86_32|64 use.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c   | 4 ++--
 arch/x86/mm/fault_32.c   | 2 +-
 arch/x86/mm/fault_64.c   | 2 +-
 include/asm-x86/ptrace.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index b801e76cebf7..80b37181a42b 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -89,7 +89,7 @@ unsigned long get_segment_eip(struct pt_regs *regs,
 #ifdef CONFIG_X86_32
 static
 #endif
-unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
+unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
 	unsigned long addr, seg;
 
@@ -136,7 +136,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 {
 	int i, copied;
 	unsigned char opcode[15];
-	unsigned long addr = convert_rip_to_linear(child, regs);
+	unsigned long addr = convert_ip_to_linear(child, regs);
 
 	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
 	for (i = 0; i < copied; i++) {
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b92922a1d65f..b4d19c2d4f05 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -95,7 +95,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+	instr = (unsigned char __user *)convert_ip_to_linear(current, regs);
 #endif
 
 	max_instr = instr + 15;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index cf7e99895b91..d519b41f1962 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -98,7 +98,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+	instr = (unsigned char __user *)convert_ip_to_linear(current, regs);
 #endif
 
 	max_instr = instr + 15;
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index cc4456667d89..35c103714906 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -177,7 +177,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 struct task_struct;
 
 extern unsigned long
-convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
+convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */

From f2857ce92023409df1544737d5b3499b4630a183 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 602/890] x86: remove last user of get_segment_eip

is_prefetch was the last user of get_segment_eip and only on
X86_32.  This function returned the faulting instruction's
address and set the upper segment limit.

Instead, use the convert_ip_to_linear helper and rely on
probe_kernel_address to do the segment checks which was
already done everywhere the segment limit was being checked
on X86_32.

Remove get_segment_eip as well.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/step.c   | 84 ----------------------------------------
 arch/x86/mm/fault_32.c   | 15 +------
 arch/x86/mm/fault_64.c   | 15 +------
 include/asm-x86/ptrace.h |  6 ++-
 4 files changed, 8 insertions(+), 112 deletions(-)

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 80b37181a42b..2ef1a5f8d675 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,90 +5,6 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 
-#ifdef CONFIG_X86_32
-#include <linux/uaccess.h>
-
-#include <asm/desc.h>
-
-/*
- * Return EIP plus the CS segment base.  The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-unsigned long get_segment_eip(struct pt_regs *regs,
-					    unsigned long *eip_limit)
-{
-	unsigned long ip = regs->ip;
-	unsigned seg = regs->cs & 0xffff;
-	u32 seg_ar, seg_limit, base, *desc;
-
-	/* Unlikely, but must come before segment checks. */
-	if (unlikely(regs->flags & VM_MASK)) {
-		base = seg << 4;
-		*eip_limit = base + 0xffff;
-		return base + (ip & 0xffff);
-	}
-
-	/* The standard kernel/user address space limit. */
-	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
-	/* By far the most common cases. */
-	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-		return ip;
-
-	/* Check the segment exists, is within the current LDT/GDT size,
-	   that kernel/user (ring 0..3) has the appropriate privilege,
-	   that it's a code segment, and get the limit. */
-	__asm__("larl %3,%0; lsll %3,%1"
-		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-	if ((~seg_ar & 0x9800) || ip > seg_limit) {
-		*eip_limit = 0;
-		return 1;	 /* So that returned ip > *eip_limit. */
-	}
-
-	/* Get the GDT/LDT descriptor base.
-	   When you look for races in this code remember that
-	   LDT and other horrors are only used in user space. */
-	if (seg & (1<<2)) {
-		/* Must lock the LDT while reading it. */
-		mutex_lock(&current->mm->context.lock);
-		desc = current->mm->context.ldt;
-		desc = (void *)desc + (seg & ~7);
-	} else {
-		/* Must disable preemption while reading the GDT. */
-		desc = (u32 *)get_cpu_gdt_table(get_cpu());
-		desc = (void *)desc + (seg & ~7);
-	}
-
-	/* Decode the code segment base from the descriptor */
-	base = get_desc_base((struct desc_struct *)desc);
-
-	if (seg & (1<<2))
-		mutex_unlock(&current->mm->context.lock);
-	else
-		put_cpu();
-
-	/* Adjust EIP and segment limit, and clamp at the kernel limit.
-	   It's legitimate for segments to wrap at 0xffffffff. */
-	seg_limit += base;
-	if (seg_limit < *eip_limit && seg_limit >= base)
-		*eip_limit = seg_limit;
-	return ip + base;
-}
-#endif
-
-#ifdef CONFIG_X86_32
-static
-#endif
 unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
 	unsigned long addr, seg;
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b4d19c2d4f05..36cb67e02b04 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -81,7 +81,6 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-	unsigned long limit;
 	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 		     boot_cpu_data.x86 >= 6)) {
 		/* Catch an obscure case of prefetch inside an NX page. */
@@ -90,30 +89,23 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	} else {
 		return 0;
 	}
-	instr = (unsigned char *)get_segment_eip(regs, &limit);
 #else
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-	instr = (unsigned char __user *)convert_ip_to_linear(current, regs);
 #endif
 
+	instr = (unsigned char *)convert_ip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-#ifdef CONFIG_X86_64
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
-#endif
 
 	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
-#ifdef CONFIG_X86_32
-		if (instr > (unsigned char *)limit)
-			break;
-#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -155,10 +147,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
-#ifdef CONFIG_X86_32
-			if (instr > (unsigned char *)limit)
-				break;
-#endif
+
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index d519b41f1962..80f8436ac8b2 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -84,7 +84,6 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-	unsigned long limit;
 	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 		     boot_cpu_data.x86 >= 6)) {
 		/* Catch an obscure case of prefetch inside an NX page. */
@@ -93,30 +92,23 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	} else {
 		return 0;
 	}
-	instr = (unsigned char *)get_segment_eip(regs, &limit);
 #else
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-	instr = (unsigned char __user *)convert_ip_to_linear(current, regs);
 #endif
 
+	instr = (unsigned char *)convert_ip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-#ifdef CONFIG_X86_64
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
-#endif
 
 	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
-#ifdef CONFIG_X86_32
-		if (instr > (unsigned char *)limit)
-			break;
-#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -158,10 +150,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
-#ifdef CONFIG_X86_32
-			if (instr > (unsigned char *)limit)
-				break;
-#endif
+
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 35c103714906..ee4b595e1ccc 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -70,6 +70,10 @@ struct pt_regs {
 #include <asm/segment.h>
 
 struct task_struct;
+
+extern unsigned long
+convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
+
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
 
 /*
@@ -184,8 +188,6 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #ifdef __KERNEL__
 
-unsigned long get_segment_eip(struct pt_regs *regs, unsigned long *eip_limit);
-
 /*
  * These are defined as per linux/ptrace.h, which see.
  */

From 608566b4edda5079c7812c2108a89c0fcf2894bb Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:12 +0100
Subject: [PATCH 603/890] x86: do_page_fault small unification

Copy the prefetch of map_sem from X86_64 and move the check
notify_page_fault (soon to be kprobe_handle_fault) out of
the unlikely if() statement.

This makes the X86_32|64 pagefault handlers closer to each
other.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 16 +++++++---------
 arch/x86/mm/fault_64.c |  7 ++-----
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 36cb67e02b04..52c13d2e011e 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -295,13 +295,18 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 */
 	trace_hardirqs_fixup();
 
+	tsk = current;
+	mm = tsk->mm;
+	prefetchw(&mm->mmap_sem);
+
 	/* get the address */
 	address = read_cr2();
 
-	tsk = current;
-
 	si_code = SEGV_MAPERR;
 
+	if (notify_page_fault(regs))
+		return;
+
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
 	 * 'reference' page table is init_mm.pgd.
@@ -319,8 +324,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
-		if (notify_page_fault(regs))
-			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
@@ -328,16 +331,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 	}
 
-	if (notify_page_fault(regs))
-		return;
-
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
 	   fault has been handled. */
 	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
 		local_irq_enable();
 
-	mm = tsk->mm;
-
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
 	 * atomic region then we must not take the fault.
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 80f8436ac8b2..c6b3ad515cf1 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -355,6 +355,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
 	si_code = SEGV_MAPERR;
 
+	if (notify_page_fault(regs))
+		return;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
@@ -380,8 +382,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
-		if (notify_page_fault(regs))
-			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
@@ -389,9 +389,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
-	if (notify_page_fault(regs))
-		return;
-
 	if (likely(regs->flags & X86_EFLAGS_IF))
 		local_irq_enable();
 

From 6f4d368ef9e9f91aa0019c11e90773ea32d94625 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:13 +0100
Subject: [PATCH 604/890] x86: last of trivial fault_32|64.c unification

Comments, indentation, printk format.

Uses task_pid_nr() on X86_64 now, but this is always defined
to task->pid.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 20 ++++++++++++--------
 arch/x86/mm/fault_64.c | 29 +++++++++++++++++++----------
 2 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 52c13d2e011e..31113deeb7c0 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -36,10 +36,10 @@
  *	bit 3 == 1 means use of reserved bit detected
  *	bit 4 == 1 means fault was an instruction fetch
  */
-#define PF_PROT	(1<<0)
+#define PF_PROT		(1<<0)
 #define PF_WRITE	(1<<1)
-#define PF_USER	(1<<2)
-#define PF_RSVD	(1<<3)
+#define PF_USER		(1<<2)
+#define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -460,11 +460,15 @@ bad_area_nosemaphore:
 
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
-			printk("%s%s[%d]: segfault at %08lx ip %08lx "
-			    "sp %08lx error %lx\n",
-			    task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-			    tsk->comm, task_pid_nr(tsk), address, regs->ip,
-			    regs->sp, error_code);
+			printk(
+#ifdef CONFIG_X86_32
+			"%s%s[%d]: segfault at %08lx ip %08lx sp %08lx error %lx\n",
+#else
+			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
+#endif
+			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			tsk->comm, task_pid_nr(tsk), address, regs->ip,
+			regs->sp, error_code);
 		}
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index c6b3ad515cf1..c48d95c306df 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -440,8 +440,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
 	if (error_code & PF_USER) {
-		/* Allow userspace just enough access below the stack pointer
-		 * to let the 'enter' instruction work.
+		/*
+		 * Accessing the stack below %sp is always a bug.
+		 * The large cushion allows instructions like enter
+		 * and pusha to work.  ("enter $65535,$31" pushes
+		 * 32 pointers and then decrements %sp by 65535.)
 		 */
 		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
 			goto bad_area;
@@ -522,10 +525,14 @@ bad_area_nosemaphore:
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
 			printk(
-		       "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
-					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-					tsk->comm, tsk->pid, address, regs->ip,
-					regs->sp, error_code);
+#ifdef CONFIG_X86_32
+			"%s%s[%d]: segfault at %08lx ip %08lx sp %08lx error %lx\n",
+#else
+			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
+#endif
+			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			tsk->comm, task_pid_nr(tsk), address, regs->ip,
+			regs->sp, error_code);
 		}
 
 		tsk->thread.cr2 = address;
@@ -609,10 +616,12 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
-	/* Note that races in the updates of insync and start aren't
-	   problematic:
-	   insync can only get set bits added, and updates to start are only
-	   improving performance (without affecting correctness if undone). */
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
 	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
 	static unsigned long start = VMALLOC_START & PGDIR_MASK;
 	unsigned long address;

From fdfe8aa84dd78cfdff427d0520f5974fb5e9c220 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:13 +0100
Subject: [PATCH 605/890] x86: function ifdefs in fault_32|64.c

Add caller of is_errata93() to X86_32, ifdef'd to do
nothing.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 59 +++++++++++++++++++++++++++++++++++++++---
 arch/x86/mm/fault_64.c | 30 ++++++++++++++++++---
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 31113deeb7c0..eba90198b15a 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -217,6 +217,7 @@ KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
 KERN_ERR "******* Please consider a BIOS update.\n"
 KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
 
 /* Workaround for K8 erratum #93 & buggy BIOS.
    BIOS SMM functions are required to use a specific workaround
@@ -224,10 +225,12 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
    A lot of BIOS that didn't get tested properly miss this.
    The OS sees this as a page fault with the upper 32bits of RIP cleared.
    Try to work around it here.
-   Note we only handle faults in kernel here. */
-
+   Note we only handle faults in kernel here.
+   Does nothing for X86_32
+ */
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
+#ifdef CONFIG_X86_64
 	static int warned;
 	if (address != regs->ip)
 		return 0;
@@ -243,9 +246,10 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 		regs->ip = address;
 		return 1;
 	}
+#endif
 	return 0;
 }
-#endif
+
 
 /*
  * Handle a fault on the vmalloc or module mapping area
@@ -254,6 +258,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
  */
 static inline int vmalloc_fault(unsigned long address)
 {
+#ifdef CONFIG_X86_32
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
 	pte_t *pte_k;
@@ -272,6 +277,51 @@ static inline int vmalloc_fault(unsigned long address)
 	if (!pte_present(*pte_k))
 		return -1;
 	return 0;
+#else
+	pgd_t *pgd, *pgd_ref;
+	pud_t *pud, *pud_ref;
+	pmd_t *pmd, *pmd_ref;
+	pte_t *pte, *pte_ref;
+
+	/* Copy kernel mappings over when needed. This can also
+	   happen within a race in page table update. In the later
+	   case just flush. */
+
+	pgd = pgd_offset(current->mm ?: &init_mm, address);
+	pgd_ref = pgd_offset_k(address);
+	if (pgd_none(*pgd_ref))
+		return -1;
+	if (pgd_none(*pgd))
+		set_pgd(pgd, *pgd_ref);
+	else
+		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+	/* Below here mismatches are bugs because these lower tables
+	   are shared */
+
+	pud = pud_offset(pgd, address);
+	pud_ref = pud_offset(pgd_ref, address);
+	if (pud_none(*pud_ref))
+		return -1;
+	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+		BUG();
+	pmd = pmd_offset(pud, address);
+	pmd_ref = pmd_offset(pud_ref, address);
+	if (pmd_none(*pmd_ref))
+		return -1;
+	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+		BUG();
+	pte_ref = pte_offset_kernel(pmd_ref, address);
+	if (!pte_present(*pte_ref))
+		return -1;
+	pte = pte_offset_kernel(pmd, address);
+	/* Don't use pte_page here, because the mappings can point
+	   outside mem_map, and the NUMA hash lookup cannot handle
+	   that. */
+	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+		BUG();
+	return 0;
+#endif
 }
 
 int show_unhandled_signals = 1;
@@ -507,6 +557,9 @@ no_context:
 	if (is_prefetch(regs, address, error_code))
 		return;
 
+	if (is_errata93(regs, address))
+		return;
+
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index c48d95c306df..75b7b165bdf3 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -223,6 +223,7 @@ KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
 KERN_ERR "******* Please consider a BIOS update.\n"
 KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
 
 /* Workaround for K8 erratum #93 & buggy BIOS.
    BIOS SMM functions are required to use a specific workaround
@@ -230,10 +231,12 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
    A lot of BIOS that didn't get tested properly miss this.
    The OS sees this as a page fault with the upper 32bits of RIP cleared.
    Try to work around it here.
-   Note we only handle faults in kernel here. */
-
+   Note we only handle faults in kernel here.
+   Does nothing for X86_32
+ */
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
+#ifdef CONFIG_X86_64
 	static int warned;
 	if (address != regs->ip)
 		return 0;
@@ -249,9 +252,9 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 		regs->ip = address;
 		return 1;
 	}
+#endif
 	return 0;
 }
-#endif
 
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
@@ -278,6 +281,26 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
  */
 static int vmalloc_fault(unsigned long address)
 {
+#ifdef CONFIG_X86_32
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+	return 0;
+#else
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
 	pmd_t *pmd, *pmd_ref;
@@ -321,6 +344,7 @@ static int vmalloc_fault(unsigned long address)
 	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
 		BUG();
 	return 0;
+#endif
 }
 
 int show_unhandled_signals = 1;

From 6b8be6df7f971919622d152d144c8798ad7fd160 Mon Sep 17 00:00:00 2001
From: John Reiser <jreiser@BitWagon.com>
Date: Wed, 30 Jan 2008 13:33:13 +0100
Subject: [PATCH 606/890] x86: add ENDPROC() markers

The ENDPROCs() were not used everywhere.  Some code used just END() instead,
while other code used nothing.  um/sys-i386/checksum.S didn't #include
<linux/linkage.h> .  I also got confused because gcc puts the
.type near the ENTRY, while ENDPROC puts it on the opposite end.

Signed off by: John Reiser <jreiser@BitWagon.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/lib/semaphore_32.S | 20 ++++++++++----------
 include/linux/linkage.h     |  4 ++++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 444fba400983..e2c6e0d5e125 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -49,7 +49,7 @@ ENTRY(__down_failed)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__down_failed)
+	ENDPROC(__down_failed)
 
 ENTRY(__down_failed_interruptible)
 	CFI_STARTPROC
@@ -70,7 +70,7 @@ ENTRY(__down_failed_interruptible)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__down_failed_interruptible)
+	ENDPROC(__down_failed_interruptible)
 
 ENTRY(__down_failed_trylock)
 	CFI_STARTPROC
@@ -91,7 +91,7 @@ ENTRY(__down_failed_trylock)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__down_failed_trylock)
+	ENDPROC(__down_failed_trylock)
 
 ENTRY(__up_wakeup)
 	CFI_STARTPROC
@@ -112,7 +112,7 @@ ENTRY(__up_wakeup)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__up_wakeup)
+	ENDPROC(__up_wakeup)
 
 /*
  * rw spinlock fallbacks
@@ -132,7 +132,7 @@ ENTRY(__write_lock_failed)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__write_lock_failed)
+	ENDPROC(__write_lock_failed)
 
 ENTRY(__read_lock_failed)
 	CFI_STARTPROC
@@ -148,7 +148,7 @@ ENTRY(__read_lock_failed)
 	ENDFRAME
 	ret
 	CFI_ENDPROC
-	END(__read_lock_failed)
+	ENDPROC(__read_lock_failed)
 
 #endif
 
@@ -170,7 +170,7 @@ ENTRY(call_rwsem_down_read_failed)
 	CFI_ADJUST_CFA_OFFSET -4
 	ret
 	CFI_ENDPROC
-	END(call_rwsem_down_read_failed)
+	ENDPROC(call_rwsem_down_read_failed)
 
 ENTRY(call_rwsem_down_write_failed)
 	CFI_STARTPROC
@@ -182,7 +182,7 @@ ENTRY(call_rwsem_down_write_failed)
 	CFI_ADJUST_CFA_OFFSET -4
 	ret
 	CFI_ENDPROC
-	END(call_rwsem_down_write_failed)
+	ENDPROC(call_rwsem_down_write_failed)
 
 ENTRY(call_rwsem_wake)
 	CFI_STARTPROC
@@ -196,7 +196,7 @@ ENTRY(call_rwsem_wake)
 	CFI_ADJUST_CFA_OFFSET -4
 1:	ret
 	CFI_ENDPROC
-	END(call_rwsem_wake)
+	ENDPROC(call_rwsem_wake)
 
 /* Fix up special calling conventions */
 ENTRY(call_rwsem_downgrade_wake)
@@ -214,6 +214,6 @@ ENTRY(call_rwsem_downgrade_wake)
 	CFI_ADJUST_CFA_OFFSET -4
 	ret
 	CFI_ENDPROC
-	END(call_rwsem_downgrade_wake)
+	ENDPROC(call_rwsem_downgrade_wake)
 
 #endif
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index ceeeb5ea5b15..3faf599ea58e 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -57,6 +57,10 @@
   .size name, .-name
 #endif
 
+/* If symbol 'name' is treated as a subroutine (gets called, and returns)
+ * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of
+ * static analysis tools such as stack depth analyzer.
+ */
 #ifndef ENDPROC
 #define ENDPROC(name) \
   .type name, @function; \

From 0723a69a63beec1ca6e792239ef75d0181387ef0 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Wed, 30 Jan 2008 13:33:13 +0100
Subject: [PATCH 607/890] x86: fix synchronize_rcu(): high latency on idle
 system

an otherwise idle system takes about 3 ticks per network
interface in unregister_netdev() due to multiple calls to synchronize_rcu(),
which adds up to quite a few seconds for tearing down thousands of
interfaces.  By flushing pending rcu callbacks in the idle loop, the system
makes progress hundreds of times faster.  If this is indeed a sane thing to,
it probably needs to be done for other architectures than x86.  And yes, the
network stack shouldn't call synchronize_rcu() quite so much, but fixing that
is a little more involved.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7a61b54649de..69a69c3f43bb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -198,6 +198,9 @@ void cpu_idle(void)
 			rmb();
 			idle = pm_idle;
 
+			if (rcu_pending(cpu))
+				rcu_check_callbacks(cpu, 0);
+
 			if (!idle)
 				idle = default_idle;
 

From fb8830e72d9bd86f1e7b6886cb1886c391130f86 Mon Sep 17 00:00:00 2001
From: Abhishek Sagar <sagar.abhishek@gmail.com>
Date: Wed, 30 Jan 2008 13:33:13 +0100
Subject: [PATCH 608/890] x86: fix singlestep handling in reenter_kprobe

Highlight peculiar cases in singles-step kprobe handling.

In reenter_kprobe(), a breakpoint in KPROBE_HIT_SS case can only occur
when single-stepping a breakpoint on which a probe was installed. Since
such probes are single-stepped inline, identifying these cases is
unambiguous. All other cases leading up to KPROBE_HIT_SS are possible
bugs. Identify and WARN_ON such cases.

Signed-off-by: Abhishek Sagar <sagar.abhishek@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/kprobes.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index edc0a8e59247..f7ddbb8c3fe8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -443,17 +443,6 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
 
-static void __kprobes recursive_singlestep(struct kprobe *p,
-					   struct pt_regs *regs,
-					   struct kprobe_ctlblk *kcb)
-{
-	save_previous_kprobe(kcb);
-	set_current_kprobe(p, regs, kcb);
-	kprobes_inc_nmissed_count(p);
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_REENTER;
-}
-
 static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 				       struct kprobe_ctlblk *kcb)
 {
@@ -492,20 +481,29 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 		break;
 #endif
 	case KPROBE_HIT_ACTIVE:
-		recursive_singlestep(p, regs, kcb);
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p, regs, kcb);
+		kprobes_inc_nmissed_count(p);
+		prepare_singlestep(p, regs);
+		kcb->kprobe_status = KPROBE_REENTER;
 		break;
 	case KPROBE_HIT_SS:
-		if (*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+		if (p == kprobe_running()) {
 			regs->flags &= ~TF_MASK;
 			regs->flags |= kcb->kprobe_saved_flags;
 			return 0;
 		} else {
-			recursive_singlestep(p, regs, kcb);
+			/* A probe has been hit in the codepath leading up
+			 * to, or just after, single-stepping of a probed
+			 * instruction. This entire codepath should strictly
+			 * reside in .kprobes.text section. Raise a warning
+			 * to highlight this peculiar case.
+			 */
 		}
-		break;
 	default:
 		/* impossible cases */
 		WARN_ON(1);
+		return 0;
 	}
 
 	return 1;

From 466eed22d127a1f16e1251cdc54a9f8f944140c0 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 609/890] x86: isolate PIC/PIT in/out calls

Rather than remove and/or mangle inb_p/outb_p we want to remove the use
of them from inappropriate places. For the PIC/PIT this may eventually
depend on 32/64bitism or similar so start by adding inb/outb_pit and
inb/outb_pic so that we can make them use any scheme we settle on without
disturbing the existing, correct (for ISA), port 0x80 usage. (eg we can
make inb_pit use udelay without messing up inb_p).

Floppy already does this for the fdc. That really only leaves the CMOS as
a core logic item to tackle, and bits of parallel port handling in the
chipset layers.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apm_32.c      |  6 +++---
 arch/x86/kernel/i8253.c       | 30 +++++++++++++++---------------
 arch/x86/kernel/i8259_32.c    | 20 ++++++++++----------
 arch/x86/kernel/i8259_64.c    | 20 ++++++++++----------
 arch/x86/kernel/vmiclock_32.c |  2 +-
 include/asm-x86/i8253.h       |  3 +++
 include/asm-x86/i8259.h       |  3 +++
 7 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 955dd43b1676..d4438ef296d8 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1174,11 +1174,11 @@ static void reinit_timer(void)
 
 	spin_lock_irqsave(&i8253_lock, flags);
 	/* set the clock to HZ */
-	outb_p(0x34, PIT_MODE);		/* binary, mode 2, LSB/MSB, ch 0 */
+	outb_pit(0x34, PIT_MODE);		/* binary, mode 2, LSB/MSB, ch 0 */
 	udelay(10);
-	outb_p(LATCH & 0xff, PIT_CH0);	/* LSB */
+	outb_pit(LATCH & 0xff, PIT_CH0);	/* LSB */
 	udelay(10);
-	outb(LATCH >> 8, PIT_CH0);	/* MSB */
+	outb_pit(LATCH >> 8, PIT_CH0);	/* MSB */
 	udelay(10);
 	spin_unlock_irqrestore(&i8253_lock, flags);
 #endif
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c76fef1ce355..ef62b07b2b48 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -43,26 +43,26 @@ static void init_pit_timer(enum clock_event_mode mode,
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		/* binary, mode 2, LSB/MSB, ch 0 */
-		outb_p(0x34, PIT_MODE);
-		outb_p(LATCH & 0xff , PIT_CH0);	/* LSB */
-		outb(LATCH >> 8 , PIT_CH0);	/* MSB */
+		outb_pit(0x34, PIT_MODE);
+		outb_pit(LATCH & 0xff , PIT_CH0);	/* LSB */
+		outb_pit(LATCH >> 8 , PIT_CH0);		/* MSB */
 		break;
 
 	case CLOCK_EVT_MODE_SHUTDOWN:
 	case CLOCK_EVT_MODE_UNUSED:
 		if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
 		    evt->mode == CLOCK_EVT_MODE_ONESHOT) {
-			outb_p(0x30, PIT_MODE);
-			outb_p(0, PIT_CH0);
-			outb_p(0, PIT_CH0);
+			outb_pit(0x30, PIT_MODE);
+			outb_pit(0, PIT_CH0);
+			outb_pit(0, PIT_CH0);
 		}
 		pit_disable_clocksource();
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* One shot setup */
-		outb_p(0x38, PIT_MODE);
 		pit_disable_clocksource();
+		outb_pit(0x38, PIT_MODE);
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -80,8 +80,8 @@ static void init_pit_timer(enum clock_event_mode mode,
 static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 {
 	spin_lock(&i8253_lock);
-	outb_p(delta & 0xff , PIT_CH0);	/* LSB */
-	outb(delta >> 8 , PIT_CH0);	/* MSB */
+	outb_pit(delta & 0xff , PIT_CH0);	/* LSB */
+	outb_pit(delta >> 8 , PIT_CH0);		/* MSB */
 	spin_unlock(&i8253_lock);
 
 	return 0;
@@ -153,15 +153,15 @@ static cycle_t pit_read(void)
 	 * count), it cannot be newer.
 	 */
 	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_p(PIT_CH0);	/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
+	outb_pit(0x00, PIT_MODE);	/* latch the count ASAP */
+	count = inb_pit(PIT_CH0);	/* read the latched count */
+	count |= inb_pit(PIT_CH0) << 8;
 
 	/* VIA686a test code... reset the latch if count > max + 1 */
 	if (count > LATCH) {
-		outb_p(0x34, PIT_MODE);
-		outb_p(LATCH & 0xff, PIT_CH0);
-		outb(LATCH >> 8, PIT_CH0);
+		outb_pit(0x34, PIT_MODE);
+		outb_pit(LATCH & 0xff, PIT_CH0);
+		outb_pit(LATCH >> 8, PIT_CH0);
 		count = LATCH - 1;
 	}
 
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index f201e7da1bbc..2d25b77102fe 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -289,20 +289,20 @@ void init_8259A(int auto_eoi)
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 
 	/*
-	 * outb_p - this has to work on a wide range of PC hardware.
+	 * outb_pic - this has to work on a wide range of PC hardware.
 	 */
-	outb_p(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-	outb_p(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
-	outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
+	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
 	if (auto_eoi)	/* master does Auto EOI */
-		outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
 	else		/* master expects normal EOI */
-		outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
 
-	outb_p(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-	outb_p(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
-	outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
-	outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
+	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
 	if (auto_eoi)
 		/*
 		 * In AEOI mode we just have to mask the interrupt
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index 99c8406ae253..d3edb9f23f2c 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -359,25 +359,25 @@ void init_8259A(int auto_eoi)
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 
 	/*
-	 * outb_p - this has to work on a wide range of PC hardware.
+	 * outb_pic - this has to work on a wide range of PC hardware.
 	 */
-	outb_p(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
 	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
-	outb_p(IRQ0_VECTOR, PIC_MASTER_IMR);
+	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
 	/* 8259A-1 (the master) has a slave on IR2 */
-	outb_p(0x04, PIC_MASTER_IMR);
+	outb_pic(0x04, PIC_MASTER_IMR);
 	if (auto_eoi)	/* master does Auto EOI */
-		outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
 	else		/* master expects normal EOI */
-		outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
 
-	outb_p(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
 	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
-	outb_p(IRQ8_VECTOR, PIC_SLAVE_IMR);
+	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
 	/* 8259A-2 is a slave on master's IR2 */
-	outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR);
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
 	/* (slave's support for AEOI in flat mode is to be investigated) */
-	outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
 
 	if (auto_eoi)
 		/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 57f9ef5a324c..a2b030780aa9 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -237,7 +237,7 @@ static void __devinit vmi_time_init_clockevent(void)
 void __init vmi_time_init(void)
 {
 	/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
-	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
+	outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
 
 	vmi_time_init_clockevent();
 	setup_irq(0, &vmi_clock_action);
diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h
index 747548ec5d1d..b51c0487fc41 100644
--- a/include/asm-x86/i8253.h
+++ b/include/asm-x86/i8253.h
@@ -12,4 +12,7 @@ extern struct clock_event_device *global_clock_event;
 
 extern void setup_pit_timer(void);
 
+#define inb_pit		inb_p
+#define outb_pit	outb_p
+
 #endif	/* __ASM_I8253_H__ */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index cabcc6cf3923..67c319e0efc7 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -29,4 +29,7 @@ extern void enable_8259A_irq(unsigned int irq);
 extern void disable_8259A_irq(unsigned int irq);
 extern unsigned int startup_8259A_irq(unsigned int irq);
 
+#define inb_pic		inb_p
+#define outb_pic	outb_p
+
 #endif	/* __ASM_I8259_H__ */

From c11b68bc8b1f7c7daaea2220e0a8d9f7d391cb8d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 610/890] x86: serverworks: IRQ routing needs no _p

I can find no reason for the _p on the serverworks IRQ routing logic, and
a review of the documentation contains no indication that any such delay
is needed so lets try this

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/irq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index ee524ca5f8c0..ed07ce6c171b 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -461,14 +461,14 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
  */
 static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
-	outb_p(pirq, 0xc00);
+	outb(pirq, 0xc00);
 	return inb(0xc01) & 0xf;
 }
 
 static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
-	outb_p(pirq, 0xc00);
-	outb_p(irq, 0xc01);
+	outb(pirq, 0xc00);
+	outb(irq, 0xc01);
 	return 1;
 }
 

From c0400030b28e95fd477fdfe8ba8acb0af21e5c15 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 611/890] x86-64: clean up linker script

Remove the dead .text.lock. Move _etext and __{start,stop}___ex_table
into their sections.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmlinux_64.lds.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index ea5386944e67..7457370d7916 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -37,16 +37,15 @@ SECTIONS
 	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
-	} :text = 0x9090
-  				/* out-of-line lock text */
-  .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
-
-  _etext = .;			/* End of text section */
+	_etext = .;			/* End of text section */
+  } :text = 0x9090
 
   . = ALIGN(16);		/* Exception table */
-  __start___ex_table = .;
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
-  __stop___ex_table = .;
+  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+  	__start___ex_table = .;
+	 *(__ex_table)
+  	__stop___ex_table = .;
+  }
 
   NOTES :text :note
 

From 4dbf7af6442a9a882855bed0d999659ac413e3ac Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 612/890] x86: adjust/fix LDT handling for Xen

Based on patch from Jan Beulich <jbeulich@novell.com>.

Don't rely on kmalloc(PAGE_SIZE) returning PAGE_SIZE aligned memory
(Xen requires GDT *and* LDT to be page-aligned). Using the page
allocator interface also removes the (albeit small) slab allocator
overhead. The same change being done for 64-bits for consistency.

Further, the Xen hypercall interface expects the LDT address to be
virtual, not machine.

[ Adjusted to unified ldt.c - Jeremy ]

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/ldt.c    | 7 +++----
 arch/x86/xen/enlighten.c | 9 +--------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b8ef46270e24..8a7660c8394a 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,7 +12,6 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
-#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -40,7 +39,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
 		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = (void *)__get_free_page(GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -78,7 +77,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(oldldt);
 		else
-			kfree(oldldt);
+			put_page(virt_to_page(oldldt));
 	}
 	return 0;
 }
@@ -129,7 +128,7 @@ void destroy_context(struct mm_struct *mm)
 		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
-			kfree(mm->context.ldt);
+			put_page(virt_to_page(mm->context.ldt));
 		mm->context.size = 0;
 	}
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 72dd14d0685c..845b4fd94463 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -275,19 +275,12 @@ static unsigned long xen_store_tr(void)
 
 static void xen_set_ldt(const void *addr, unsigned entries)
 {
-	unsigned long linear_addr = (unsigned long)addr;
 	struct mmuext_op *op;
 	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
 	op->cmd = MMUEXT_SET_LDT;
-	if (linear_addr) {
-		/* ldt my be vmalloced, use arbitrary_virt_to_machine */
-		xmaddr_t maddr;
-		maddr = arbitrary_virt_to_machine((unsigned long)addr);
-		linear_addr = (unsigned long)maddr.maddr;
-	}
-	op->arg1.linear_addr = linear_addr;
+	op->arg1.linear_addr = (unsigned long)addr;
 	op->arg2.nr_ents = entries;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

From 693e3c560317577a29c625d89f6745d5c7cfd918 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 613/890] x86: reduce memory and intra-node effects

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c    | 5 +++--
 arch/x86/kernel/mpparse_64.c | 6 +++---
 arch/x86/mm/numa_64.c        | 2 +-
 arch/x86/mm/srat_64.c        | 8 ++++++--
 include/asm-x86/topology.h   | 2 +-
 5 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index f9919c492699..85bd3d463cdf 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1175,14 +1175,15 @@ __cpuinit int apic_is_clustered_box(void)
 {
 	int i, clusters, zeros;
 	unsigned id;
+	u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
 		/* are we being called early in kernel startup? */
-		if (x86_bios_cpu_apicid_early_ptr) {
-			id = ((u16 *)x86_bios_cpu_apicid_early_ptr)[i];
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
 		}
 		else if (i < nr_cpu_ids) {
 			if (cpu_present(i))
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index fd671754dcb5..d3260f8f17dc 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -122,7 +122,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 	physid_set(m->mpc_apicid, phys_cpu_present_map);
  	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
  		/*
- 		 * x86_bios_cpu_apicid is required to have processors listed
+		 * x86_bios_cpu_apicid is required to have processors listed
  		 * in same order as logical cpu numbers. Hence the first
  		 * entry is BSP, and so on.
  		 */
@@ -130,8 +130,8 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
  	}
 	/* are we being called early in kernel startup? */
 	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = (u16 *)x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = (u16 *)x86_bios_cpu_apicid_early_ptr;
+		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
 
 		cpu_to_apicid[cpu] = m->mpc_apicid;
 		bios_cpu_apicid[cpu] = m->mpc_apicid;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 382377d6421d..dca58fb39b08 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -562,7 +562,7 @@ __cpuinit void numa_add_cpu(int cpu)
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
-	u16 *cpu_to_node_map = (u16 *)x86_cpu_to_node_map_early_ptr;
+	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
 	cpu_pda(cpu)->nodenumber = node;
 
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index bccbdc7be434..e5a1ec8342dc 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -346,8 +346,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	/* First clean up the node list */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cutoff_node(i, start, end);
-		/* ZZZ why was this needed. At least add a comment */
-		if (nodes[i].end && (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
+		/*
+		 * don't confuse VM with a node that doesn't have the
+		 * minimum memory.
+		 */
+		if (nodes[i].end &&
+			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
 			unparse_node(i);
 			node_set_offline(i);
 		}
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index e612ed8d2bc7..a89b46eb2526 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -40,7 +40,7 @@ extern cpumask_t node_to_cpumask_map[];
 /* Returns the number of the node containing CPU 'cpu' */
 static inline int cpu_to_node(int cpu)
 {
-	u16 *cpu_to_node_map = (u16 *)x86_cpu_to_node_map_early_ptr;
+	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
 	if (cpu_to_node_map)
 		return cpu_to_node_map[cpu];

From 118c890961a2ca456947d72ebe0ce43de07907f0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 614/890] arch/x86/mm/numa_64.c: section fix

WARNING: vmlinux.o(__ksymtab+0x670): Section mismatch: reference to .init.data:x86_cpu_to_node_map_init (between '__ksymtab_x86_cpu_to_node_map_init' and '__ksymtab_node_data')

Cc: Matthew Dobson <colpatch@us.ibm.com>
Cc: Mike Travis <travis@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c      | 2 +-
 include/asm-x86/topology.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dca58fb39b08..7b03d2090c28 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,7 +31,7 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-u16 x86_cpu_to_node_map_init[NR_CPUS] __initdata = {
+u16 x86_cpu_to_node_map_init[NR_CPUS] = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 void *x86_cpu_to_node_map_early_ptr;
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index a89b46eb2526..2da1464ecbef 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -31,7 +31,7 @@
 
 /* Mappings between logical cpu number and node number */
 DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
-extern u16 __initdata x86_cpu_to_node_map_init[];
+extern u16 x86_cpu_to_node_map_init[];
 extern void *x86_cpu_to_node_map_early_ptr;
 extern cpumask_t node_to_cpumask_map[];
 

From 2f98b2faac1b5fec327edbde945cdf7f7a53f351 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:14 +0100
Subject: [PATCH 615/890] add native_pud_val and _pmd_val for 2 and 3

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index a6495eb5c605..eba88d940092 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -91,6 +91,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 }
 #else	/* PAGETABLE_LEVELS == 3 */
 #include <asm-generic/pgtable-nopud.h>
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+	return native_pgd_val(pud.pgd);
+}
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 typedef struct { pmdval_t pmd; } pmd_t;
@@ -106,6 +111,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 }
 #else  /* PAGETABLE_LEVELS == 2 */
 #include <asm-generic/pgtable-nopmd.h>
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+	return native_pgd_val(pmd.pud.pgd);
+}
 #endif	/* PAGETABLE_LEVELS >= 3 */
 
 static inline pte_t native_make_pte(pteval_t val)

From 5b8dd1e95b5f2b7b80cfc7e6bb709603aef8bbc1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 616/890] x86/paravirt: rearrange common mmu_ops

Rearrange the various pagetable mmu_ops to remove duplication.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 4aa06b929e48..c38cf1ac4e48 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -230,28 +230,32 @@ struct pv_mmu_ops {
 	void (*pte_update_defer)(struct mm_struct *mm,
 				 unsigned long addr, pte_t *ptep);
 
+	pteval_t (*pte_val)(pte_t);
+	pte_t (*make_pte)(pteval_t pte);
+
+	pgdval_t (*pgd_val)(pgd_t);
+	pgd_t (*make_pgd)(pgdval_t pgd);
+
+#if PAGETABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
 	void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, pte_t pte);
-	void (*set_pud)(pud_t *pudp, pud_t pudval);
 	void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 	void (*pmd_clear)(pmd_t *pmdp);
 
-	unsigned long long (*pte_val)(pte_t);
-	unsigned long long (*pmd_val)(pmd_t);
-	unsigned long long (*pgd_val)(pgd_t);
+#endif	/* CONFIG_X86_PAE */
 
-	pte_t (*make_pte)(unsigned long long pte);
-	pmd_t (*make_pmd)(unsigned long long pmd);
-	pgd_t (*make_pgd)(unsigned long long pgd);
-#else
-	unsigned long (*pte_val)(pte_t);
-	unsigned long (*pgd_val)(pgd_t);
+	void (*set_pud)(pud_t *pudp, pud_t pudval);
 
-	pte_t (*make_pte)(unsigned long pte);
-	pgd_t (*make_pgd)(unsigned long pgd);
-#endif
+	pmdval_t (*pmd_val)(pmd_t);
+	pmd_t (*make_pmd)(pmdval_t pmd);
+
+#if PAGETABLE_LEVELS == 4
+	pudval_t (*pud_val)(pud_t);
+	pud_t (*make_pud)(pudval_t pud);
+#endif	/* PAGETABLE_LEVELS == 4 */
+#endif	/* PAGETABLE_LEVELS >= 3 */
 
 #ifdef CONFIG_HIGHPTE
 	void *(*kmap_atomic_pte)(struct page *page, enum km_type type);

From 773221f46f82dc2f277dacc331d9d2ef2c690cb6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 617/890] x86/paravirt: common implementation for pte value ops

Remove duplicate __pte/pte_val functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 48 +++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index c38cf1ac4e48..b7342efab834 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -920,15 +920,37 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
 	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
 }
 
-#ifdef CONFIG_X86_PAE
-static inline pte_t __pte(unsigned long long val)
+static inline pte_t __pte(pteval_t val)
 {
-	unsigned long long ret = PVOP_CALL2(unsigned long long,
-					    pv_mmu_ops.make_pte,
-					    val, val >> 32);
+	pteval_t ret;
+
+	if (sizeof(pteval_t) > sizeof(long))
+		ret = PVOP_CALL2(pteval_t,
+				 pv_mmu_ops.make_pte,
+				 val, (u64)val >> 32);
+	else
+		ret = PVOP_CALL1(pteval_t,
+				 pv_mmu_ops.make_pte,
+				 val);
+
 	return (pte_t) { .pte = ret };
 }
 
+static inline pteval_t pte_val(pte_t pte)
+{
+	pteval_t ret;
+
+	if (sizeof(pteval_t) > sizeof(long))
+		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
+				 pte.pte, (u64)pte.pte >> 32);
+	else
+		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
+				 pte.pte);
+
+	return ret;
+}
+
+#ifdef CONFIG_X86_PAE
 static inline pmd_t __pmd(unsigned long long val)
 {
 	return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
@@ -941,12 +963,6 @@ static inline pgd_t __pgd(unsigned long long val)
 				    val, val >> 32) };
 }
 
-static inline unsigned long long pte_val(pte_t x)
-{
-	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
-			  x.pte_low, x.pte_high);
-}
-
 static inline unsigned long long pmd_val(pmd_t x)
 {
 	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
@@ -1008,21 +1024,11 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #else  /* !CONFIG_X86_PAE */
 
-static inline pte_t __pte(unsigned long val)
-{
-	return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
-}
-
 static inline pgd_t __pgd(unsigned long val)
 {
 	return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
 }
 
-static inline unsigned long pte_val(pte_t x)
-{
-	return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
-}
-
 static inline unsigned long pgd_val(pgd_t x)
 {
 	return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);

From ef38503e034f82ad16a75a9dfea40ed7adaf00a8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 618/890] x86/paravirt: common implementation for pgd value ops

Remove duplicate __pgd/pgd_val functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 50 +++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index b7342efab834..eac25b5323a2 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -950,6 +950,34 @@ static inline pteval_t pte_val(pte_t pte)
 	return ret;
 }
 
+static inline pgd_t __pgd(pgdval_t val)
+{
+	pgdval_t ret;
+
+	if (sizeof(pgdval_t) > sizeof(long))
+		ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
+				 val, (u64)val >> 32);
+	else
+		ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
+				 val);
+
+	return (pgd_t) { ret };
+}
+
+static inline pgdval_t pgd_val(pgd_t pgd)
+{
+	pgdval_t ret;
+
+	if (sizeof(pgdval_t) > sizeof(long))
+		ret =  PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
+				  pgd.pgd, (u64)pgd.pgd >> 32);
+	else
+		ret =  PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
+				  pgd.pgd);
+
+	return ret;
+}
+
 #ifdef CONFIG_X86_PAE
 static inline pmd_t __pmd(unsigned long long val)
 {
@@ -957,24 +985,12 @@ static inline pmd_t __pmd(unsigned long long val)
 				    val, val >> 32) };
 }
 
-static inline pgd_t __pgd(unsigned long long val)
-{
-	return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
-				    val, val >> 32) };
-}
-
 static inline unsigned long long pmd_val(pmd_t x)
 {
 	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
 			  x.pmd, x.pmd >> 32);
 }
 
-static inline unsigned long long pgd_val(pgd_t x)
-{
-	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
-			  x.pgd, x.pgd >> 32);
-}
-
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
 	PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
@@ -1024,16 +1040,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #else  /* !CONFIG_X86_PAE */
 
-static inline pgd_t __pgd(unsigned long val)
-{
-	return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
-}
-
-static inline unsigned long pgd_val(pgd_t x)
-{
-	return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
-}
-
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
 	PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);

From a632da2fc9c1e4e8021c5665fc34201fea485dcf Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 619/890] x86/paravirt: common implementation for pmd value ops

Remove duplicate __pmd/pmd_val functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index eac25b5323a2..3593211e345b 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -978,18 +978,37 @@ static inline pgdval_t pgd_val(pgd_t pgd)
 	return ret;
 }
 
-#ifdef CONFIG_X86_PAE
-static inline pmd_t __pmd(unsigned long long val)
+#if PAGETABLE_LEVELS >= 3
+static inline pmd_t __pmd(pmdval_t val)
 {
-	return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
-				    val, val >> 32) };
+	pmdval_t ret;
+
+	if (sizeof(pmdval_t) > sizeof(long))
+		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
+				 val, (u64)val >> 32);
+	else
+		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
+				 val);
+
+	return (pmd_t) { ret };
 }
 
-static inline unsigned long long pmd_val(pmd_t x)
+static inline pmdval_t pmd_val(pmd_t pmd)
 {
-	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
-			  x.pmd, x.pmd >> 32);
+	pmdval_t ret;
+
+	if (sizeof(pmdval_t) > sizeof(long))
+		ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
+				  pmd.pmd, (u64)pmd.pmd >> 32);
+	else
+		ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
+				  pmd.pmd);
+
+	return ret;
 }
+#endif	/* PAGETABLE_LEVELS >= 3 */
+
+#ifdef CONFIG_X86_PAE
 
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {

From 4eed80cd7a1668df6ddd884f9474beccc81ef3f7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 620/890] x86/paravirt: make set_pte operations common

Remove duplicate set_pte* operations.  PAE still needs to have special
variants of some of these because it can't atomically update a 64-bit
pte, so there's still some duplication.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 116 +++++++++++++++++++------------------
 1 file changed, 60 insertions(+), 56 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3593211e345b..ba1b94334144 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -978,6 +978,66 @@ static inline pgdval_t pgd_val(pgd_t pgd)
 	return ret;
 }
 
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+	if (sizeof(pteval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
+			    pte.pte, (u64)pte.pte >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
+			    pte.pte);
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte)
+{
+	if (sizeof(pteval_t) > sizeof(long))
+		/* 5 arg words */
+		pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
+	else
+		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
+}
+
+#ifdef CONFIG_X86_PAE
+/* Special-case pte-setting operations for PAE, which can't update a
+   64-bit pte atomically */
+static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
+		    pte.pte, pte.pte >> 32);
+}
+
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	/* 5 arg words */
+	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep)
+{
+	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
+}
+#else  /* !CONFIG_X86_PAE */
+static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	set_pte(ptep, pte);
+}
+
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte(ptep, pte);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep)
+{
+	set_pte_at(mm, addr, ptep, __pte(0));
+}
+#endif	/* CONFIG_X86_PAE */
+
 #if PAGETABLE_LEVELS >= 3
 static inline pmd_t __pmd(pmdval_t val)
 {
@@ -1010,31 +1070,6 @@ static inline pmdval_t pmd_val(pmd_t pmd)
 
 #ifdef CONFIG_X86_PAE
 
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-	PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
-}
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	/* 5 arg words */
-	pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
-}
-
-static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
-{
-	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
-		    pteval.pte_low, pteval.pte_high);
-}
-
-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	/* 5 arg words */
-	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
@@ -1047,11 +1082,6 @@ static inline void set_pud(pud_t *pudp, pud_t pudval)
 		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
 }
 
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
-}
-
 static inline void pmd_clear(pmd_t *pmdp)
 {
 	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
@@ -1059,17 +1089,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #else  /* !CONFIG_X86_PAE */
 
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-	PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
-}
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
@@ -1080,21 +1099,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 	set_pmd(pmdp, __pmd(0));
 }
 
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	set_pte_at(mm, addr, ptep, __pte(0));
-}
-
-static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	set_pte(ptep, pte);
-}
-
-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte(ptep, pte);
-}
 #endif	/* CONFIG_X86_PAE */
 
 /* Lazy mode for batching updates / context switch */

From 60b3f626dee9f0b1b656918e40e211822c18200d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 621/890] x86/paravirt: make set_pmd operation common

Remove duplicate set_pmd()s.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 43 ++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index ba1b94334144..d9957ece49b5 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -998,6 +998,16 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	pmdval_t val = native_pmd_val(pmd);
+
+	if (sizeof(pmdval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
+}
+
 #ifdef CONFIG_X86_PAE
 /* Special-case pte-setting operations for PAE, which can't update a
    64-bit pte atomically */
@@ -1019,6 +1029,11 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 {
 	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
 }
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
+}
 #else  /* !CONFIG_X86_PAE */
 static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
 {
@@ -1036,6 +1051,11 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 {
 	set_pte_at(mm, addr, ptep, __pte(0));
 }
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	set_pmd(pmdp, __pmd(0));
+}
 #endif	/* CONFIG_X86_PAE */
 
 #if PAGETABLE_LEVELS >= 3
@@ -1070,35 +1090,12 @@ static inline pmdval_t pmd_val(pmd_t pmd)
 
 #ifdef CONFIG_X86_PAE
 
-static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-	PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
-		    pmdval.pmd, pmdval.pmd >> 32);
-}
-
 static inline void set_pud(pud_t *pudp, pud_t pudval)
 {
 	PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
 		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
 }
 
-static inline void pmd_clear(pmd_t *pmdp)
-{
-	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
-}
-
-#else  /* !CONFIG_X86_PAE */
-
-static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
-	set_pmd(pmdp, __pmd(0));
-}
-
 #endif	/* CONFIG_X86_PAE */
 
 /* Lazy mode for batching updates / context switch */

From 28c6075c72a312986f6d7802005251bdb91ca80c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 622/890] x86/paravirt: make set_pud operation common

Remove duplicate set_pud()s.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index d9957ece49b5..c56b17a5eba0 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1086,17 +1086,19 @@ static inline pmdval_t pmd_val(pmd_t pmd)
 
 	return ret;
 }
-#endif	/* PAGETABLE_LEVELS >= 3 */
 
-#ifdef CONFIG_X86_PAE
-
-static inline void set_pud(pud_t *pudp, pud_t pudval)
+static inline void set_pud(pud_t *pudp, pud_t pud)
 {
-	PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
-		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
-}
+	pudval_t val = native_pud_val(pud);
 
-#endif	/* CONFIG_X86_PAE */
+	if (sizeof(pudval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
+			    val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
+			    val);
+}
+#endif	/* PAGETABLE_LEVELS >= 3 */
 
 /* Lazy mode for batching updates / context switch */
 enum paravirt_lazy_mode {

From 316390b093e9b80f8ab13b3eea82f912e186bbf6 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 623/890] x86: fixup NR-CPUS patch for numa

This patch removes the EXPORT_SYMBOL for:

	x86_cpu_to_node_map_init
	x86_cpu_to_node_map_early_ptr

... thus fixing the section mismatch problem.

Also, the mem -> node hash lookup is fixed.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7b03d2090c28..37d429beba96 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -35,8 +35,6 @@ u16 x86_cpu_to_node_map_init[NR_CPUS] = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 void *x86_cpu_to_node_map_early_ptr;
-EXPORT_SYMBOL(x86_cpu_to_node_map_init);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
 DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
@@ -88,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 	unsigned long pad, pad_addr;
 
 	memnodemap = memnode.embedded_map;
-	if (memnodemapsize <= 48)
+	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
 		return 0;
 
 	pad = L1_CACHE_BYTES - 1;

From e34cda04d4ec0114185459eb101a2245563bbeba Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@mcmartin.ca>
Date: Wed, 30 Jan 2008 13:33:15 +0100
Subject: [PATCH 624/890] x86: 64-bit, remove redundant cpu_has_ definitions

PSE, PGE, XMM, XMM2, and FXSR are defined as required features, and
will be optimized to a constant at compile time. Remove their redundant
definitions.

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/cpufeature.h | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 93c143fb8a51..29727bf0e177 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -194,21 +194,6 @@
 #undef  cpu_has_centaur_mcr
 #define cpu_has_centaur_mcr	0
 
-#undef  cpu_has_pse
-#define cpu_has_pse		1
-
-#undef  cpu_has_pge
-#define cpu_has_pge		1
-
-#undef  cpu_has_xmm
-#define cpu_has_xmm		1
-
-#undef  cpu_has_xmm2
-#define cpu_has_xmm2		1
-
-#undef  cpu_has_fxsr
-#define cpu_has_fxsr		1
-
 #endif /* CONFIG_X86_64 */
 
 #endif /* _ASM_X86_CPUFEATURE_H */

From 770181d99098bc141031fe0a4b1d6221258c1182 Mon Sep 17 00:00:00 2001
From: Dmitri Vorobiev <dmitri.vorobiev@gmail.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 625/890] x86_32: remove the useless NR_syscalls macro

This is against current x86.git.

The size of the system call table for 32-bit x86 kernels is obtained by
compile-time calculation of the sys_call_table array, not from the value,
which the NR_syscalls macro expands to. This trivial patch removes the
fossil macro.

Manually tested by grepping the x86 files for the "NR_syscalls" string.
No relevant use cases found.

Build-tested using allyesconfig, allnoconfig and a couple of randconfig
instances. All builds successfully finished.

Runtime test performed using a stripped-down Debian-ish config. The system
booted successfully.

Signed-off-by: Dmitri Vorobiev <dmitri.vorobiev@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/unistd_32.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 9b15545eb9b5..8d8f9b5adbb9 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -333,8 +333,6 @@
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 325
-
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT

From 064954761254ef17fae2b84fb5a034d48a769143 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 626/890] x86: kdump failure

kdump needs ELF_CORE_COPY_REGS in crash_save_cpu().
This lack of the macro causes the following BUG.

 SysRq : Trigger a crashdump
 ------------[ cut here ]------------
 kernel BUG at include/linux/elfcore.h:105!
 invalid opcode: 0000 [1] PREEMPT SMP

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/elf.h | 62 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index dab4744f9586..d9c94e707289 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -106,6 +106,31 @@ extern unsigned int vdso_enabled;
 	_r->ax = 0; \
 } while (0)
 
+/*
+ * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ * now struct_user_regs, they are different)
+ */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs) do {		\
+	pr_reg[0] = regs->bx;				\
+	pr_reg[1] = regs->cx;				\
+	pr_reg[2] = regs->dx;				\
+	pr_reg[3] = regs->si;				\
+	pr_reg[4] = regs->di;				\
+	pr_reg[5] = regs->bp;				\
+	pr_reg[6] = regs->ax;				\
+	pr_reg[7] = regs->ds & 0xffff;			\
+	pr_reg[8] = regs->es & 0xffff;			\
+	pr_reg[9] = regs->fs & 0xffff;			\
+	savesegment(gs, pr_reg[10]);			\
+	pr_reg[11] = regs->orig_ax;			\
+	pr_reg[12] = regs->ip;				\
+	pr_reg[13] = regs->cs & 0xffff;			\
+	pr_reg[14] = regs->flags;			\
+	pr_reg[15] = regs->sp;				\
+	pr_reg[16] = regs->ss & 0xffff;			\
+} while (0);
+
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 
@@ -165,6 +190,43 @@ static inline void elf_common_init(struct thread_struct *t,
 	} while (0)
 #define COMPAT_ELF_PLATFORM			("i686")
 
+/*
+ * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ * now struct_user_regs, they are different). Assumes current is the process
+ * getting dumped.
+ */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs)  do {			\
+	unsigned v;						\
+	(pr_reg)[0] = (regs)->r15;				\
+	(pr_reg)[1] = (regs)->r14;				\
+	(pr_reg)[2] = (regs)->r13;				\
+	(pr_reg)[3] = (regs)->r12;				\
+	(pr_reg)[4] = (regs)->bp;				\
+	(pr_reg)[5] = (regs)->bx;				\
+	(pr_reg)[6] = (regs)->r11;				\
+	(pr_reg)[7] = (regs)->r10;				\
+	(pr_reg)[8] = (regs)->r9;				\
+	(pr_reg)[9] = (regs)->r8;				\
+	(pr_reg)[10] = (regs)->ax;				\
+	(pr_reg)[11] = (regs)->cx;				\
+	(pr_reg)[12] = (regs)->dx;				\
+	(pr_reg)[13] = (regs)->si;				\
+	(pr_reg)[14] = (regs)->di;				\
+	(pr_reg)[15] = (regs)->orig_ax;				\
+	(pr_reg)[16] = (regs)->ip;				\
+	(pr_reg)[17] = (regs)->cs;				\
+	(pr_reg)[18] = (regs)->flags;				\
+	(pr_reg)[19] = (regs)->sp;				\
+	(pr_reg)[20] = (regs)->ss;				\
+	(pr_reg)[21] = current->thread.fs;			\
+	(pr_reg)[22] = current->thread.gs;			\
+	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
+	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
+	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\
+	asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v;	\
+} while (0);
+
 /* I'm not sure if we can use '-' here */
 #define ELF_PLATFORM       ("x86_64")
 extern void set_personality_64bit(void);

From 90d43d728d22a5e20c63c8db1cfb497210768f19 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 627/890] x86: unify pt_regs accessors ptrace.h

Unify the definiton of:
v8086_mode
user_mode
user_mode_vm
stack_pointer
instruction_pointer
frame_pointer

in ptrace.h to make it clear where the differences are between
32 and 64 bit.  Changes macros to static inlines as well.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/ptrace.h | 84 ++++++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 30 deletions(-)

diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index ee4b595e1ccc..0ad9a2b7f8c0 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -76,30 +76,6 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
 
-/*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value.  This tricky test checks that with
- * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
- */
-static inline int user_mode(struct pt_regs *regs)
-{
-	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
-}
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-	return ((regs->cs & SEGMENT_RPL_MASK) |
-		(regs->flags & VM_MASK)) >= USER_RPL;
-}
-static inline int v8086_mode(struct pt_regs *regs)
-{
-	return (regs->flags & VM_MASK);
-}
-
-#define instruction_pointer(regs) ((regs)->ip)
-#define frame_pointer(regs) ((regs)->bp)
-#define stack_pointer(regs) ((unsigned long)(regs))
 #define regs_return_value(regs) ((regs)->ax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
@@ -167,12 +143,6 @@ struct pt_regs {
 /* top of stack page */
 };
 
-#define user_mode(regs) (!!((regs)->cs & 3))
-#define user_mode_vm(regs) user_mode(regs)
-#define v8086_mode(regs) 0	/* No V86 mode support in long mode */
-#define instruction_pointer(regs) ((regs)->ip)
-#define frame_pointer(regs) ((regs)->bp)
-#define stack_pointer(regs) ((regs)->sp)
 #define regs_return_value(regs) ((regs)->ax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
@@ -188,6 +158,60 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #ifdef __KERNEL__
 
+/*
+ * user_mode_vm(regs) determines whether a register set came from user mode.
+ * This is true if V8086 mode was enabled OR if the register set was from
+ * protected mode with RPL-3 CS value.  This tricky test checks that with
+ * one comparison.  Many places in the kernel can bypass this full check
+ * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ */
+static inline int user_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+#else
+	return !!(regs->cs & 3);
+#endif
+}
+
+static inline int user_mode_vm(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+	return ((regs->cs & SEGMENT_RPL_MASK) |
+		(regs->flags & VM_MASK)) >= USER_RPL;
+#else
+	return user_mode(regs);
+#endif
+}
+
+static inline int v8086_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+	return (regs->flags & VM_MASK);
+#else
+	return 0;	/* No V86 mode support in long mode */
+#endif
+}
+
+static inline unsigned long stack_pointer(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+	return (unsigned long)regs;
+#else
+	return regs->sp;
+#endif
+}
+
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+	return regs->ip;
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+	return regs->bp;
+}
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */

From dbe3533b7f687402651e893d5b3a7b66f22d6487 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 628/890] x86: clean up ptrace.h

Leave definition of pt_regs in its own section, move all kernel
code to section afterwards, unify prototype definitions, has some
conditional prototypes to make it clear what was only defined in
32 and 64 bit.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/ptrace.h | 51 ++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 31 deletions(-)

diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 0ad9a2b7f8c0..79d5b8fcd7b1 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -7,17 +7,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef __KERNEL__
-
-/* the DS BTS struct is used for ptrace as well */
-#include <asm/ds.h>
-
-struct task_struct;
-extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
-
-#endif /* __KERNEL__ */
-
-
 #ifdef __i386__
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
@@ -69,16 +58,6 @@ struct pt_regs {
 #include <asm/vm86.h>
 #include <asm/segment.h>
 
-struct task_struct;
-
-extern unsigned long
-convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
-
-#define regs_return_value(regs) ((regs)->ax)
-
-extern unsigned long profile_pc(struct pt_regs *regs);
 #endif /* __KERNEL__ */
 
 #else /* __i386__ */
@@ -143,21 +122,31 @@ struct pt_regs {
 /* top of stack page */
 };
 
-#define regs_return_value(regs) ((regs)->ax)
-
-extern unsigned long profile_pc(struct pt_regs *regs);
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-struct task_struct;
-
-extern unsigned long
-convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */
 
 #ifdef __KERNEL__
 
+/* the DS BTS struct is used for ptrace as well */
+#include <asm/ds.h>
+
+struct task_struct;
+
+extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
+
+extern unsigned long profile_pc(struct pt_regs *regs);
+
+extern unsigned long
+convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
+
+#ifdef CONFIG_X86_32
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
+#else
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+#endif
+
+#define regs_return_value(regs) ((regs)->ax)
+
 /*
  * user_mode_vm(regs) determines whether a register set came from user mode.
  * This is true if V8086 mode was enabled OR if the register set was from

From f6e8e28410c017adb273983057023a17a8791ad3 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 629/890] x86: rename stack_pointer to kernel_trap_sp

Choose a less generic name for such a special case.  Add
a comment explaining the odd use in X86_32.

Change the one user of stack_pointer.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/oprofile/backtrace.c | 2 +-
 include/asm-x86/ptrace.h      | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 0ca4815a2938..e2095cba409f 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -77,7 +77,7 @@ void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_head *head = (struct frame_head *)frame_pointer(regs);
-	unsigned long stack = stack_pointer(regs);
+	unsigned long stack = kernel_trap_sp(regs);
 
 	if (!user_mode_vm(regs)) {
 		if (depth)
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 79d5b8fcd7b1..d9e04b46a440 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -182,7 +182,13 @@ static inline int v8086_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline unsigned long stack_pointer(struct pt_regs *regs)
+/*
+ * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
+ * when it traps.  So regs will be the current sp.
+ *
+ * This is valid only for kernel mode traps.
+ */
+static inline unsigned long kernel_trap_sp(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
 	return (unsigned long)regs;

From 30d432dfab2bcfd021d352e2058fae6b9405caeb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 630/890] x86: move MWAIT idle check to generic CPU
 initialization on 32-bit

Previously it was only run for Intel CPUs, but AMD Fam10h implements MWAIT too.

This matches 64bit behaviour.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c | 2 ++
 arch/x86/kernel/cpu/intel.c  | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ed05c7a0ca9b..4bd326d0322c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -499,6 +499,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
+
+	select_idle_routine(c);
 }
 
 void __init identify_boot_cpu(void)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f1136115279a..d1c372b018db 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -137,7 +137,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 #endif
 
-	select_idle_routine(c);
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9 ) {
 		unsigned eax = cpuid_eax(10);

From 0c07ee38c9d4eb081758f5ad14bbffa7197e1aec Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 631/890] x86: use the correct cpuid method to detect MWAIT
 support for C states

Previously there was a AMD specific quirk to handle the case of
AMD Fam10h MWAIT not supporting any C states. But it turns out
that CPUID already has ways to detectly detect that without
using special quirks.

The new code simply checks if MWAIT supports at least C1 and doesn't
use it if it doesn't. No more vendor specific code.

Note this is does not simply clear MWAIT because MWAIT can be still
useful even without C states.

Credit goes to Ben Serebrin for pointing out the (nearly) obvious.

Cc: "Andreas Herrmann" <andreas.herrmann3@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/amd.c    |  3 ---
 arch/x86/kernel/process_32.c | 10 +++++++++-
 arch/x86/kernel/process_64.c | 11 ++++++++++-
 arch/x86/kernel/setup_64.c   |  4 ----
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cd2fe15ff4b5..06fa159232fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -300,9 +300,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		local_apic_timer_disabled = 1;
 #endif
 
-	if (c->x86 == 0x10 && !force_mwait)
-		clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
-
 	/* K6s reports MCEs but don't actually have all the MSRs */
 	if (c->x86 < 6)
 		clear_bit(X86_FEATURE_MCE, c->x86_capability);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 69a69c3f43bb..9f45a51af968 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -285,9 +285,17 @@ static void mwait_idle(void)
 	mwait_idle_with_hints(0, 0);
 }
 
+static int mwait_usable(const struct cpuinfo_x86 *c)
+{
+	if (force_mwait)
+		return 1;
+	/* Any C1 states supported? */
+	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 		printk("monitor/mwait feature present.\n");
 		/*
 		 * Skip, if setup has overridden idle.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4e65ae8a54bf..dbe0a846ec52 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -280,10 +280,19 @@ static void mwait_idle(void)
 	}
 }
 
+
+static int mwait_usable(const struct cpuinfo_x86 *c)
+{
+	if (force_mwait)
+		return 1;
+	/* Any C1 states supported? */
+	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 	static int printed;
-	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 		/*
 		 * Skip, if setup has overridden idle.
 		 * One CPU supports mwait => All CPUs supports mwait
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 71a420c7fee7..4a3f00b49236 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -761,10 +761,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* MFENCE stops RDTSC speculation */
 	set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 
-	/* Family 10 doesn't support C states in MWAIT so don't use it */
-	if (c->x86 == 0x10 && !force_mwait)
-		clear_cpu_cap(c, X86_FEATURE_MWAIT);
-
 	if (amd_apic_timer_broken())
 		disable_apic_timer = 1;
 }

From 27cc2a812eb504f4aadff5baa862da715fb0f886 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 632/890] x86: use shorter addresses in i386 segfault printks

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index eba90198b15a..71c722bb6963 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -512,7 +512,7 @@ bad_area_nosemaphore:
 		    printk_ratelimit()) {
 			printk(
 #ifdef CONFIG_X86_32
-			"%s%s[%d]: segfault at %08lx ip %08lx sp %08lx error %lx\n",
+			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx\n",
 #else
 			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
 #endif

From edcd81199dbad5db11ae91b507cec1d46dd94a49 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:16 +0100
Subject: [PATCH 633/890] x86: unify printk strings in fault_32|64.c

Adding the address of the faulting library missed removing a
line ending from X86_32.

Also update the shorter printk format for X86_32 in fault_64.c
to make it easier to se the remaining differences.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 2 +-
 arch/x86/mm/fault_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 71c722bb6963..13d295506a17 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -512,7 +512,7 @@ bad_area_nosemaphore:
 		    printk_ratelimit()) {
 			printk(
 #ifdef CONFIG_X86_32
-			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx\n",
+			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
 #else
 			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
 #endif
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 75b7b165bdf3..b606bdefbb72 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -550,7 +550,7 @@ bad_area_nosemaphore:
 		    printk_ratelimit()) {
 			printk(
 #ifdef CONFIG_X86_32
-			"%s%s[%d]: segfault at %08lx ip %08lx sp %08lx error %lx\n",
+			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
 #else
 			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
 #endif

From 751752789162fde69474edfa15935d0a77c0bc17 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:17 +0100
Subject: [PATCH 634/890] x86: replace hard coded reservations in 64-bit early
 boot code with dynamic table

On x86-64 there are several memory allocations before bootmem. To avoid
them stomping on each other they used to be all hard coded in bad_area().
Replace this with an array that is filled as needed.

This cleans up the code considerably and allows to expand its use.

Cc: peterz@infradead.org
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_64.c  | 101 ++++++++++++++++++++-----------------
 arch/x86/kernel/head64.c   |  48 ++++++++++++++++++
 arch/x86/kernel/setup_64.c |  67 ++----------------------
 arch/x86/mm/init_64.c      |   5 +-
 arch/x86/mm/numa_64.c      |   1 +
 include/asm-x86/e820_64.h  |   4 +-
 include/asm-x86/proto.h    |   2 -
 7 files changed, 113 insertions(+), 115 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 07cfaae7ab07..f8b7bebb4344 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -47,56 +47,65 @@ unsigned long end_pfn_map;
  */
 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
 
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */
+/*
+ * Early reserved memory areas.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+	unsigned long start, end;
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+	{ 0, PAGE_SIZE },			/* BIOS data page */
+#ifdef CONFIG_SMP
+	{ SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE },
+#endif
+	{}
+};
+
+void __init reserve_early(unsigned long start, unsigned long end)
+{
+	int i;
+	struct early_res *r;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		r = &early_res[i];
+		if (end > r->start && start < r->end)
+			panic("Duplicated early reservation %lx-%lx\n",
+			      start, end);
+	}
+	if (i >= MAX_EARLY_RES)
+		panic("Too many early reservations");
+	r = &early_res[i];
+	r->start = start;
+	r->end = end;
+}
+
+void __init early_res_to_bootmem(void)
+{
+	int i;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		reserve_bootmem_generic(r->start, r->end - r->start);
+	}
+}
+
+/* Check for already reserved areas */
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
 {
-	unsigned long addr = *addrp, last = addr + size;
-
-	/* various gunk below that needed for SMP startup */
-	if (addr < 0x8000) {
-		*addrp = PAGE_ALIGN(0x8000);
-		return 1;
-	}
-
-	/* direct mapping tables of the kernel */
-	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
-		*addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
-		return 1;
-	}
-
-	/* initrd */
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
-		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-		unsigned long ramdisk_end   = ramdisk_image+ramdisk_size;
-
-		if (last >= ramdisk_image && addr < ramdisk_end) {
-			*addrp = PAGE_ALIGN(ramdisk_end);
-			return 1;
+	int i;
+	unsigned long addr = *addrp, last;
+	int changed = 0;
+again:
+	last = addr + size;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		if (last >= r->start && addr < r->end) {
+			*addrp = addr = r->end;
+			changed = 1;
+			goto again;
 		}
 	}
-#endif
-	/* kernel code */
-	if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
-		*addrp = PAGE_ALIGN(__pa_symbol(&_end));
-		return 1;
-	}
-
-	if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
-		*addrp = PAGE_ALIGN(ebda_addr + ebda_size);
-		return 1;
-	}
-
-#ifdef CONFIG_NUMA
-	/* NUMA memory to node map */
-	if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
-		*addrp = nodemap_addr + nodemap_size;
-		return 1;
-	}
-#endif
-	/* XXX ramdisk image here? */
-	return 0;
+	return changed;
 }
 
 /*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 87e031d4abf1..58438bafedca 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -21,6 +21,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
+#include <asm/e820.h>
 
 static void __init zap_identity_mappings(void)
 {
@@ -48,6 +49,35 @@ static void __init copy_bootdata(char *real_mode_data)
 	}
 }
 
+#define EBDA_ADDR_POINTER 0x40E
+
+static __init void reserve_ebda(void)
+{
+	unsigned ebda_addr, ebda_size;
+
+	/*
+	 * there is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E
+	 */
+	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+	ebda_addr <<= 4;
+
+	if (!ebda_addr)
+		return;
+
+	ebda_size = *(unsigned short *)__va(ebda_addr);
+
+	/* Round EBDA up to pages */
+	if (ebda_size == 0)
+		ebda_size = 1;
+	ebda_size <<= 10;
+	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+	if (ebda_size > 64*1024)
+		ebda_size = 64*1024;
+
+	reserve_early(ebda_addr, ebda_addr + ebda_size);
+}
+
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
@@ -75,5 +105,23 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	pda_init(0);
 	copy_bootdata(__va(real_mode_data));
 
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end));
+
+	/* Reserve INITRD */
+	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
+		unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
+		reserve_early(ramdisk_image, ramdisk_end);
+	}
+
+	reserve_ebda();
+
+	/*
+	 * At this point everything still needed from the boot loader
+	 * or BIOS or kernel text should be early reserved or marked not
+	 * RAM in e820. All other memory is free game.
+	 */
+
 	start_kernel();
 }
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 4a3f00b49236..6cbd15625dce 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -245,41 +245,6 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
-#define EBDA_ADDR_POINTER 0x40E
-
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
-
-static void __init discover_ebda(void)
-{
-	/*
-	 * there is a real-mode segmented pointer pointing to the
-	 * 4K EBDA area at 0x40E
-	 */
-	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
-	/*
-	 * There can be some situations, like paravirtualized guests,
-	 * in which there is no available ebda information. In such
-	 * case, just skip it
-	 */
-	if (!ebda_addr) {
-		ebda_size = 0;
-		return;
-	}
-
-	ebda_addr <<= 4;
-
-	ebda_size = *(unsigned short *)__va(ebda_addr);
-
-	/* Round EBDA up to pages */
-	if (ebda_size == 0)
-		ebda_size = 1;
-	ebda_size <<= 10;
-	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
-	if (ebda_size > 64*1024)
-		ebda_size = 64*1024;
-}
-
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 void __attribute__((weak)) __init memory_setup(void)
 {
@@ -349,8 +314,6 @@ void __init setup_arch(char **cmdline_p)
 
 	check_efer();
 
-	discover_ebda();
-
 	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 	if (efi_enabled)
 		efi_init();
@@ -397,33 +360,7 @@ void __init setup_arch(char **cmdline_p)
 	contig_initmem_init(0, end_pfn);
 #endif
 
-	/* Reserve direct mapping */
-	reserve_bootmem_generic(table_start << PAGE_SHIFT,
-				(table_end - table_start) << PAGE_SHIFT);
-
-	/* reserve kernel */
-	reserve_bootmem_generic(__pa_symbol(&_text),
-				__pa_symbol(&_end) - __pa_symbol(&_text));
-
-	/*
-	 * reserve physical page 0 - it's a special BIOS page on many boxes,
-	 * enabling clean reboots, SMP operation, laptop functions.
-	 */
-	reserve_bootmem_generic(0, PAGE_SIZE);
-
-	/* reserve ebda region */
-	if (ebda_addr)
-		reserve_bootmem_generic(ebda_addr, ebda_size);
-#ifdef CONFIG_NUMA
-	/* reserve nodemap region */
-	if (nodemap_addr)
-		reserve_bootmem_generic(nodemap_addr, nodemap_size);
-#endif
-
-#ifdef CONFIG_SMP
-	/* Reserve SMP trampoline */
-	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
-#endif
+	early_res_to_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
 	/*
@@ -453,6 +390,8 @@ void __init setup_arch(char **cmdline_p)
 			initrd_start = ramdisk_image + PAGE_OFFSET;
 			initrd_end = initrd_start+ramdisk_size;
 		} else {
+			/* Assumes everything on node 0 */
+			free_bootmem(ramdisk_image, ramdisk_size);
 			printk(KERN_ERR "initrd extends beyond end of memory "
 			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
 			       ramdisk_end, end_of_mem);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 05f12c527b02..8198840c3dcb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -176,7 +176,8 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 	set_pte_phys(address, phys, prot);
 }
 
-unsigned long __meminitdata table_start, table_end;
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
 
 static __meminit void *alloc_low_page(unsigned long *phys)
 { 
@@ -387,6 +388,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();
 	__flush_tlb_all();
+
+	reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
 }
 
 #ifndef CONFIG_NUMA
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 37d429beba96..5d24dc1ec237 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -102,6 +102,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 	}
 	pad_addr = (nodemap_addr + pad) & ~pad;
 	memnodemap = phys_to_virt(pad_addr);
+	reserve_early(nodemap_addr, nodemap_addr + nodemap_size);
 
 	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
 	       nodemap_addr, nodemap_addr + nodemap_size);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index ff36f434f00b..51e4170f9ca5 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -41,8 +41,8 @@ extern void finish_e820_parsing(void);
 extern struct e820map e820;
 extern void update_e820(void);
 
-extern unsigned ebda_addr, ebda_size;
-extern unsigned long nodemap_addr, nodemap_size;
+extern void reserve_early(unsigned long start, unsigned long end);
+extern void early_res_to_bootmem(void);
 
 #endif/*!__ASSEMBLY__*/
 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 3c6fb89f1120..68563c0709ac 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -22,8 +22,6 @@ extern void syscall32_cpu_init(void);
 
 extern void check_efer(void);
 
-extern unsigned long table_start, table_end;
-
 extern int reboot_force;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);

From ca74a6f84e68b44867022f4a4f3ec17c087c864e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:17 +0100
Subject: [PATCH 635/890] x86: optimize lock prefix switching to run less
 frequently

On VMs implemented using JITs that cache translated code changing the lock
prefixes is a quite costly operation that forces the JIT to throw away and
retranslate a lot of code.

Previously a SMP kernel would rewrite the locks once for each CPU which
is quite unnecessary. This patch changes the code to never switch at boot in
 the normal case (SMP kernel booting with >1 CPU) or only once for SMP kernel
on UP.

This makes a significant difference in boot up performance on AMD SimNow!
Also I expect it to be a little faster on native systems too because a smp
switch does a lot of text_poke()s which each synchronize the pipeline.

v1->v2: Rename max_cpus
v1->v2: Fix off by one in UP check (Thomas Gleixner)

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/alternative.c | 16 ++++++++++++++--
 include/linux/smp.h           |  2 ++
 init/main.c                   | 16 ++++++++--------
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cdc43242da92..318a4f9b7ece 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -273,6 +273,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_SPINLOCK(smp_alt);
+static int smp_mode = 1;	/* protected by smp_alt */
 
 void alternatives_smp_module_add(struct module *mod, char *name,
 				 void *locks, void *locks_end,
@@ -354,7 +355,14 @@ void alternatives_smp_switch(int smp)
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
 	spin_lock_irqsave(&smp_alt, flags);
-	if (smp) {
+
+	/*
+	 * Avoid unnecessary switches because it forces JIT based VMs to
+	 * throw away all cached translations, which can be quite costly.
+	 */
+	if (smp == smp_mode) {
+		/* nothing */
+	} else if (smp) {
 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
@@ -369,6 +377,7 @@ void alternatives_smp_switch(int smp)
 			alternatives_smp_unlock(mod->locks, mod->locks_end,
 						mod->text, mod->text_end);
 	}
+	smp_mode = smp;
 	spin_unlock_irqrestore(&smp_alt, flags);
 }
 
@@ -441,7 +450,10 @@ void __init alternative_instructions(void)
 		alternatives_smp_module_add(NULL, "core kernel",
 					    __smp_locks, __smp_locks_end,
 					    _text, _etext);
-		alternatives_smp_switch(0);
+
+		/* Only switch to UP mode if we don't immediately boot others */
+		if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
+			alternatives_smp_switch(0);
 	}
 #endif
  	apply_paravirt(__parainstructions, __parainstructions_end);
diff --git a/include/linux/smp.h b/include/linux/smp.h
index c25e66bcecf3..55232ccf9cfd 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -78,6 +78,8 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
  */
 void smp_prepare_boot_cpu(void);
 
+extern unsigned int setup_max_cpus;
+
 #else /* !SMP */
 
 /*
diff --git a/init/main.c b/init/main.c
index 3f8aba291ed3..5843fe996703 100644
--- a/init/main.c
+++ b/init/main.c
@@ -128,7 +128,7 @@ static char *ramdisk_execute_command;
 
 #ifdef CONFIG_SMP
 /* Setup configured maximum number of CPUs to activate */
-static unsigned int __initdata max_cpus = NR_CPUS;
+unsigned int __initdata setup_max_cpus = NR_CPUS;
 
 /*
  * Setup routine for controlling SMP activation
@@ -146,7 +146,7 @@ static inline void disable_ioapic_setup(void) {};
 
 static int __init nosmp(char *str)
 {
-	max_cpus = 0;
+	setup_max_cpus = 0;
 	disable_ioapic_setup();
 	return 0;
 }
@@ -155,8 +155,8 @@ early_param("nosmp", nosmp);
 
 static int __init maxcpus(char *str)
 {
-	get_option(&str, &max_cpus);
-	if (max_cpus == 0)
+	get_option(&str, &setup_max_cpus);
+	if (setup_max_cpus == 0)
 		disable_ioapic_setup();
 
 	return 0;
@@ -164,7 +164,7 @@ static int __init maxcpus(char *str)
 
 early_param("maxcpus", maxcpus);
 #else
-#define max_cpus NR_CPUS
+#define setup_max_cpus NR_CPUS
 #endif
 
 /*
@@ -393,7 +393,7 @@ static void __init smp_init(void)
 
 	/* FIXME: This should be done in userspace --RR */
 	for_each_present_cpu(cpu) {
-		if (num_online_cpus() >= max_cpus)
+		if (num_online_cpus() >= setup_max_cpus)
 			break;
 		if (!cpu_online(cpu))
 			cpu_up(cpu);
@@ -401,7 +401,7 @@ static void __init smp_init(void)
 
 	/* Any cleanup work */
 	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
-	smp_cpus_done(max_cpus);
+	smp_cpus_done(setup_max_cpus);
 }
 
 #endif
@@ -824,7 +824,7 @@ static int __init kernel_init(void * unused)
 	__set_special_pids(1, 1);
 	cad_pid = task_pid(current);
 
-	smp_prepare_cpus(max_cpus);
+	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
 

From d3432896dae72ee97deb850ad7bbc30329d32c0d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:17 +0100
Subject: [PATCH 636/890] x86: don't disable the APIC if it hasn't been mapped
 yet

When the kernel panics early for some unrelated reason
there would be eventually an early exception inside panic because
clear_local_APIC tried to disable the not yet mapped APIC.
Check for that explicitely.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c | 11 ++++++++---
 arch/x86/kernel/apic_64.c |  9 +++++++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d07a603807d1..35a568ea8400 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -99,6 +99,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase;
 
+static unsigned long apic_phys;
+
 /*
  * Get the LAPIC version
  */
@@ -631,9 +633,14 @@ int setup_profiling_timer(unsigned int multiplier)
  */
 void clear_local_APIC(void)
 {
-	int maxlvt = lapic_get_maxlvt();
+	int maxlvt;
 	u32 v;
 
+	/* APIC hasn't been mapped yet */
+	if (!apic_phys)
+		return;
+
+	maxlvt = lapic_get_maxlvt();
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
 	 * if the vector is zero. Mask LVTERR first to prevent this.
@@ -1120,8 +1127,6 @@ no_apic:
  */
 void __init init_apic_mappings(void)
 {
-	unsigned long apic_phys;
-
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 85bd3d463cdf..a7a38c9da136 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -81,6 +81,8 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
+static unsigned long apic_phys;
+
 /*
  * Get the LAPIC version
  */
@@ -525,6 +527,11 @@ void clear_local_APIC(void)
 	int maxlvt = lapic_get_maxlvt();
 	u32 v;
 
+	/* APIC hasn't been mapped yet */
+	if (!apic_phys)
+		return;
+
+	maxlvt = lapic_get_maxlvt();
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
 	 * if the vector is zero. Mask LVTERR first to prevent this.
@@ -859,8 +866,6 @@ static int __init detect_init_APIC(void)
  */
 void __init init_apic_mappings(void)
 {
-	unsigned long apic_phys;
-
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another

From 03252919b79891063cf99145612360efbdf9500b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 637/890] x86: print which shared library/executable faulted in
 segfault etc. messages v3

They now look like:

hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in libacl.so.1.1.0[2b9c8caea000+6000]

This makes it easier to pinpoint bugs to specific libraries.

And printing the offset into a mapping also always allows to find the
correct fault point in a library even with randomized mappings. Previously
there was no way to actually find the correct code address inside
the randomized mapping.

Relies on earlier patch to shorten the printk formats.

They are often now longer than 80 characters, but I think that's worth it.

[includes fix from Eric Dumazet to check d_path error value]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/signal_32.c |  7 +++++--
 arch/x86/kernel/signal_64.c |  7 +++++--
 arch/x86/kernel/traps_32.c  |  7 +++++--
 arch/x86/kernel/traps_64.c  | 14 ++++++++++----
 arch/x86/mm/fault_32.c      |  4 +++-
 arch/x86/mm/fault_64.c      |  4 +++-
 include/linux/mm.h          |  1 +
 mm/memory.c                 | 31 +++++++++++++++++++++++++++++++
 8 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 74df55895c8c..89a690edf999 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -198,12 +198,15 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
 	return ax;
 
 badframe:
-	if (show_unhandled_signals && printk_ratelimit())
+	if (show_unhandled_signals && printk_ratelimit()) {
 		printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx"
-		       " sp:%lx oeax:%lx\n",
+		       " sp:%lx oeax:%lx",
 		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
 		    current->comm, task_pid_nr(current), frame, regs->ip,
 		    regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
 
 	force_sig(SIGSEGV, current);
 	return 0;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 4eb751c60390..7347bb14e306 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -484,9 +484,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 { 
 	struct task_struct *me = current; 
-	if (show_unhandled_signals && printk_ratelimit())
-		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx\n",
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
 	       me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
 
 	force_sig(SIGSEGV, me); 
 } 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 6f3bb287c702..270cfd483160 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -609,11 +609,14 @@ void __kprobes do_general_protection(struct pt_regs * regs,
 	current->thread.error_code = error_code;
 	current->thread.trap_no = 13;
 	if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
-	    printk_ratelimit())
+	    printk_ratelimit()) {
 		printk(KERN_INFO
-		    "%s[%d] general protection ip:%lx sp:%lx error:%lx\n",
+		    "%s[%d] general protection ip:%lx sp:%lx error:%lx",
 		    current->comm, task_pid_nr(current),
 		    regs->ip, regs->sp, error_code);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
 
 	force_sig(SIGSEGV, current);
 	return;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 814801f4eb9e..911ed28afff8 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -642,11 +642,14 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 		tsk->thread.trap_no = trapnr;
 
 		if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
-		    printk_ratelimit())
+		    printk_ratelimit()) {
 			printk(KERN_INFO
-			       "%s[%d] trap %s ip:%lx sp:%lx error:%lx\n",
+			       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
 			       tsk->comm, tsk->pid, str,
 			       regs->ip, regs->sp, error_code);
+			print_vma_addr(" in ", regs->ip);
+			printk("\n");
+		}
 
 		if (info)
 			force_sig_info(signr, info, tsk);
@@ -741,11 +744,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 		tsk->thread.trap_no = 13;
 
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-		    printk_ratelimit())
+		    printk_ratelimit()) {
 			printk(KERN_INFO
-		       "%s[%d] general protection ip:%lx sp:%lx error:%lx\n",
+		       "%s[%d] general protection ip:%lx sp:%lx error:%lx",
 			       tsk->comm, tsk->pid,
 			       regs->ip, regs->sp, error_code);
+			print_vma_addr(" in ", regs->ip);
+			printk("\n");
+		}
 
 		force_sig(SIGSEGV, tsk);
 		return;
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 13d295506a17..276863dc4bdd 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -514,11 +514,13 @@ bad_area_nosemaphore:
 #ifdef CONFIG_X86_32
 			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
 #else
-			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
+			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
 #endif
 			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 			tsk->comm, task_pid_nr(tsk), address, regs->ip,
 			regs->sp, error_code);
+			print_vma_addr(" in ", regs->ip);
+			printk("\n");
 		}
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index b606bdefbb72..9ef0306efe9e 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -552,11 +552,13 @@ bad_area_nosemaphore:
 #ifdef CONFIG_X86_32
 			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
 #else
-			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
+			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
 #endif
 			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 			tsk->comm, task_pid_nr(tsk), address, regs->ip,
 			regs->sp, error_code);
+			print_vma_addr(" in ", regs->ip);
+			printk("\n");
 		}
 
 		tsk->thread.cr2 = address;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1897ca223eca..3c22d971afa7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1146,6 +1146,7 @@ extern int randomize_va_space;
 #endif
 
 const char * arch_vma_name(struct vm_area_struct *vma);
+void print_vma_addr(char *prefix, unsigned long rip);
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
diff --git a/mm/memory.c b/mm/memory.c
index 673ebbf499c7..d902d0e25edc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2754,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 
 	return buf - old_buf;
 }
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ip);
+	if (vma && vma->vm_file) {
+		struct file *f = vma->vm_file;
+		char *buf = (char *)__get_free_page(GFP_KERNEL);
+		if (buf) {
+			char *p, *s;
+
+			p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+			if (IS_ERR(p))
+				p = "?";
+			s = strrchr(p, '/');
+			if (s)
+				p = s+1;
+			printk("%s%s[%lx+%lx]", prefix, p,
+					vma->vm_start,
+					vma->vm_end - vma->vm_start);
+			free_page((unsigned long)buf);
+		}
+	}
+	up_read(&current->mm->mmap_sem);
+}

From 99fc8d424bc5d803fe92cad56c068fe64e73747a Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jesse.barnes@intel.com>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 638/890] x86, 32-bit: trim memory not covered by wb mtrrs

On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached.  Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e.  right around init time).

This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match.  A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.

This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).

Further enhancements and fixes from:

  Yinghai Lu <Yinghai.Lu@Sun.COM>
  Andi Kleen <ak@suse.de>

Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |   6 ++
 arch/x86/kernel/bugs_64.c           |   1 -
 arch/x86/kernel/cpu/mtrr/generic.c  |   8 +-
 arch/x86/kernel/cpu/mtrr/if.c       |   8 +-
 arch/x86/kernel/cpu/mtrr/main.c     | 140 +++++++++++++++++++++++-----
 arch/x86/kernel/cpu/mtrr/mtrr.h     |   3 +
 arch/x86/kernel/setup_64.c          |   7 ++
 include/asm-x86/mtrr.h              |   6 +-
 8 files changed, 140 insertions(+), 39 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 860a90875491..b8fadf5f75a3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -570,6 +570,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_trim [X86-64, Intel only]
+			By default the kernel will trim any uncacheable
+			memory out of your available memory pool based on
+			MTRR settings.  This parameter disables that behavior,
+			possibly causing your machine to run very slowly.
+
 	dmasound=	[HW,OSS] Sound subsystem buffers
 
 	dscc4.setup=	[NET]
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c
index 9a189cef6404..8f520f93ffd4 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/bugs_64.c
@@ -13,7 +13,6 @@
 void __init check_bugs(void)
 {
 	identify_cpu(&boot_cpu_data);
-	mtrr_bp_init();
 #if !defined(CONFIG_SMP)
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55d31ff118fb..103d61a59b19 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,7 +14,7 @@
 #include "mtrr.h"
 
 struct mtrr_state {
-	struct mtrr_var_range *var_ranges;
+	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
@@ -86,12 +86,6 @@ void __init get_mtrr_state(void)
 	struct mtrr_var_range *vrs;
 	unsigned lo, dummy;
 
-	if (!mtrr_state.var_ranges) {
-		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
-						GFP_KERNEL);
-		if (!mtrr_state.var_ranges)
-			return;
-	} 
 	vrs = mtrr_state.var_ranges;
 
 	rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 14535686c099..91e150acb46c 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
 #include <asm/mtrr.h>
 #include "mtrr.h"
 
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
 
 static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -397,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 	for (i = 0; i < max; i++) {
 		mtrr_if->get(i, &base, &size, &type);
 		if (size == 0)
-			usage_table[i] = 0;
+			mtrr_usage_table[i] = 0;
 		else {
 			if (size < (0x100000 >> PAGE_SHIFT)) {
 				/* less than 1MB */
@@ -411,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 			len += seq_printf(seq, 
 				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
-			     mtrr_attrib_to_str(type), usage_table[i]);
+			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
 		}
 	}
 	return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 60af5ed2b5c0..ccd36ed2187b 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
+#include <asm/e820.h>
 #include <asm/mtrr.h>
-
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,7 +47,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int *usage_table;
+unsigned int mtrr_usage_table[MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -121,13 +121,8 @@ static void __init init_table(void)
 	int i, max;
 
 	max = num_var_ranges;
-	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
-	    == NULL) {
-		printk(KERN_ERR "mtrr: could not allocate\n");
-		return;
-	}
 	for (i = 0; i < max; i++)
-		usage_table[i] = 1;
+		mtrr_usage_table[i] = 1;
 }
 
 struct set_mtrr_data {
@@ -383,7 +378,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 			goto out;
 		}
 		if (increment)
-			++usage_table[i];
+			++mtrr_usage_table[i];
 		error = i;
 		goto out;
 	}
@@ -391,15 +386,15 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 	i = mtrr_if->get_free_region(base, size, replace);
 	if (i >= 0) {
 		set_mtrr(i, base, size, type);
-		if (likely(replace < 0))
-			usage_table[i] = 1;
-		else {
-			usage_table[i] = usage_table[replace];
+		if (likely(replace < 0)) {
+			mtrr_usage_table[i] = 1;
+		} else {
+			mtrr_usage_table[i] = mtrr_usage_table[replace];
 			if (increment)
-				usage_table[i]++;
+				mtrr_usage_table[i]++;
 			if (unlikely(replace != i)) {
 				set_mtrr(replace, 0, 0, 0);
-				usage_table[replace] = 0;
+				mtrr_usage_table[replace] = 0;
 			}
 		}
 	} else
@@ -529,11 +524,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
 		goto out;
 	}
-	if (usage_table[reg] < 1) {
+	if (mtrr_usage_table[reg] < 1) {
 		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
-	if (--usage_table[reg] < 1)
+	if (--mtrr_usage_table[reg] < 1)
 		set_mtrr(reg, 0, 0, 0);
 	error = reg;
  out:
@@ -593,16 +588,11 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
-	int size = num_var_ranges * sizeof(struct mtrr_value);
-
-	mtrr_state = kzalloc(size,GFP_ATOMIC);
-	if (!mtrr_state)
-		return -ENOMEM;
 
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i,
@@ -624,7 +614,6 @@ static int mtrr_restore(struct sys_device * sysdev)
 				 mtrr_state[i].lsize,
 				 mtrr_state[i].ltype);
 	}
-	kfree(mtrr_state);
 	return 0;
 }
 
@@ -635,6 +624,109 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
+#ifdef CONFIG_X86_64
+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+	disable_mtrr_trim = 1;
+	return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+/*
+ * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
+ * for memory >4GB. Check for that here.
+ * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
+ * apply to are wrong, but so far we don't know of any such case in the wild.
+ */
+#define Tom2Enabled (1U << 21)
+#define Tom2ForceMemTypeWB (1U << 22)
+
+static __init int amd_special_default_mtrr(unsigned long end_pfn)
+{
+	u32 l, h;
+
+	/* Doesn't apply to memory < 4GB */
+	if (end_pfn <= (0xffffffff >> PAGE_SHIFT))
+		return 0;
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return 0;
+	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
+		return 0;
+	/* In case some hypervisor doesn't pass SYSCFG through */
+	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+		return 0;
+	/*
+	 * Memory between 4GB and top of mem is forced WB by this magic bit.
+	 * Reserved before K8RevF, but should be zero there.
+	 */
+	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
+		 (Tom2Enabled | Tom2ForceMemTypeWB))
+		return 1;
+	return 0;
+}
+
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations.  This routine checks that the highest MTRR matches
+ * the end of memory, to make sure the MTRRs having a write back type cover
+ * all of the memory the kernel is intending to use. If not, it'll trim any
+ * memory off the end by adjusting end_pfn, removing it from the kernel's
+ * allocation pools, warning the user with an obnoxious message.
+ */
+int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
+{
+	unsigned long i, base, size, highest_addr = 0, def, dummy;
+	mtrr_type type;
+	u64 trim_start, trim_size;
+
+	/*
+	 * Make sure we only trim uncachable memory on machines that
+	 * support the Intel MTRR architecture:
+	 */
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base <<= PAGE_SHIFT;
+		size <<= PAGE_SHIFT;
+		if (highest_addr < base + size)
+			highest_addr = base + size;
+	}
+
+	if (amd_special_default_mtrr(end_pfn))
+		return 0;
+
+	if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
+		printk(KERN_WARNING "**** MTRRs don't cover all of "
+		       "memory, trimmed %ld pages\n", end_pfn -
+		       (highest_addr >> PAGE_SHIFT));
+		printk(KERN_WARNING "***************\n");
+
+		printk(KERN_INFO "update e820 for mtrr\n");
+		trim_start = highest_addr;
+		trim_size = end_pfn;
+		trim_size <<= PAGE_SHIFT;
+		trim_size -= trim_start;
+		add_memory_region(trim_start, trim_size, E820_RESERVED);
+		update_e820();
+		return 1;
+	}
+
+	return 0;
+}
+#endif
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 54347e9a95c0..fb74a2c20814 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -12,6 +12,7 @@
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
 #define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -32,6 +33,8 @@
    an 8 bit field: */
 typedef u8 mtrr_type;
 
+extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+
 struct mtrr_ops {
 	u32	vendor;
 	u32	use_intel_if;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6cbd15625dce..12948316e6a6 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -310,6 +310,13 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	end_pfn = e820_end_of_ram();
+	/* update e820 for memory not covered by WB MTRRs */
+	mtrr_bp_init();
+	if (mtrr_trim_uncached_memory(end_pfn)) {
+		e820_register_active_regions(0, 0, -1UL);
+		end_pfn = e820_end_of_ram();
+	}
+
 	num_physpages = end_pfn;
 
 	check_efer();
diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index 262670e42078..319d065800be 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h
@@ -97,6 +97,7 @@ extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
 extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
+extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
 #  else
 #define mtrr_save_fixed_ranges(arg) do {} while (0)
 #define mtrr_save_state() do {} while (0)
@@ -120,7 +121,10 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
 {
     return -ENODEV;
 }
-
+static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
+{
+	return 0;
+}
 static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;}
 
 #define mtrr_ap_init() do {} while (0)

From 71617bf140fd5a35645527502cd330f84045d40c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 639/890] x86: only call early_init_amd one time

Andi's patch
"
    x86: move X86_FEATURE_CONSTANT_TSC into early cpu feature detection

    Need this in the next patch in time_init and that happens early.

    This includes a minor fix on i386 where early_intel_workarounds()
    [which is now called early_init_intel] really executes early as
    the comments say.
"
calling early_init_amd in early_identify_cpu and identify_cpu two times.

this patch remove the one in identify_cpu

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 12948316e6a6..08e85b79e702 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -954,6 +954,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	case X86_VENDOR_AMD:
 		early_init_amd(c);
 		break;
+	case X86_VENDOR_INTEL:
+		early_init_intel(c);
+		break;
 	}
 
 }
@@ -1020,14 +1023,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	numa_add_cpu(smp_processor_id());
 #endif
 
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		early_init_amd(c);
-		break;
-	case X86_VENDOR_INTEL:
-		early_init_intel(c);
-		break;
-	}
 }
 
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)

From 67926892ef7a7fbc76de607120d44416019fdf07 Mon Sep 17 00:00:00 2001
From: Michael Opdenacker <michael-lists@free-electrons.com>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 640/890] x86: fix unconditional arch/x86/kernel/pcspeaker.o
 compiling

do not add the pcspkr platform device if pcspkr support is disabled.

Signed-off-by: Michael Opdenacker <michael@free-electrons.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c414c45a0f13..5bdb0f0431e9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,7 +67,10 @@ obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+
+ifdef CONFIG_INPUT_PCSPKR
 obj-y				+= pcspeaker.o
+endif
 
 obj-$(CONFIG_SCx200)		+= scx200_32.o
 

From 2274c33ebdfbb848e7291849e662d36519b52bf4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 641/890] x86: msr for AMD Fam 10h mmio

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/msr-index.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 4045bbe47379..fae118a25278 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -95,6 +95,14 @@
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
 #define MSR_AMD64_IBSCTL		0xc001103a
 
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE		(1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffff
+#define FAM10H_MMIO_CONF_BASE_SHIFT	20
+
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d

From 27415a4fe369e07a1393ae52c8ed8e48aabed5a9 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 642/890] x86: move warning message of polling idle and HT
 enabled

The warning message at idle_setup() is never shown because
smp_num_sibling hasn't been updated at this point yet.

Move this polling idle and HT enabled warning to select_idle_routine().
I also implement this warning on 64-bit kernel.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 18 ++++++++++++------
 arch/x86/kernel/process_64.c | 17 ++++++++++++-----
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f45a51af968..b72d7d132072 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -295,17 +295,27 @@ static int mwait_usable(const struct cpuinfo_x86 *c)
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
+	static int selected;
+
+	if (selected)
+		return;
+#ifdef CONFIG_X86_SMP
+	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+			" performance may degrade.\n");
+	}
+#endif
 	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-		printk("monitor/mwait feature present.\n");
 		/*
 		 * Skip, if setup has overridden idle.
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
 		if (!pm_idle) {
-			printk("using mwait in idle threads.\n");
+			printk(KERN_INFO "using mwait in idle threads.\n");
 			pm_idle = mwait_idle;
 		}
 	}
+	selected = 1;
 }
 
 static int __init idle_setup(char *str)
@@ -313,10 +323,6 @@ static int __init idle_setup(char *str)
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
-#ifdef CONFIG_X86_SMP
-		if (smp_num_siblings > 1)
-			printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
-#endif
 	} else if (!strcmp(str, "mwait"))
 		force_mwait = 1;
 	else
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dbe0a846ec52..95313532b2e0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -291,20 +291,27 @@ static int mwait_usable(const struct cpuinfo_x86 *c)
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-	static int printed;
+	static int selected;
+
+	if (selected)
+		return;
+#ifdef CONFIG_X86_SMP
+	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+			" performance may degrade.\n");
+	}
+#endif
 	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 		/*
 		 * Skip, if setup has overridden idle.
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
 		if (!pm_idle) {
-			if (!printed) {
-				printk(KERN_INFO "using mwait in idle threads.\n");
-				printed = 1;
-			}
+			printk(KERN_INFO "using mwait in idle threads.\n");
 			pm_idle = mwait_idle;
 		}
 	}
+	selected = 1;
 }
 
 static int __init idle_setup(char *str)

From 74ff305b05b0974b30022a48145075e6d9a7fb2b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 643/890] x86: move select_idle_routine() call after
 detect_ht()

Move the select_idle_routine() call to after the detect_ht() call at
identify_cpu() on 64-bit.

This change is for printing the polling idle and HT enabled warning
message properly.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 08e85b79e702..7edb43f0b279 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -999,7 +999,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		break;
 	}
 
-	select_idle_routine(c);
 	detect_ht(c);
 
 	/*
@@ -1017,6 +1016,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_MCE
 	mcheck_init(c);
 #endif
+	select_idle_routine(c);
+
 	if (c != &boot_cpu_data)
 		mtrr_ap_init();
 #ifdef CONFIG_NUMA

From 47db4c3e932dbf889f34aab8aac2b1391581a9a5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:18 +0100
Subject: [PATCH 644/890] x86: checking aperture report for node instead

currently when gart iommu is enabled by BIOS or previous we got

"
Checking aperture...
CPU 0: aperture @4000000 size 64MB
CPU 1: aperture @4000000 size 64MB
"
we should use use Node instead.

we will get
"
Checking aperture...
Node 0: aperture @4000000 size 64MB
Node 1: aperture @4000000 size 64MB
"

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/aperture_64.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index bf1b469d5847..0b837bb3becb 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -312,6 +312,7 @@ void __init gart_iommu_hole_init(void)
 	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
 	int fix, num, valid_agp = 0;
+	int node;
 
 	if (gart_iommu_aperture_disabled || !fix_aperture ||
 	    !early_pci_allowed())
@@ -320,6 +321,7 @@ void __init gart_iommu_hole_init(void)
 	printk(KERN_INFO  "Checking aperture...\n");
 
 	fix = 0;
+	node = 0;
 	for (num = 24; num < 32; num++) {
 		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
 			continue;
@@ -332,8 +334,9 @@ void __init gart_iommu_hole_init(void)
 		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
 		aper_base <<= 25;
 
-		printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n",
-				num-24, aper_base, aper_size>>20);
+		printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
+				node, aper_base, aper_size >> 20);
+		node++;
 
 		if (!aperture_valid(aper_base, aper_size)) {
 			fix = 1;

From 8a650ce297c723ebe7da17ec2890f6971438aee1 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 645/890] x86: add stringify header

We use a __stringify construction at paravirt_patch_64.c.
It's better practice to include the stringify header directly

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt_patch_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index cbfc4f3069e3..7d904e138d7e 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,5 +1,6 @@
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
+#include <linux/stringify.h>
 
 DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");

From b03878307a78ba0248f6f8d24279c846f1a26f6e Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 646/890] x86: provide a native_init_IRQ function on 64-bit

x86_64 lacks a native_init_IRQ() function, so we turn the arch's
init_IRQ() function into a native construct

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i8259_64.c  | 4 +++-
 include/asm-x86/hw_irq_64.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index d3edb9f23f2c..fa57a1568508 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -456,7 +456,9 @@ void __init init_ISA_irqs (void)
 	}
 }
 
-void __init init_IRQ(void)
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
 {
 	int i;
 
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index a346159b6ac3..312a58d6dac6 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -141,6 +141,7 @@ extern void print_IO_APIC(void);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
 extern void send_IPI(int dest, int vector);
 extern void setup_ioapic_dest(void);
+extern void native_init_IRQ(void);
 
 extern unsigned long io_apic_irqs;
 

From 046627130591993fd04acef9aebccf70fdad3702 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 647/890] x86: put generic mm_hooks include into PARAVIRT

With PARAVIRT, we actually have arch_{dup,exit}_mmap functions,
so we can't include the generic header

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/mmu_context_64.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 7e2aa23fccbf..ad6dc821ef9e 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -7,7 +7,9 @@
 #include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#ifndef CONFIG_PARAVIRT
 #include <asm-generic/mm_hooks.h>
+#endif
 
 /*
  * possibly do the LDT unload here?

From 4c9890c246121d070deb8cf5cf53e80caffc4dde Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 648/890] x86: puts read and write cr8 into pv_cpu_ops

This patch adds room for read and write_cr8 functions back in
pv_cpu_ops struct

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index c56b17a5eba0..73547acbbbf5 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -101,6 +101,11 @@ struct pv_cpu_ops {
 	unsigned long (*read_cr4)(void);
 	void (*write_cr4)(unsigned long);
 
+#ifdef CONFIG_X86_64
+	unsigned long (*read_cr8)(void);
+	void (*write_cr8)(unsigned long);
+#endif
+
 	/* Segment descriptor handling */
 	void (*load_tr_desc)(void);
 	void (*load_gdt)(const struct desc_ptr *);
@@ -614,6 +619,16 @@ static inline void write_cr4(unsigned long x)
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
 
+static inline unsigned long read_cr8(void)
+{
+	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
+}
+
+static inline void write_cr8(unsigned long x)
+{
+	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
+}
+
 static inline void raw_safe_halt(void)
 {
 	PVOP_VCALL0(pv_irq_ops.safe_halt);

From 94ea03cdda520f0c0dc80cbb4674ab9a33749ee2 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 649/890] x86: provide read and write cr8 paravirt hooks

Since the cr8 manipulation functions ended up staying in the tree,
they can't be defined just when PARAVIRT is off: In this patch,
those functions are defined for the PARAVIRT case too.

[ mingo@elte.hu: fixes ]

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h |  2 ++
 include/asm-x86/system.h   | 30 ++++++++++++++++--------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 73547acbbbf5..a55f95e921ee 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -619,6 +619,7 @@ static inline void write_cr4(unsigned long x)
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
 
+#ifdef CONFIG_X86_64
 static inline unsigned long read_cr8(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
@@ -628,6 +629,7 @@ static inline void write_cr8(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
 }
+#endif
 
 static inline void raw_safe_halt(void)
 {
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 39474f2957a3..ee32ef9367f4 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -231,6 +231,20 @@ static inline void native_write_cr4(unsigned long val)
 	asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
 }
 
+#ifdef CONFIG_X86_64
+static inline unsigned long native_read_cr8(void)
+{
+	unsigned long cr8;
+	asm volatile("movq %%cr8,%0" : "=r" (cr8));
+	return cr8;
+}
+
+static inline void native_write_cr8(unsigned long val)
+{
+	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+#endif
+
 static inline void native_wbinvd(void)
 {
 	asm volatile("wbinvd": : :"memory");
@@ -248,21 +262,9 @@ static inline void native_wbinvd(void)
 #define read_cr4_safe()	(native_read_cr4_safe())
 #define write_cr4(x)	(native_write_cr4(x))
 #define wbinvd()	(native_wbinvd())
-
 #ifdef CONFIG_X86_64
-
-static inline unsigned long read_cr8(void)
-{
-	unsigned long cr8;
-	asm volatile("movq %%cr8,%0" : "=r" (cr8));
-	return cr8;
-}
-
-static inline void write_cr8(unsigned long val)
-{
-	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-
+#define read_cr8()	(native_read_cr8())
+#define write_cr8(x)	(native_write_cr8(x))
 #endif
 
 /* Clear the 'TS' bit */

From 88b4755f0fa8f5075ac0182f07852acbb397e140 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 650/890] x86: fill pv_cpu_ops structure with cr8 fields

This patch fills in the read and write cr8 fields with their
native version.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c20b4f8d62f5..c67d33103b91 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,6 +319,10 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.read_cr4 = native_read_cr4,
 	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = native_write_cr4,
+#ifdef CONFIG_X86_64
+	.read_cr8 = native_read_cr8,
+	.write_cr8 = native_write_cr8,
+#endif
 	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr_safe,
 	.write_msr = native_write_msr_safe,

From a59153dceb860f810b2ecd2504162309291edd4c Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 651/890] x86: add asm_offset PARAVIRT constants

This patch adds the constant PARAVIRT needs in asm_offsets_64.c

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/asm-offsets_64.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 2b32719a3fea..494e1e096ee6 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -61,6 +61,20 @@ int main(void)
 	ENTRY(data_offset);
 	BLANK();
 #undef ENTRY
+#ifdef CONFIG_PARAVIRT
+	BLANK();
+	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+	OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+
 #ifdef CONFIG_IA32_EMULATION
 #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
 	ENTRY(ax);

From fbf519240c840f51e4d4af35c3c19e1df160dc48 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 652/890] x86: provide __parainstructions section

This patch adds the __parainstructions section to vmlinux.lds.S.
It's needed for the patching system.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 7457370d7916..0992b9946c6f 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -178,6 +178,14 @@ SECTIONS
   }
   __con_initcall_end = .;
   SECURITY_INIT
+
+  . = ALIGN(8);
+  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+  __parainstructions = .;
+       *(.parainstructions)
+  __parainstructions_end = .;
+  }
+
   . = ALIGN(8);
   __alt_instructions = .;
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {

From 1fe91514a3dc07ab540bc891589c46aaa47e92f6 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:19 +0100
Subject: [PATCH 653/890] x86: change function orders in paravirt.h

__pmd, pmd_val and set_pud are used before they are defined (as static)
We move them a little up in the file, so it doesn't happen.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 84 +++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index a55f95e921ee..62cecb7778ee 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1025,6 +1025,48 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
 }
 
+#if PAGETABLE_LEVELS >= 3
+static inline pmd_t __pmd(pmdval_t val)
+{
+	pmdval_t ret;
+
+	if (sizeof(pmdval_t) > sizeof(long))
+		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
+				 val, (u64)val >> 32);
+	else
+		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
+				 val);
+
+	return (pmd_t) { ret };
+}
+
+static inline pmdval_t pmd_val(pmd_t pmd)
+{
+	pmdval_t ret;
+
+	if (sizeof(pmdval_t) > sizeof(long))
+		ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
+				  pmd.pmd, (u64)pmd.pmd >> 32);
+	else
+		ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
+				  pmd.pmd);
+
+	return ret;
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+	pudval_t val = native_pud_val(pud);
+
+	if (sizeof(pudval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
+			    val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
+			    val);
+}
+#endif	/* PAGETABLE_LEVELS >= 3 */
+
 #ifdef CONFIG_X86_PAE
 /* Special-case pte-setting operations for PAE, which can't update a
    64-bit pte atomically */
@@ -1075,48 +1117,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 }
 #endif	/* CONFIG_X86_PAE */
 
-#if PAGETABLE_LEVELS >= 3
-static inline pmd_t __pmd(pmdval_t val)
-{
-	pmdval_t ret;
-
-	if (sizeof(pmdval_t) > sizeof(long))
-		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
-				 val, (u64)val >> 32);
-	else
-		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
-				 val);
-
-	return (pmd_t) { ret };
-}
-
-static inline pmdval_t pmd_val(pmd_t pmd)
-{
-	pmdval_t ret;
-
-	if (sizeof(pmdval_t) > sizeof(long))
-		ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
-				  pmd.pmd, (u64)pmd.pmd >> 32);
-	else
-		ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
-				  pmd.pmd);
-
-	return ret;
-}
-
-static inline void set_pud(pud_t *pudp, pud_t pud)
-{
-	pudval_t val = native_pud_val(pud);
-
-	if (sizeof(pudval_t) > sizeof(long))
-		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
-			    val, (u64)val >> 32);
-	else
-		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
-			    val);
-}
-#endif	/* PAGETABLE_LEVELS >= 3 */
-
 /* Lazy mode for batching updates / context switch */
 enum paravirt_lazy_mode {
 	PARAVIRT_LAZY_NONE,

From 9042219cd8d43b81322b826d463dd6e52acae6cf Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 654/890] x86: include/asm-x86/paravirt.h: x86_64 mmu
 operations

Add .set_pgd field to pv_mmu_ops.

Implement pud_val(), __pud(), set_pgd(), pud_clear(), pgd_clear().

pud_clear() and pgd_clear() are implemented simply using set_pud()
and set_pmd(). They don't have a field at pv_mmu_ops.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/paravirt.h | 55 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 62cecb7778ee..9f9ff57b3efb 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -259,6 +259,8 @@ struct pv_mmu_ops {
 #if PAGETABLE_LEVELS == 4
 	pudval_t (*pud_val)(pud_t);
 	pud_t (*make_pud)(pudval_t pud);
+
+	void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
 #endif	/* PAGETABLE_LEVELS == 4 */
 #endif	/* PAGETABLE_LEVELS >= 3 */
 
@@ -1065,6 +1067,59 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
 			    val);
 }
+#if PAGETABLE_LEVELS == 4
+static inline pud_t __pud(pudval_t val)
+{
+	pudval_t ret;
+
+	if (sizeof(pudval_t) > sizeof(long))
+		ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
+				 val, (u64)val >> 32);
+	else
+		ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
+				 val);
+
+	return (pud_t) { ret };
+}
+
+static inline pudval_t pud_val(pud_t pud)
+{
+	pudval_t ret;
+
+	if (sizeof(pudval_t) > sizeof(long))
+		ret =  PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
+				  pud.pud, (u64)pud.pud >> 32);
+	else
+		ret =  PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
+				  pud.pud);
+
+	return ret;
+}
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	pgdval_t val = native_pgd_val(pgd);
+
+	if (sizeof(pgdval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
+			    val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
+			    val);
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	set_pgd(pgdp, __pgd(0));
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	set_pud(pudp, __pud(0));
+}
+
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 #endif	/* PAGETABLE_LEVELS >= 3 */
 
 #ifdef CONFIG_X86_PAE

From d8dd8eec57254adac48e46c6d191cb24161d8b4d Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 655/890] x86: don't set pagetable_setup_{start,done} hooks on
 64-bit

paravirt_pagetable_setup_{start,done}() are not used (yet) under x86_64,
and native_pagetable_setup_{start,done}() don't exist on x86_64. So they
don't need to be set.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c67d33103b91..37f38b71c50a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -367,8 +367,10 @@ struct pv_apic_ops pv_apic_ops = {
 };
 
 struct pv_mmu_ops pv_mmu_ops = {
+#ifndef CONFIG_X86_64
 	.pagetable_setup_start = native_pagetable_setup_start,
 	.pagetable_setup_done = native_pagetable_setup_done,
+#endif
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,

From f95f2f7b9d7a3da79a833a8fd78a5154cc4b3107 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 656/890] x86: fill in missing pv_mmu_ops entries for
 PAGETABLE_LEVELS >= 3

This finally makes paravirt-ops able to compile and boot under x86_64.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 37f38b71c50a..075962cc75ab 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -398,16 +398,23 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.kmap_atomic_pte = kmap_atomic,
 #endif
 
+#if PAGETABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 	.set_pte_atomic = native_set_pte_atomic,
 	.set_pte_present = native_set_pte_present,
-	.set_pud = native_set_pud,
 	.pte_clear = native_pte_clear,
 	.pmd_clear = native_pmd_clear,
-
+#endif
+	.set_pud = native_set_pud,
 	.pmd_val = native_pmd_val,
 	.make_pmd = native_make_pmd,
+
+#if PAGETABLE_LEVELS == 4
+	.pud_val = native_pud_val,
+	.make_pud = native_make_pud,
+	.set_pgd = native_set_pgd,
 #endif
+#endif /* PAGETABLE_LEVELS >= 3 */
 
 	.pte_val = native_pte_val,
 	.pgd_val = native_pgd_val,

From 7d851c8d3db0f79b92c8b14361779ede8acd2488 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 657/890] x86: add framework to disable CPUID bits on the
 command line

There are already various options to disable specific cpuid bits
on the command line. They all use their own variable. Add a generic
mask to make this easier in the future.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c | 6 ++++++
 arch/x86/kernel/setup_64.c   | 6 ++++++
 include/asm-x86/cpufeature.h | 4 ++++
 include/asm-x86/processor.h  | 1 +
 4 files changed, 17 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4bd326d0322c..f0f29ddf33a2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -57,6 +57,8 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
 static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -497,6 +499,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
+	/* Clear all flags overriden by options */
+	for (i = 0; i < NCAPINTS; i++)
+		c->x86_capability[i] ^= cleared_cpu_caps[i];
+
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7edb43f0b279..df159520bbd2 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -80,6 +80,8 @@
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
 EXPORT_SYMBOL(boot_cpu_data);
 
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
 unsigned long mmu_cr4_features;
 
 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
@@ -1013,6 +1015,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
+	/* Clear all flags overriden by options */
+	for (i = 0; i < NCAPINTS; i++)
+		c->x86_capability[i] ^= cleared_cpu_caps[i];
+
 #ifdef CONFIG_X86_MCE
 	mcheck_init(c);
 #endif
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 29727bf0e177..b8f53f869e1f 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -131,6 +131,10 @@
 
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
+#define setup_clear_cpu_cap(bit) do { \
+	clear_cpu_cap(&boot_cpu_data, bit);	\
+	set_bit(bit, cleared_cpu_caps); 	\
+} while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
 #define cpu_has_vme		boot_cpu_has(X86_FEATURE_VME)
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 81ecfed83e47..ab4d0c2a3f8f 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -118,6 +118,7 @@ struct cpuinfo_x86 {
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
 extern struct tss_struct doublefault_tss;
+extern __u32 cleared_cpu_caps[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);

From 135302577bb964ebf23376e2d991405ef4ff0457 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 658/890] x86: convert some existing cpuid disable options to
 new generic bitmap

This convers nofxsr, mem=nopentium and nosep to use the new
generic cpuid disable bitmap instead of using own variables.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c | 34 ++++------------------------------
 arch/x86/kernel/setup_32.c   |  5 +----
 2 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f0f29ddf33a2..5f9c8e3a3e0f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -60,14 +60,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 
 static int cachesize_override __cpuinitdata = -1;
-static int disable_x86_fxsr __cpuinitdata;
 static int disable_x86_serial_nr __cpuinitdata = 1;
-static int disable_x86_sep __cpuinitdata;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
-extern int disable_pse;
-
 static void __cpuinit default_init(struct cpuinfo_x86 * c)
 {
 	/* Not much we can do here... */
@@ -216,16 +212,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 
 static int __init x86_fxsr_setup(char * s)
 {
-	/* Tell all the other CPUs to not use it... */
-	disable_x86_fxsr = 1;
-
-	/*
-	 * ... and clear the bits early in the boot_cpu_data
-	 * so that the bootup process doesn't try to do this
-	 * either.
-	 */
-	clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
-	clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
+	setup_clear_cpu_cap(X86_FEATURE_FXSR);
+	setup_clear_cpu_cap(X86_FEATURE_XMM);
 	return 1;
 }
 __setup("nofxsr", x86_fxsr_setup);
@@ -233,7 +221,7 @@ __setup("nofxsr", x86_fxsr_setup);
 
 static int __init x86_sep_setup(char * s)
 {
-	disable_x86_sep = 1;
+	setup_clear_cpu_cap(X86_FEATURE_SEP);
 	return 1;
 }
 __setup("nosep", x86_sep_setup);
@@ -462,19 +450,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	if ( tsc_disable )
 		clear_bit(X86_FEATURE_TSC, c->x86_capability);
 
-	/* FXSR disabled? */
-	if (disable_x86_fxsr) {
-		clear_bit(X86_FEATURE_FXSR, c->x86_capability);
-		clear_bit(X86_FEATURE_XMM, c->x86_capability);
-	}
-
-	/* SEP disabled? */
-	if (disable_x86_sep)
-		clear_bit(X86_FEATURE_SEP, c->x86_capability);
-
-	if (disable_pse)
-		clear_bit(X86_FEATURE_PSE, c->x86_capability);
-
 	/* If the model name is still unset, do table lookup. */
 	if ( !c->x86_model_id[0] ) {
 		char *p;
@@ -629,8 +604,7 @@ void __init early_cpu_init(void)
 	/* pse is not compatible with on-the-fly unmapping,
 	 * disable it even if the cpus claim to support it.
 	 */
-	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
-	disable_pse = 1;
+	setup_clear_cpu_cap(X86_FEATURE_PSE);
 #endif
 }
 
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 6802a383077d..26a56f714d34 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -68,8 +68,6 @@
    address, and must not be in the .bss segment! */
 unsigned long init_pg_tables_end __initdata = ~0UL;
 
-int disable_pse __cpuinitdata = 0;
-
 /*
  * Machine setup..
  */
@@ -242,8 +240,7 @@ static int __init parse_mem(char *arg)
 		return -EINVAL;
 
 	if (strcmp(arg, "nopentium") == 0) {
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
-		disable_pse = 1;
+		setup_clear_cpu_cap(X86_FEATURE_PSE);
 	} else {
 		/* If the user specifies memory size, we
 		 * limit the BIOS-provided memory map to

From 8424950b5e85543a494b5d940bb2f5f9f16f56a9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 659/890] x86: don't disable RDTSC in userland for 32bit notsc

Modern 32bit userland doesn't even boot when the TSC is disabled
because ld.so tends to contain RDTSCs.  So make notsc only effective for the
kernel, similar to 64bit.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5f9c8e3a3e0f..c66991a04a8a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -654,7 +654,6 @@ void __cpuinit cpu_init(void)
 		printk(KERN_NOTICE "Disabling TSC...\n");
 		/**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
 		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
-		set_in_cr4(X86_CR4_TSD);
 	}
 
 	load_idt(&idt_descr);

From 404ee5b14b68d3cba287c2596588b83790c49f7b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:20 +0100
Subject: [PATCH 660/890] x86: convert TSC disabling to generic cpuid disable
 bitmap

Fix from: Ian Campbell <ijc@hellion.org.uk>

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/bugs.c    |  2 +-
 arch/x86/kernel/cpu/common.c  |  9 ---------
 arch/x86/kernel/numaq_32.c    |  2 +-
 arch/x86/kernel/tsc_32.c      | 14 +++-----------
 arch/x86/mach-voyager/setup.c |  2 +-
 arch/x86/xen/time.c           |  2 +-
 include/asm-x86/cpufeature.h  |  4 ++++
 include/asm-x86/tsc.h         |  2 --
 8 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a96abd453e0d..9b95edcfc6ae 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -154,7 +154,7 @@ static void __init check_config(void)
  * If we configured ourselves for a TSC, we'd better have one!
  */
 #ifdef CONFIG_X86_TSC
-	if (!cpu_has_tsc && !tsc_disable)
+	if (!cpu_has_tsc)
 		panic("Kernel compiled for Pentium+, requires TSC feature!");
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c66991a04a8a..dfc9563fc4f0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -446,10 +446,6 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	 * we do "generic changes."
 	 */
 
-	/* TSC disabled? */
-	if ( tsc_disable )
-		clear_bit(X86_FEATURE_TSC, c->x86_capability);
-
 	/* If the model name is still unset, do table lookup. */
 	if ( !c->x86_model_id[0] ) {
 		char *p;
@@ -650,11 +646,6 @@ void __cpuinit cpu_init(void)
 
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-	if (tsc_disable && cpu_has_tsc) {
-		printk(KERN_NOTICE "Disabling TSC...\n");
-		/**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
-		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
-	}
 
 	load_idt(&idt_descr);
 	switch_to_new_gdt();
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 9000d82c6dc0..e65281b1634b 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -82,7 +82,7 @@ static int __init numaq_tsc_disable(void)
 {
 	if (num_online_nodes() > 1) {
 		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
-		tsc_disable = 1;
+		setup_clear_cpu_cap(X86_FEATURE_TSC);
 	}
 	return 0;
 }
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 2a7b95bd8509..43517e324be8 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -24,8 +24,6 @@ static int tsc_enabled;
 unsigned int tsc_khz;
 EXPORT_SYMBOL_GPL(tsc_khz);
 
-int tsc_disable;
-
 #ifdef CONFIG_X86_TSC
 static int __init tsc_setup(char *str)
 {
@@ -40,8 +38,7 @@ static int __init tsc_setup(char *str)
  */
 static int __init tsc_setup(char *str)
 {
-	tsc_disable = 1;
-
+	setup_clear_cpu_cap(X86_FEATURE_TSC);
 	return 1;
 }
 #endif
@@ -395,7 +392,7 @@ void __init tsc_init(void)
 {
 	int cpu;
 
-	if (!cpu_has_tsc || tsc_disable)
+	if (!cpu_has_tsc)
 		goto out_no_tsc;
 
 	cpu_khz = calculate_cpu_khz();
@@ -439,10 +436,5 @@ void __init tsc_init(void)
 	return;
 
 out_no_tsc:
-	/*
-	 * Set the tsc_disable flag if there's no TSC support, this
-	 * makes it a fast flag for the kernel to see whether it
-	 * should be using the TSC.
-	 */
-	tsc_disable = 1;
+	setup_clear_cpu_cap(X86_FEATURE_TSC);
 }
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 81257a861984..5ae5466b9eb9 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -37,7 +37,7 @@ void __init pre_setup_arch_hook(void)
 {
 	/* Voyagers run their CPUs from independent clocks, so disable
 	 * the TSC code because we can't sync them */
-	tsc_disable = 1;
+	setup_clear_cpu_cap(X86_FEATURE_TSC);
 }
 
 void __init trap_init_hook(void)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index d083ff5ef088..b3721fd6877b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -592,7 +592,7 @@ __init void xen_time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 				-xtime.tv_sec, -xtime.tv_nsec);
 
-	tsc_disable = 0;
+	setup_force_cpu_cap(X86_FEATURE_TSC);
 
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index b8f53f869e1f..3fb7dfa7fc91 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -135,6 +135,10 @@
 	clear_cpu_cap(&boot_cpu_data, bit);	\
 	set_bit(bit, cleared_cpu_caps); 	\
 } while (0)
+#define setup_force_cpu_cap(bit) do { \
+	set_cpu_cap(&boot_cpu_data, bit);	\
+	clear_bit(bit, cleared_cpu_caps); 	\
+} while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
 #define cpu_has_vme		boot_cpu_has(X86_FEATURE_VME)
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 071e0ce5b664..a6e8d35c3f86 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -16,8 +16,6 @@ typedef unsigned long long cycles_t;
 
 extern unsigned int cpu_khz;
 extern unsigned int tsc_khz;
-/* flag for disabling the tsc */
-extern int tsc_disable;
 
 extern void disable_TSC(void);
 

From 191679fdfa63342752ff6a094a2522ae939b8d0c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:21 +0100
Subject: [PATCH 661/890] x86: add noclflush option

To disable CLFLUSH usage, especially in change_page_attr().

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt | 2 ++
 arch/x86/kernel/cpu/common.c        | 7 +++++++
 arch/x86/kernel/setup_64.c          | 7 +++++++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b8fadf5f75a3..040e30a33b7c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1191,6 +1191,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
+	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
+
 	nohlt		[BUGS=ARM]
 
 	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dfc9563fc4f0..56b7ea8e6c79 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -542,6 +542,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 }
 #endif
 
+static __init int setup_noclflush(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+	return 1;
+}
+__setup("noclflush", setup_noclflush);
+
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	char *vendor = NULL;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index df159520bbd2..cb9b8a90c094 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1032,6 +1032,13 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 }
 
+static __init int setup_noclflush(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+	return 1;
+}
+__setup("noclflush", setup_noclflush);
+
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	if (c->x86_model_id[0])

From ac72e7888a612dccfbc15b34698aad441bdfda10 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:21 +0100
Subject: [PATCH 662/890] x86: add generic clearcpuid=... option

Add a generic option to clear any cpuid bit. I added it because it was
very easy to add with the new generic cpuid disable bitmap and perhaps
it will be useful in the future.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt | 13 +++++++++++++
 arch/x86/kernel/cpu/common.c        | 11 +++++++++++
 arch/x86/kernel/setup_64.c          | 11 +++++++++++
 3 files changed, 35 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 040e30a33b7c..50d564dabb13 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -416,6 +416,19 @@ and is between 256 and 4096 characters. It is defined in the file
 			[SPARC64] tick
 			[X86-64] hpet,tsc
 
+	clearcpuid=BITNUM [X86]
+			Disable CPUID feature X for the kernel. See
+			include/asm-x86/cpufeature.h for the valid bit numbers.
+			Note the Linux specific bits are not necessarily
+			stable over kernel options, but the vendor specific
+			ones should be.
+			Also note that user programs calling CPUID directly
+			or using the feature without checking anything
+			will still see it. This just prevents it from
+			being used by the kernel or shown in /proc/cpuinfo.
+			Also note the kernel might malfunction if you disable
+			some critical bits.
+
 	code_bytes	[IA32/X86_64] How many bytes of object code to print
 			in an oops report.
 			Range: 0 - 8192
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 56b7ea8e6c79..56cc341cc586 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -572,6 +572,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk("\n");
 }
 
+static __init int setup_disablecpuid(char *arg)
+{
+	int bit;
+	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+		setup_clear_cpu_cap(bit);
+	else
+		return 0;
+	return 1;
+}
+__setup("clearcpuid=", setup_disablecpuid);
+
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 /* This is hacky. :)
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index cb9b8a90c094..79635b7bd57a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1050,6 +1050,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk(KERN_CONT "\n");
 }
 
+static __init int setup_disablecpuid(char *arg)
+{
+	int bit;
+	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+		setup_clear_cpu_cap(bit);
+	else
+		return 0;
+	return 1;
+}
+__setup("clearcpuid=", setup_disablecpuid);
+
 /*
  *	Get CPU information for use by the procfs.
  */

From 834beda15ecc43c110c0a6ac39ec1aa79f891716 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:21 +0100
Subject: [PATCH 663/890] x86: change NR_CPUS arrays in numa_64 fixup

Change the following static arrays sized by NR_CPUS to
per_cpu data variables:

	char cpu_to_node_map[NR_CPUS];

fixup:

  - Split cpu_to_node function into "early" and "late" versions
    so that x86_cpu_to_node_map_early_ptr is not EXPORT'ed and
    the cpu_to_node inline function is more streamlined.

  - This also involves setting up the percpu maps as early as possible.

  - Fix X86_32 NUMA build errors that previous version of this
    patch caused.

V2->V3:
    - add early_cpu_to_node function to keep cpu_to_node efficient
    - move and rename smp_set_apicids() to setup_percpu_maps()
    - call setup_percpu_maps() as early as possible

V1->V2:
    - Removed extraneous casts
    - Fix !NUMA builds with '#ifdef CONFIG_NUMA"

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup64.c    | 41 ++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/smpboot_32.c |  2 +-
 arch/x86/kernel/smpboot_64.c | 34 ------------------------------
 arch/x86/mm/srat_64.c        |  5 +++--
 include/asm-x86/topology.h   | 23 ++++++++++++++++++++
 5 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 8fa0de810d0b..855ec82e4f76 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -83,6 +83,40 @@ static int __init nonx32_setup(char *str)
 }
 __setup("noexec32=", nonx32_setup);
 
+/*
+ * Copy data used in early init routines from the initial arrays to the
+ * per cpu data areas.  These arrays then become expendable and the *_ptrs
+ * are zeroed indicating that the static arrays are gone.
+ */
+void __init setup_percpu_maps(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SMP
+		if (per_cpu_offset(cpu)) {
+#endif
+			per_cpu(x86_cpu_to_apicid, cpu) =
+						x86_cpu_to_apicid_init[cpu];
+#ifdef CONFIG_NUMA
+			per_cpu(x86_cpu_to_node_map, cpu) =
+						x86_cpu_to_node_map_init[cpu];
+#endif
+#ifdef CONFIG_SMP
+		}
+		else
+			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+									cpu);
+#endif
+	}
+
+	/* indicate the early static arrays are gone */
+	x86_cpu_to_apicid_early_ptr = NULL;
+#ifdef CONFIG_NUMA
+	x86_cpu_to_node_map_early_ptr = NULL;
+#endif
+}
+
 /*
  * Great future plan:
  * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -104,18 +138,21 @@ void __init setup_per_cpu_areas(void)
 	for_each_cpu_mask (i, cpu_possible_map) {
 		char *ptr;
 
-		if (!NODE_DATA(cpu_to_node(i))) {
+		if (!NODE_DATA(early_cpu_to_node(i))) {
 			printk("cpu with no node %d, num_online_nodes %d\n",
 			       i, num_online_nodes());
 			ptr = alloc_bootmem_pages(size);
 		} else { 
-			ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
+			ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
 		}
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
+
+	/* setup percpu data maps early */
+	setup_percpu_maps();
 } 
 
 void pda_init(int cpu)
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 915ec6267326..50232d476a27 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -465,7 +465,7 @@ cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
 EXPORT_SYMBOL(node_to_cpumask_map);
 /* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
+u8 cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_to_node_map);
 
 /* set up a mapping between cpu and node. */
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 93071cdf0849..4e14ecb90764 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -851,39 +851,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	return 0;
 }
 
-/*
- * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas.  These arrays then become expendable and the
- * *_ptrs are zeroed indicating that the static arrays are gone.
- */
-void __init smp_set_apicids(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		if (per_cpu_offset(cpu)) {
-			per_cpu(x86_cpu_to_apicid, cpu) =
-						x86_cpu_to_apicid_init[cpu];
-#ifdef CONFIG_NUMA
-			per_cpu(x86_cpu_to_node_map, cpu) =
-						x86_cpu_to_node_map_init[cpu];
-#endif
-			per_cpu(x86_bios_cpu_apicid, cpu) =
-						x86_bios_cpu_apicid_init[cpu];
-		}
-		else
-			printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
-									cpu);
-	}
-
-	/* indicate the early static arrays are gone */
-	x86_cpu_to_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = NULL;
-#endif
-	x86_bios_cpu_apicid_early_ptr = NULL;
-}
-
 static void __init smp_cpu_index_default(void)
 {
 	int i;
@@ -906,7 +873,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
-	smp_set_apicids();
 	set_cpu_sibling_map(0);
 
 	if (smp_sanity_check(max_cpus) < 0) {
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index e5a1ec8342dc..04cbecaeca81 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,9 +382,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_to_node(i) == NUMA_NO_NODE)
+		int node = cpu_to_node(i);
+		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(cpu_to_node(i), node_possible_map))
+		if (!node_isset(node, node_possible_map))
 			numa_set_node(i, NUMA_NO_NODE);
 	}
 	numa_init_array();
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 2da1464ecbef..040374f030cf 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -30,15 +30,29 @@
 #include <asm/mpspec.h>
 
 /* Mappings between logical cpu number and node number */
+#ifdef CONFIG_X86_32
+extern u8 cpu_to_node_map[];
+
+#else
 DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
 extern u16 x86_cpu_to_node_map_init[];
 extern void *x86_cpu_to_node_map_early_ptr;
+#endif
+
 extern cpumask_t node_to_cpumask_map[];
 
 #define NUMA_NO_NODE	((u16)(~0))
 
 /* Returns the number of the node containing CPU 'cpu' */
+#ifdef CONFIG_X86_32
+#define early_cpu_to_node(cpu)	cpu_to_node(cpu)
 static inline int cpu_to_node(int cpu)
+{
+	return cpu_to_node_map[cpu];
+}
+
+#else /* CONFIG_X86_64 */
+static inline int early_cpu_to_node(int cpu)
 {
 	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
@@ -50,6 +64,15 @@ static inline int cpu_to_node(int cpu)
 		return NUMA_NO_NODE;
 }
 
+static inline int cpu_to_node(int cpu)
+{
+	if(per_cpu_offset(cpu))
+		return per_cpu(x86_cpu_to_node_map, cpu);
+	else
+		return NUMA_NO_NODE;
+}
+#endif /* CONFIG_X86_64 */
+
 /*
  * Returns the number of the node containing Node 'node'. This
  * architecture is flat, so it is a pretty simple function!

From 0164fe16247ac2e1b697a9bf0e71df81497c4709 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:21 +0100
Subject: [PATCH 664/890] x86: fix early NUMA-id access

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/srat_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 04cbecaeca81..f828e6a6973d 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
 	for (i = 0; i < NR_CPUS; i++) {
-		int node = cpu_to_node(i);
+		int node = early_cpu_to_node(i);
+
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_isset(node, node_possible_map))

From 602a54a8cab2759fceb20b3e0c2a27c4eac005df Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:21 +0100
Subject: [PATCH 665/890] x86: change bios_cpu_apicid to percpu data variable
 fixup

Change static bios_cpu_apicid array to a per_cpu data variable.
This includes using a static array used during initialization
similar to the way x86_cpu_to_apicid[] is handled.

There is one early use of bios_cpu_apicid in apic_is_clustered_box().
The other reference in cpu_present_to_apicid() is called after
smp_set_apicids() has setup the percpu version of bios_cpu_apicid.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_64.c    |  6 +++---
 arch/x86/kernel/setup64.c    | 13 ++++++++-----
 arch/x86/kernel/setup_64.c   |  1 +
 arch/x86/kernel/smpboot_32.c |  2 +-
 arch/x86/mm/srat_64.c        |  2 +-
 5 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index a7a38c9da136..d8d03e09dea2 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1205,9 +1205,9 @@ __cpuinit int apic_is_clustered_box(void)
 
 	/* Problem:  Partially populated chassis may not have CPUs in some of
 	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.  Since
-	 * clusters are allocated sequentially, count zeros only if they are
-	 * bounded by ones.
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
 	 */
 	clusters = 0;
 	zeros = 0;
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 855ec82e4f76..0389331059ad 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -85,10 +85,10 @@ __setup("noexec32=", nonx32_setup);
 
 /*
  * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas.  These arrays then become expendable and the *_ptrs
- * are zeroed indicating that the static arrays are gone.
+ * per cpu data areas.  These arrays then become expendable and the
+ * *_early_ptr's are zeroed indicating that the static arrays are gone.
  */
-void __init setup_percpu_maps(void)
+static void __init setup_per_cpu_maps(void)
 {
 	int cpu;
 
@@ -98,6 +98,8 @@ void __init setup_percpu_maps(void)
 #endif
 			per_cpu(x86_cpu_to_apicid, cpu) =
 						x86_cpu_to_apicid_init[cpu];
+			per_cpu(x86_bios_cpu_apicid, cpu) =
+						x86_bios_cpu_apicid_init[cpu];
 #ifdef CONFIG_NUMA
 			per_cpu(x86_cpu_to_node_map, cpu) =
 						x86_cpu_to_node_map_init[cpu];
@@ -110,8 +112,9 @@ void __init setup_percpu_maps(void)
 #endif
 	}
 
-	/* indicate the early static arrays are gone */
+	/* indicate the early static arrays will soon be gone */
 	x86_cpu_to_apicid_early_ptr = NULL;
+	x86_bios_cpu_apicid_early_ptr = NULL;
 #ifdef CONFIG_NUMA
 	x86_cpu_to_node_map_early_ptr = NULL;
 #endif
@@ -152,7 +155,7 @@ void __init setup_per_cpu_areas(void)
 	}
 
 	/* setup percpu data maps early */
-	setup_percpu_maps();
+	setup_per_cpu_maps();
 } 
 
 void pda_init(int cpu)
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 79635b7bd57a..f84e2662f1ca 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -334,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_SMP
 	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
+	x86_bios_cpu_apicid_early_ptr = (void *)&x86_bios_cpu_apicid_init;
 #ifdef CONFIG_NUMA
 	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
 #endif
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 50232d476a27..915ec6267326 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -465,7 +465,7 @@ cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
 EXPORT_SYMBOL(node_to_cpumask_map);
 /* which node each logical CPU is on */
-u8 cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
+int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_to_node_map);
 
 /* set up a mapping between cpu and node. */
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f828e6a6973d..4aed38fa4a65 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -397,7 +397,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
 	[0 ... MAX_NUMNODES-1] = PXM_INVAL
 };
-static u16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 static int __init find_node_by_addr(unsigned long addr)

From c49a4955ea504c82f6b690491639bba5b8c1dc47 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:22 +0100
Subject: [PATCH 666/890] x86: add debug of invalid per_cpu map accesses

Provide a means to trap usages of per_cpu map variables before
they are setup.  Define CONFIG_DEBUG_PER_CPU_MAPS to activate.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug     | 12 ++++++++++++
 arch/x86/mm/numa_64.c      |  3 +++
 include/asm-x86/topology.h |  7 +++++++
 3 files changed, 22 insertions(+)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 88420af98140..0a82b889d76e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -47,6 +47,18 @@ config DEBUG_PAGEALLOC
 	  This results in a large slowdown, but helps to find certain types
 	  of memory corruptions.
 
+config DEBUG_PER_CPU_MAPS
+	bool "Debug access to per_cpu maps"
+	depends on DEBUG_KERNEL
+	depends on X86_64_SMP
+	default n
+	help
+	  Say Y to verify that the per_cpu map being accessed has
+	  been setup.  Adds a fair amount of code to kernel memory
+	  and decreases performance.
+
+	  Say N if unsure.
+
 config DEBUG_RODATA
 	bool "Write protect kernel read-only data structures"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5d24dc1ec237..e157cb274b25 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -37,6 +37,9 @@ u16 x86_cpu_to_node_map_init[NR_CPUS] = {
 void *x86_cpu_to_node_map_early_ptr;
 DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+#ifdef	CONFIG_DEBUG_PER_CPU_MAPS
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+#endif
 
 u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 040374f030cf..f1e12329078e 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -66,6 +66,13 @@ static inline int early_cpu_to_node(int cpu)
 
 static inline int cpu_to_node(int cpu)
 {
+#ifdef	CONFIG_DEBUG_PER_CPU_MAPS
+	if(x86_cpu_to_node_map_early_ptr) {
+		printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
+			(int)cpu);
+		BUG();
+	}
+#endif
 	if(per_cpu_offset(cpu))
 		return per_cpu(x86_cpu_to_node_map, cpu);
 	else

From 5f5cd8fd60c71ce47d2ce4e60e7ccbc306e91c64 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:23 +0100
Subject: [PATCH 667/890] x86: add debug of invalid per_cpu map accesses

dont crash survivable situations.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/topology.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index f1e12329078e..9a8228a177c7 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -66,14 +66,15 @@ static inline int early_cpu_to_node(int cpu)
 
 static inline int cpu_to_node(int cpu)
 {
-#ifdef	CONFIG_DEBUG_PER_CPU_MAPS
-	if(x86_cpu_to_node_map_early_ptr) {
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (x86_cpu_to_node_map_early_ptr) {
 		printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
 			(int)cpu);
-		BUG();
+		dump_stack();
+		return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
 	}
 #endif
-	if(per_cpu_offset(cpu))
+	if (per_cpu_offset(cpu))
 		return per_cpu(x86_cpu_to_node_map, cpu);
 	else
 		return NUMA_NO_NODE;

From d729ab35ee1367b7690458ae9e050571cb055bd3 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:23 +0100
Subject: [PATCH 668/890] x86: use v8086_mode helper, trivial unification

Use v8086_mode inline in fault_32.c, no functional change
also ifdef the section for 32-bit only and add to fault_64.c

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c |  4 +++-
 arch/x86/mm/fault_64.c | 11 +++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 276863dc4bdd..93ede2dde958 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -475,14 +475,16 @@ good_area:
 	else
 		tsk->min_flt++;
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Did it hit the DOS screen memory VA from vm86 mode?
 	 */
-	if (regs->flags & VM_MASK) {
+	if (v8086_mode(regs)) {
 		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
 		if (bit < 32)
 			tsk->thread.screen_bitmap |= 1 << bit;
 	}
+#endif
 	up_read(&mm->mmap_sem);
 	return;
 
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 9ef0306efe9e..2d9e6da2cb30 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -514,6 +514,17 @@ good_area:
 		tsk->maj_flt++;
 	else
 		tsk->min_flt++;
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Did it hit the DOS screen memory VA from vm86 mode?
+	 */
+	if (v8086_mode(regs)) {
+		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
+		if (bit < 32)
+			tsk->thread.screen_bitmap |= 1 << bit;
+	}
+#endif
 	up_read(&mm->mmap_sem);
 	return;
 

From ade1af77129dea6e335b525ed3be3b846bc1ec13 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Wed, 30 Jan 2008 13:33:23 +0100
Subject: [PATCH 669/890] x86: remove unneded casts

x86: remove unneeded casts

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c     |  5 +++--
 arch/x86/boot/compressed/misc_64.c     |  5 +++--
 arch/x86/ia32/ia32_signal.c            |  2 +-
 arch/x86/kernel/cpu/cpufreq/longhaul.c |  2 +-
 arch/x86/kernel/cpuid.c                |  2 +-
 arch/x86/kernel/kprobes.c              |  2 +-
 arch/x86/kernel/microcode.c            | 12 +++++-------
 arch/x86/kernel/process_64.c           |  2 +-
 arch/x86/kernel/signal_32.c            |  2 +-
 arch/x86/kernel/smpboot_32.c           |  2 +-
 arch/x86/kernel/stacktrace.c           |  2 +-
 arch/x86/kernel/traps_64.c             |  2 +-
 arch/x86/lib/memcpy_32.c               |  4 ++--
 arch/x86/lib/memmove_64.c              |  4 ++--
 14 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index 9103652058c4..5a0281c81edf 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -285,7 +285,7 @@ static void putstr(const char *s)
 static void* memset(void* s, int c, unsigned n)
 {
 	int i;
-	char *ss = (char*)s;
+	char *ss = s;
 
 	for (i=0;i<n;i++) ss[i] = c;
 	return s;
@@ -294,7 +294,8 @@ static void* memset(void* s, int c, unsigned n)
 static void* memcpy(void* dest, const void* src, unsigned n)
 {
 	int i;
-	char *d = (char *)dest, *s = (char *)src;
+	const char *s = src;
+	char *d = dest;
 
 	for (i=0;i<n;i++) d[i] = s[i];
 	return dest;
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 8494f0dcff21..8c1573b5d6c7 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -284,7 +284,7 @@ static void putstr(const char *s)
 static void* memset(void* s, int c, unsigned n)
 {
 	int i;
-	char *ss = (char*)s;
+	char *ss = s;
 
 	for (i=0;i<n;i++) ss[i] = c;
 	return s;
@@ -293,7 +293,8 @@ static void* memset(void* s, int c, unsigned n)
 static void* memcpy(void* dest, const void* src, unsigned n)
 {
 	int i;
-	char *d = (char *)dest, *s = (char *)src;
+	const char *s = src;
+	char *d = dest;
 
 	for (i=0;i<n;i++) d[i] = s[i];
 	return dest;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0a34c24f19e5..1c0503bdfb1a 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -472,7 +472,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 			restorer = VDSO32_SYMBOL(current->mm->context.vdso,
 						 sigreturn);
 		else
-			restorer = (void *)&frame->retcode;
+			restorer = &frame->retcode;
 	}
 	err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 749d00cb2ebd..06fcce516d51 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -694,7 +694,7 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 	if ( acpi_bus_get_device(obj_handle, &d) ) {
 		return 0;
 	}
-	*return_value = (void *)acpi_driver_data(d);
+	*return_value = acpi_driver_data(d);
 	return 1;
 }
 
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index d387c770c518..dec66e452810 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -50,7 +50,7 @@ struct cpuid_command {
 
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
+	struct cpuid_command *cmd = cmd_block;
 
 	cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
 		      &cmd->data[3]);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f7ddbb8c3fe8..a99e764fd66a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -953,7 +953,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 				       unsigned long val, void *data)
 {
-	struct die_args *args = (struct die_args *)data;
+	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
 
 	if (args->regs && user_mode_vm(args->regs))
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 40cfd5488719..6ff447f9fda7 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -244,8 +244,8 @@ static int microcode_sanity_check(void *mc)
 		return 0;
 	/* check extended signature checksum */
 	for (i = 0; i < ext_sigcount; i++) {
-		ext_sig = (struct extended_signature *)((void *)ext_header
-			+ EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i);
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+			  EXT_SIGNATURE_SIZE * i;
 		sum = orig_sum
 			- (mc_header->sig + mc_header->pf + mc_header->cksum)
 			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
@@ -279,11 +279,9 @@ static int get_maching_microcode(void *mc, int cpu)
 	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
 		return 0;
 
-	ext_header = (struct extended_sigtable *)(mc +
-			get_datasize(mc_header) + MC_HEADER_SIZE);
+	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
 	ext_sigcount = ext_header->count;
-	ext_sig = (struct extended_signature *)((void *)ext_header
-			+ EXT_HEADER_SIZE);
+	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 	for (i = 0; i < ext_sigcount; i++) {
 		if (microcode_update_match(cpu, mc_header,
 				ext_sig->sig, ext_sig->pf))
@@ -539,7 +537,7 @@ static int cpu_request_microcode(int cpu)
 		pr_debug("ucode data file %s load failed\n", name);
 		return error;
 	}
-	buf = (void *)firmware->data;
+	buf = firmware->data;
 	size = firmware->size;
 	while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
 			> 0) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 95313532b2e0..b4c470658a8a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -470,7 +470,7 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 		.limit_in_pages = 1,
 		.useable = 1,
 	};
-	struct desc_struct *desc = (void *)t->thread.tls_array;
+	struct desc_struct *desc = t->thread.tls_array;
 	desc += tls;
 	fill_ldt(desc, &ud);
 }
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 89a690edf999..caee1f002fed 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -368,7 +368,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	if (current->binfmt->hasvdso)
 		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
 	else
-		restorer = (void *)&frame->retcode;
+		restorer = &frame->retcode;
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 915ec6267326..7a62dced61ca 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -132,7 +132,7 @@ static unsigned long __cpuinit setup_trampoline(void)
  */
 void __init smp_alloc_memory(void)
 {
-	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+	trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
 	/*
 	 * Has to be in very low memory so we can execute
 	 * real-mode AP code.
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4f4021b5bfb5..02f0f61f5b11 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -24,7 +24,7 @@ static int save_stack_stack(void *data, char *name)
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
 {
-	struct stack_trace *trace = (struct stack_trace *)data;
+	struct stack_trace *trace = data;
 	if (trace->skip > 0) {
 		trace->skip--;
 		return;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 911ed28afff8..4aa4520f9499 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -217,7 +217,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 static inline int valid_stack_ptr(struct thread_info *tinfo,
 			void *p, unsigned int size, void *end)
 {
-	void *t = (void *)tinfo;
+	void *t = tinfo;
 	if (end) {
 		if (p < end && p >= (end-THREAD_SIZE))
 			return 1;
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 8ac51b82a632..37756b6fb329 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -34,8 +34,8 @@ void *memmove(void *dest, const void *src, size_t n)
 			"cld"
 			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
 			:"0" (n),
-			 "1" (n-1+(const char *)src),
-			 "2" (n-1+(char *)dest)
+			 "1" (n-1+src),
+			 "2" (n-1+dest)
 			:"memory");
 	}
 	return dest;
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 751ebae8ec42..80175e47b190 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -11,8 +11,8 @@ void *memmove(void * dest,const void *src,size_t count)
 	if (dest < src) { 
 		return memcpy(dest,src,count);
 	} else {
-		char *p = (char *) dest + count;
-		char *s = (char *) src + count;
+		char *p = dest + count;
+		const char *s = src + count;
 		while (count--)
 			*--p = *--s;
 	}

From df43510b18b8439465b4b58556f0495b5f5d771e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:23 +0100
Subject: [PATCH 670/890] x86: check_tsc_warp() slowness fix

100 million max # of loops is a bit too much - reduce it to 10 million.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_sync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index ace340524c01..7110078f242c 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -67,12 +67,12 @@ static __cpuinit void check_tsc_warp(void)
 
 		/*
 		 * Be nice every now and then (and also check whether
-		 * measurement is done [we also insert a 100 million
+		 * measurement is done [we also insert a 10 million
 		 * loops safety exit, so we dont lock up in case the
 		 * TSC readout is totally broken]):
 		 */
 		if (unlikely(!(i & 7))) {
-			if (now > end || i > 100000000)
+			if (now > end || i > 10000000)
 				break;
 			cpu_relax();
 			touch_nmi_watchdog();

From ad8ca495bd3e03e6751fc0c6a6af44018ebb4036 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 671/890] x86: add warning to check_tsc_warp()

add warning to check_tsc_warp() - if get_cycles() does not progress.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_sync.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 7110078f242c..0577825cf89b 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -87,7 +87,11 @@ static __cpuinit void check_tsc_warp(void)
 			nr_warps++;
 			__raw_spin_unlock(&sync_lock);
 		}
-
+	}
+	if (!(now-start)) {
+		printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
+			now-start, end-start);
+		WARN_ON(1);
 	}
 }
 

From 17abecfe651c862cd31b1f9e8ef6cfc29083f00d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 672/890] x86: fix up alternatives with lockdep enabled

An older binutils bug caused us to not fix up alternatives.
This problem involved mutex.c but we dont do lockdep section tricks
there anymore, so this workaround is moot. Keep the printk nevertheless,
just in case ... We can remove that later on.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/alternative.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 318a4f9b7ece..45d79ea890ae 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -342,12 +342,13 @@ void alternatives_smp_switch(int smp)
 
 #ifdef CONFIG_LOCKDEP
 	/*
-	 * A not yet fixed binutils section handling bug prevents
-	 * alternatives-replacement from working reliably, so turn
-	 * it off:
+	 * Older binutils section handling bug prevented
+	 * alternatives-replacement from working reliably.
+	 *
+	 * If this still occurs then you should see a hang
+	 * or crash shortly after this line:
 	 */
-	printk("lockdep: not fixing up alternatives.\n");
-	return;
+	printk("lockdep: fixing up alternatives.\n");
 #endif
 
 	if (noreplace_smp || smp_alt_once)

From 70edcd77a0d6d0f8731c826764f5eb6732f521e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 673/890] genirq: stackdump after the "Trying to free
 already-free IRQ" message

these bugs are harder to find than they seem, a stackdump helps.

make it dependent on CONFIG_DEBUG_SHIRQ so that people can turn it off
if it annoys them.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/manage.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1f314221d534..438a01464287 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id)
 			return;
 		}
 		printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
+#ifdef CONFIG_DEBUG_SHIRQ
+		dump_stack();
+#endif
 		spin_unlock_irqrestore(&desc->lock, flags);
 		return;
 	}

From fc7250ab38346762c567813794b80ee6c57dfed6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 674/890] x86: only support sparsemem

sparsemem is only one supported, so could remove FLAT_NODE_MEM related,
that is only needed !SPARSEMEM

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e157cb274b25..1268d33e7b6e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -240,35 +240,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	node_set_online(nodeid);
 }
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-/* Initialize final allocator for a zone */
-static void __init flat_setup_node_zones(int nodeid)
-{
-	unsigned long start_pfn, end_pfn, memmapsize, limit;
-
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
-
-	Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
-		nodeid, start_pfn, end_pfn);
-
-	/*
-	 * Try to allocate mem_map at end to not fill up precious <4GB
-	 * memory.
-	 */
-	memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
-	limit = end_pfn << PAGE_SHIFT;
-
-	NODE_DATA(nodeid)->node_mem_map =
-		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
-				     memmapsize, SMP_CACHE_BYTES,
-				     round_down(limit - memmapsize, PAGE_SIZE),
-				     limit);
-}
-#else
-#define flat_setup_node_zones(i) do {} while (0)
-#endif
-
 /*
  * There are unfortunately some poorly designed mainboards around that
  * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -600,9 +571,6 @@ void __init paging_init(void)
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
-	for_each_online_node(i)
-		flat_setup_node_zones(i);
-
 	free_area_init_nodes(max_zone_pfns);
 }
 

From 75f2ce033168ff435e72bf5bb615176d9930e77f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 675/890] x86: get_cycles() fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/tsc.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index a6e8d35c3f86..7d3e27f7d484 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -27,10 +27,8 @@ static inline cycles_t get_cycles(void)
 	if (!cpu_has_tsc)
 		return 0;
 #endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
 	rdtscll(ret);
-#endif
+
 	return ret;
 }
 

From 9c5ba48958acf6d584f57e9169ad7ecc80ccc390 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 676/890] x86: clean up paging_init()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 1268d33e7b6e..70b463abf6f4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -561,7 +561,6 @@ unsigned long __init numa_free_all_bootmem(void)
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	int i;
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;

From 084662580ddc282f38b31da62af5d75208a59b12 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 677/890] x86: add vdso32-int80-syms.lds to .gitignore

One of the generated files was missed in gitignore.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/vdso/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index b9030e533418..60274d5746e1 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -3,3 +3,4 @@ vdso-syms.lds
 vdso32-syms.lds
 vdso32-syscall-syms.lds
 vdso32-sysenter-syms.lds
+vdso32-int80-syms.lds

From 518edc93f8b29bc3e31a13ced0af27c602feed8f Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 678/890] x86: update reliability argument to printk_address

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 2d9e6da2cb30..dd26e680a431 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -608,7 +608,7 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT, address);
-	printk_address(regs->ip, regs->bp);
+	printk_address(regs->ip, 1);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;

From dbae595249bdbacd852e677f9b3e995f65c16781 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 679/890] x86: export check_tsc_unstable

Exporrt check_tsc_unstable function as GPL symbol. lguest is
a user of it.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index c62f3b6eacc0..947554ddabb6 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -92,10 +92,12 @@ sched_clock(void) __attribute__((alias("native_sched_clock")));
 
 static int tsc_unstable;
 
-inline int check_tsc_unstable(void)
+int check_tsc_unstable(void)
 {
 	return tsc_unstable;
 }
+EXPORT_SYMBOL_GPL(check_tsc_unstable);
+
 #ifdef CONFIG_CPU_FREQ
 
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency

From e68decb52104388ed6c6218be926e10e6cde2814 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <gcosta@redhat.com>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: [PATCH 680/890] x86: export __supported_pte_mask

export __supported_pte_mask variable as GPL symbol.
lguest is a user of it.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup64.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 0389331059ad..309366f8f603 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -41,6 +41,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
 
 unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
+
 static int do_not_nx __cpuinitdata = 0;
 
 /* noexec=on|off

From 08b6d290f977d8145804fd2b9bc2c331f2484f8e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 681/890] xen: fix section usage in xen-head.S and setup.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

additional section for .init.text appending a number.

A side effect of this was a section mismatch warning because modpost did
not recognize a .init.text section named .init.text.1: WARNING:
vmlinux.o(.text.head+0x247): Section mismatch: reference to
.init.text.1:start_kernel (between 'is386' and 'check_x87')

Fix this by hardcoding the "ax" in the pushsection.  Thanks to Torlaf for
reporting this.

Alan Modra provided the hint that made me able to locate the root cause of
this warning.  And Mike Frysinger told me how to properly fix it using
__INIT/__FINIT.

Fix following Section mismatch warning in addition:
WARNING: vmlinux.o(.text+0x14c8): Section mismatch: reference to .init.data:vsyscall_int80_start (between 'fiddle_vdso' and 'xen_setup_features')

fiddle_vdso was only used from a __init function - so declare it __init.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/setup.c    | 2 +-
 arch/x86/xen/xen-head.S | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 7d6d0ef55890..3bad4773a2f3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -60,7 +60,7 @@ static void xen_idle(void)
 /*
  * Set the bit indicating "nosegneg" library variants should be used.
  */
-static void fiddle_vdso(void)
+static void __init fiddle_vdso(void)
 {
 	extern const char vdso32_default_start;
 	u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index f8d6937db2ec..288d587ce73c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -4,16 +4,18 @@
 #ifdef CONFIG_XEN
 
 #include <linux/elfnote.h>
+#include <linux/init.h>
 #include <asm/boot.h>
 #include <xen/interface/elfnote.h>
 
-.pushsection .init.text
+	__INIT
 ENTRY(startup_xen)
 	movl %esi,xen_start_info
 	cld
 	movl $(init_thread_union+THREAD_SIZE),%esp
 	jmp xen_start_kernel
-.popsection
+
+	__FINIT
 
 .pushsection .bss.page_aligned
 	.align PAGE_SIZE_asm

From c84d6af881be84687fa924f16b2f4b4690354165 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 682/890] x86: reboot: remove inb_p usage

We are driving a motherboard port so use a 2uS explicit delay at this
point.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5b32f0b4d133..5818dc28167d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -319,9 +319,11 @@ static inline void kb_wait(void)
 {
 	int i;
 
-	for (i = 0; i < 0x10000; i++)
-		if ((inb_p(0x64) & 0x02) == 0)
+	for (i = 0; i < 0x10000; i++) {
+		if ((inb(0x64) & 0x02) == 0)
 			break;
+		udelay(2);
+	}
 }
 
 void machine_emergency_restart(void)

From 625d6cffcac1c96faa18d629f1271d63af0e05f2 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 683/890] x86: fix early cpu_to_node panic from
 nr_free_zone_pages

call early_cpu_to_node() since per_cpu(cpu_to_node_map) might not be setup
yet.

I also had to export x86_cpu_to_node_map_early_ptr because of some calls
from the network code to numa_node_id():

	net/ipv4/netfilter/arp_tables.c:
	net/ipv4/netfilter/ip_tables.c:
	net/ipv4/netfilter/ip_tables.c:

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c      | 2 --
 include/asm-x86/topology.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 70b463abf6f4..441af4edea59 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -37,9 +37,7 @@ u16 x86_cpu_to_node_map_init[NR_CPUS] = {
 void *x86_cpu_to_node_map_early_ptr;
 DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-#ifdef	CONFIG_DEBUG_PER_CPU_MAPS
 EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
-#endif
 
 u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 9a8228a177c7..7b0cce2f370d 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -37,6 +37,8 @@ extern u8 cpu_to_node_map[];
 DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
 extern u16 x86_cpu_to_node_map_init[];
 extern void *x86_cpu_to_node_map_early_ptr;
+/* Returns the number of the current Node. */
+#define numa_node_id()		(early_cpu_to_node(raw_smp_processor_id()))
 #endif
 
 extern cpumask_t node_to_cpumask_map[];

From a5ff677c2fb10567d1e750fb9e4417d95081071b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 684/890] x86: make printk_address available on X86_32

Small fomatting fixes to 64-bit as well, trailing whitespace
and extra semicolon, also move the ifdefs for CONFIG_KALLSYMS
into the function itself.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c | 28 ++++++++++++++++++++++++++++
 arch/x86/kernel/traps_64.c | 11 ++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 270cfd483160..8534cb53ff60 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -102,6 +102,34 @@ asmlinkage void machine_check(void);
 int kstack_depth_to_print = 24;
 static unsigned int code_bytes = 64;
 
+void printk_address(unsigned long address, int reliable)
+{
+#ifdef CONFIG_KALLSYMS
+	unsigned long offset = 0, symsize;
+	const char *symname;
+	char *modname;
+	char *delim = ":";
+	char namebuf[128];
+	char reliab[4] = "";
+
+	symname = kallsyms_lookup(address, &symsize, &offset,
+					&modname, namebuf);
+	if (!symname) {
+		printk(" [<%08lx>]\n", address);
+		return;
+	}
+	if (!reliable)
+		strcpy(reliab, "? ");
+
+	if (!modname)
+		modname = delim = "";
+	printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
+		address, reliab, delim, modname, delim, symname, offset, symsize);
+#else
+	printk(" [<%08lx>]\n", address);
+#endif
+}
+
 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
 {
 	return	p > (void *)tinfo &&
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 4aa4520f9499..4ea727869ded 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -100,15 +100,15 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 
 int kstack_depth_to_print = 12;
 
-#ifdef CONFIG_KALLSYMS
 void printk_address(unsigned long address, int reliable)
 {
+#ifdef CONFIG_KALLSYMS
 	unsigned long offset = 0, symsize;
 	const char *symname;
 	char *modname;
 	char *delim = ":";
 	char namebuf[128];
-	char reliab[4] = "";;
+	char reliab[4] = "";
 
 	symname = kallsyms_lookup(address, &symsize, &offset,
 					&modname, namebuf);
@@ -120,16 +120,13 @@ void printk_address(unsigned long address, int reliable)
 		strcpy(reliab, "? ");
 
 	if (!modname)
-		modname = delim = ""; 		
+		modname = delim = "";
 	printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
 		address, reliab, delim, modname, delim, symname, offset, symsize);
-}
 #else
-void printk_address(unsigned long address, int reliable)
-{
 	printk(" [<%016lx>]\n", address);
-}
 #endif
+}
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 					unsigned *usedp, char **idp)

From 1b000a5dbeb2f34bc03d45ebdf3f6d24a60c3aed Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 685/890] x86: make NUMA work on 32-bit

The DISCONTIG memory model on x86 32 bit uses a remap allocator early
in boot. The objective is that portions of every node are mapped in to
the kernel virtual area (KVA) in place of ZONE_NORMAL so that node-local
allocations can be made for pgdat and mem_map structures.

With SPARSEMEM, the amount that is set aside is insufficient for all the
mem_maps to be allocated. During the boot process, it falls back to using
the bootmem allocator. This breaks assumptions that SPARSEMEM makes about
the layout of the mem_map in memory and results in a VM_BUG_ON triggering
due to pfn_to_page() returning garbage values.

This patch only enables the remap allocator for use with DISCONTIG.

Without SRAT support, a compile-error occurs because ACPI table parsing
functions are only available in x86-64. This patch also adds no-op stubs
and prints a warning message. What likely needs to be done is sharing
the table parsing functions between 32 and 64 bit if they are
compatible.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/discontig_32.c | 93 ++++++++++++++++++++++++++++++++------
 1 file changed, 78 insertions(+), 15 deletions(-)

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 9f1d02cfde37..04b1d20e2613 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -32,6 +32,7 @@
 #include <linux/kexec.h>
 #include <linux/pfn.h>
 #include <linux/swap.h>
+#include <linux/acpi.h>
 
 #include <asm/e820.h>
 #include <asm/setup.h>
@@ -103,14 +104,10 @@ extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 unsigned long node_remap_size[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
 /*
@@ -167,6 +164,22 @@ static void __init allocate_pgdat(int nid)
 	}
 }
 
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * In the discontig memory model, a portion of the kernel virtual area (KVA)
+ * is reserved and portions of nodes are mapped using it. This is to allow
+ * node-local memory to be allocated for structures that would normally require
+ * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers
+ * should be prepared to allocate from the bootmem allocator instead. This KVA
+ * mechanism is incompatible with SPARSEMEM as it makes assumptions about the
+ * layout of memory that are broken if alloc_remap() succeeds for some of the
+ * map and fails for others
+ */
+static unsigned long node_remap_start_pfn[MAX_NUMNODES];
+static void *node_remap_end_vaddr[MAX_NUMNODES];
+static void *node_remap_alloc_vaddr[MAX_NUMNODES];
+static unsigned long node_remap_offset[MAX_NUMNODES];
+
 void *alloc_remap(int nid, unsigned long size)
 {
 	void *allocation = node_remap_alloc_vaddr[nid];
@@ -263,6 +276,40 @@ static unsigned long calculate_numa_remap_pages(void)
 	return reserve_pages;
 }
 
+static void init_remap_allocator(int nid)
+{
+	node_remap_start_vaddr[nid] = pfn_to_kaddr(
+			kva_start_pfn + node_remap_offset[nid]);
+	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+		(node_remap_size[nid] * PAGE_SIZE);
+	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+		ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
+	printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+		(ulong) node_remap_start_vaddr[nid],
+		(ulong) pfn_to_kaddr(highstart_pfn
+		   + node_remap_offset[nid] + node_remap_size[nid]));
+}
+#else
+void *alloc_remap(int nid, unsigned long size)
+{
+	return NULL;
+}
+
+static unsigned long calculate_numa_remap_pages(void)
+{
+	return 0;
+}
+
+static void init_remap_allocator(int nid)
+{
+}
+
+void __init remap_numa_kva(void)
+{
+}
+#endif /* CONFIG_DISCONTIGMEM */
+
 extern void setup_bootmem_allocator(void);
 unsigned long __init setup_memory(void)
 {
@@ -326,19 +373,9 @@ unsigned long __init setup_memory(void)
 	printk("Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
-		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-				kva_start_pfn + node_remap_offset[nid]);
-		/* Init the node remap allocator */
-		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-			(node_remap_size[nid] * PAGE_SIZE);
-		node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+		init_remap_allocator(nid);
 
 		allocate_pgdat(nid);
-		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
-			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn
-			   + node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
@@ -439,3 +476,29 @@ int memory_add_physaddr_to_nid(u64 addr)
 
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
+#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
+/*
+ * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
+ *
+ * These stub functions are needed to compile 32-bit NUMA when SRAT is
+ * not set. There are functions in srat_64.c for parsing this table
+ * and it may be possible to make them common functions.
+ */
+void acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+	printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
+}
+
+void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
+{
+}
+
+void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
+{
+}
+
+void acpi_numa_arch_fixup(void)
+{
+}
+#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */

From 409a7b859ddb6e0a615d9f3ccddedaec218c56b6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 686/890] x86: relax restrictions on setting CONFIG_NUMA on
 x86, #2

The FLATMEM memory model references a global mem_map and max_mapnr. This
is incompatible with how memory models used for NUMA view the world.
Builds fail if FLATMEM && NUMA are set on x86. This patch forbids that
combination of config items. This is consistent with x86_64
enforcements.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dc80a9f1f303..2114b4c71fa0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -871,7 +871,7 @@ config HAVE_ARCH_ALLOC_REMAP
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC
+	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y

From 4323838215184f5a2f081e0d17b8d60731b03164 Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:25 +0100
Subject: [PATCH 687/890] x86: change size of node ids from u8 to s16

Change the size of node ids for X86_64 from u8 to s16 to
accomodate more than 32k nodes and allow for NUMA_NO_NODE
(-1) to be sign extended to int.

Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig            |  1 +
 arch/x86/mm/numa_64.c       | 12 ++++++------
 include/asm-x86/mmzone_64.h |  6 +++---
 include/asm-x86/numa_64.h   |  2 +-
 include/asm-x86/topology.h  | 10 +++++-----
 5 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2114b4c71fa0..81af31e5a9f1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -848,6 +848,7 @@ config NUMA_EMU
 
 config NODES_SHIFT
 	int
+	range 1 15  if X86_64
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "3"
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 441af4edea59..0ca96d86cb71 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,15 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-u16 x86_cpu_to_node_map_init[NR_CPUS] = {
+int x86_cpu_to_node_map_init[NR_CPUS] = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 void *x86_cpu_to_node_map_early_ptr;
-DEFINE_PER_CPU(u16, x86_cpu_to_node_map) = NUMA_NO_NODE;
+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
 
-u16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
@@ -63,7 +63,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
 	unsigned long addr, end;
 	int i, res = -1;
 
-	memset(memnodemap, 0xff, memnodemapsize);
+	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
 	for (i = 0; i < numnodes; i++) {
 		addr = nodes[i].start;
 		end = nodes[i].end;
@@ -72,7 +72,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
 		if ((end >> shift) >= memnodemapsize)
 			return 0;
 		do {
-			if (memnodemap[addr >> shift] != 0xff)
+			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
 				return -1;
 			memnodemap[addr >> shift] = i;
 			addr += (1UL << shift);
@@ -533,7 +533,7 @@ __cpuinit void numa_add_cpu(int cpu)
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
-	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
 	cpu_pda(cpu)->nodenumber = node;
 
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index b0c25ae111d9..ebaf9663aa8a 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -15,9 +15,9 @@
 struct memnode {
 	int shift;
 	unsigned int mapsize;
-	u8 *map;
-	u8 embedded_map[64-16];
-} ____cacheline_aligned; /* total size = 64 bytes */
+	s16 *map;
+	s16 embedded_map[64-8];
+} ____cacheline_aligned; /* total size = 128 bytes */
 extern struct memnode memnode;
 #define memnode_shift memnode.shift
 #define memnodemap memnode.map
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index c797cd523d2b..15fe07cde586 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -20,7 +20,7 @@ extern void numa_set_node(int cpu, int node);
 extern void srat_reserve_add_area(int nodeid);
 extern int hotadd_percent;
 
-extern u16 apicid_to_node[MAX_LOCAL_APIC];
+extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 extern unsigned long numa_free_all_bootmem(void);
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 7b0cce2f370d..8af05a93f097 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -31,11 +31,11 @@
 
 /* Mappings between logical cpu number and node number */
 #ifdef CONFIG_X86_32
-extern u8 cpu_to_node_map[];
+extern int cpu_to_node_map[];
 
 #else
-DECLARE_PER_CPU(u16, x86_cpu_to_node_map);
-extern u16 x86_cpu_to_node_map_init[];
+DECLARE_PER_CPU(int, x86_cpu_to_node_map);
+extern int x86_cpu_to_node_map_init[];
 extern void *x86_cpu_to_node_map_early_ptr;
 /* Returns the number of the current Node. */
 #define numa_node_id()		(early_cpu_to_node(raw_smp_processor_id()))
@@ -43,7 +43,7 @@ extern void *x86_cpu_to_node_map_early_ptr;
 
 extern cpumask_t node_to_cpumask_map[];
 
-#define NUMA_NO_NODE	((u16)(~0))
+#define NUMA_NO_NODE	(-1)
 
 /* Returns the number of the node containing CPU 'cpu' */
 #ifdef CONFIG_X86_32
@@ -56,7 +56,7 @@ static inline int cpu_to_node(int cpu)
 #else /* CONFIG_X86_64 */
 static inline int early_cpu_to_node(int cpu)
 {
-	u16 *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
 	if (cpu_to_node_map)
 		return cpu_to_node_map[cpu];

From 503595010f2024b236fd2636a7318fe64c794d6c Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:27 +0100
Subject: [PATCH 688/890] x86_32: remove unnecessary use of %ebx as the boot
 cpu flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently in head_32.S there are two ways we test to see if we
are the boot cpu.  By looking at %ebx and by looking at the
static variable ready.  When changing things around I have
found that it gets tricky to preserve %ebx.  So this
patch just switches head.S over to the more reliable
test of always using ready.

Hopefully later we can kill these tests entirely.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mika Penttilä <mika.penttila@kolumbus.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_32.S | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f409fe2a52e4..7b9b2566b7a8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -199,7 +199,6 @@ default_entry:
 	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
 	movl %eax, 4092(%edx)
 
-	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
 	jmp 3f
 /*
  * Non-boot CPU entry point; entered from trampoline.S
@@ -268,10 +267,6 @@ ENTRY(startup_32_smp)
 	wrmsr
 
 6:
-	/* This is a secondary processor (AP) */
-	xorl %ebx,%ebx
-	incl %ebx
-
 #endif /* CONFIG_SMP */
 3:
 
@@ -297,7 +292,7 @@ ENTRY(startup_32_smp)
 	popfl
 
 #ifdef CONFIG_SMP
-	andl %ebx,%ebx
+	cmpb $0, ready
 	jz  1f				/* Initial CPU cleans BSS */
 	jmp checkCPUtype
 1:

From 5756dd59f118daacd68ee971a2381360ef769c48 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:27 +0100
Subject: [PATCH 689/890] x86_32: always run the full set of paging state.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I am preparing to convert the boot time page table to the kernels
native format.  To achieve that I need to enable PAE. Enabling PSE
and the no execute bit would not hurt.  So this patch modifies
the boot cpu path to execute all of the kernels enable code
if and only if we have the proper bits set in mmu_cr4_features.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mika Penttilä <mika.penttila@kolumbus.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_32.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7b9b2566b7a8..a2b6331434da 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -221,6 +221,8 @@ ENTRY(startup_32_smp)
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
+#endif /* CONFIG_SMP */
+3:
 
 /*
  *	New page tables may be in 4Mbyte page mode and may
@@ -267,8 +269,6 @@ ENTRY(startup_32_smp)
 	wrmsr
 
 6:
-#endif /* CONFIG_SMP */
-3:
 
 /*
  * Enable paging

From 8b2f7ffffe7f247ba237322fee78c528ba88f16b Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:28 +0100
Subject: [PATCH 690/890] x86: fix Section mismatch: reference to
 .init.text:lguest_entry

fix:

> WARNING: vmlinux.o(.data+0x4): Section mismatch: reference to .init.text:lguest_entry (between 'subarch_entries' and 'stack_start')

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_32.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index a2b6331434da..5d8c5730686b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -9,6 +9,7 @@
 
 .text
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -151,7 +152,9 @@ WEAK(xen_entry)
 	/* Unknown implementation; there's really
 	   nothing we can do at this point. */
 	ud2a
-.data
+
+	__INITDATA
+
 subarch_entries:
 	.long default_entry		/* normal x86/PC */
 	.long lguest_entry		/* lguest hypervisor */

From 1bdbdaacf774f2979ed4cb0c4a4316c9e578c897 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Wed, 30 Jan 2008 13:33:28 +0100
Subject: [PATCH 691/890] x86, rtc: make CONFIG_HPET_EMULATE_RTC usable from
 modules

enabled, then interrupts don't work for the rtc-cmos driver which results in
RTC_AIE*, RTC_PIE* and RTC_ALM being unusable.  This affects hwclock from
util-linux-ng at least on i386 since that uses RTC_PIE_ON.  (For x86-64, a
polling method is used for unknown reasons.)

This patch series now

  1. export the functions from arch/x86/kernel/hpet.c that the old char/rtc
     driver uses to work around that problem,

  2. makes it possible to compile the old rtc driver as module, while still
     having CONFIG_HPET_EMULATE_RTC enabled and

  3. makes use of the exported functions in (1) in the new rtc-cmos driver.

This patch:

This patch makes the RTC emulation functions in arch/x86/kernel/hpet.c usable
for kernel modules. It

  - exports the functions (EXPORT_SYMBOL_GPL()),
  - adds an interface to register the interrupt callback function
    instead of using only a fixed callback function and
  - replaces the rtc_get_rtc_time() function which depends on
    CONFIG_RTC with a call to get_rtc_time() which is defined in
    include/asm-generic/rtc.h.

The only dependency to CONFIG_RTC is the call to rtc_interrupt() which is
removed by the next patch. After this, there's no (code) dependency of
this functions to CONFIG_RTC=y any more.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Cc: Andi Kleen <ak@suse.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Robert Picco <Robert.Picco@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 47 +++++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/hpet.h |  3 +++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 5c1702789be4..d65ced59a18f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -107,6 +107,7 @@ int is_hpet_enabled(void)
 {
 	return is_hpet_capable() && hpet_legacy_int_enabled;
 }
+EXPORT_SYMBOL_GPL(is_hpet_enabled);
 
 /*
  * When the hpet driver (/dev/hpet) is enabled, we need to reserve
@@ -475,6 +476,7 @@ void hpet_disable(void)
  */
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
+#include <asm/rtc.h>
 
 #define DEFAULT_RTC_INT_FREQ	64
 #define DEFAULT_RTC_SHIFT	6
@@ -489,6 +491,38 @@ static unsigned long hpet_default_delta;
 static unsigned long hpet_pie_delta;
 static unsigned long hpet_pie_limit;
 
+static rtc_irq_handler irq_handler;
+
+/*
+ * Registers a IRQ handler.
+ */
+int hpet_register_irq_handler(rtc_irq_handler handler)
+{
+	if (!is_hpet_enabled())
+		return -ENODEV;
+	if (irq_handler)
+		return -EBUSY;
+
+	irq_handler = handler;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hpet_register_irq_handler);
+
+/*
+ * Deregisters the IRQ handler registered with hpet_register_irq_handler()
+ * and does cleanup.
+ */
+void hpet_unregister_irq_handler(rtc_irq_handler handler)
+{
+	if (!is_hpet_enabled())
+		return;
+
+	irq_handler = NULL;
+	hpet_rtc_flags = 0;
+}
+EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
+
 /*
  * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
  * is not supported by all HPET implementations for timer 1.
@@ -530,6 +564,7 @@ int hpet_rtc_timer_init(void)
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
 
 /*
  * The functions below are called from rtc driver.
@@ -544,6 +579,7 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
 	hpet_rtc_flags &= ~bit_mask;
 	return 1;
 }
+EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit);
 
 int hpet_set_rtc_irq_bit(unsigned long bit_mask)
 {
@@ -559,6 +595,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
 
 int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
 			unsigned char sec)
@@ -572,6 +609,7 @@ int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(hpet_set_alarm_time);
 
 int hpet_set_periodic_freq(unsigned long freq)
 {
@@ -590,11 +628,13 @@ int hpet_set_periodic_freq(unsigned long freq)
 	}
 	return 1;
 }
+EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
 
 int hpet_rtc_dropped_irq(void)
 {
 	return is_hpet_enabled();
 }
+EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
 
 static void hpet_rtc_timer_reinit(void)
 {
@@ -638,9 +678,10 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 	unsigned long rtc_int_flag = 0;
 
 	hpet_rtc_timer_reinit();
+	memset(&curr_time, 0, sizeof(struct rtc_time));
 
 	if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-		rtc_get_rtc_time(&curr_time);
+		get_rtc_time(&curr_time);
 
 	if (hpet_rtc_flags & RTC_UIE &&
 	    curr_time.tm_sec != hpet_prev_update_sec) {
@@ -662,8 +703,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 
 	if (rtc_int_flag) {
 		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+		if (irq_handler)
+			irq_handler(rtc_int_flag, dev_id);
+
 		rtc_interrupt(rtc_int_flag, dev_id);
 	}
 	return IRQ_HANDLED;
 }
+EXPORT_SYMBOL_GPL(hpet_rtc_interrupt);
 #endif
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 624f600f7161..6a9b4ac59bf7 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -69,6 +69,7 @@ extern void force_hpet_resume(void);
 
 #include <linux/interrupt.h>
 
+typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie);
 extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask);
 extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
 extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
@@ -77,6 +78,8 @@ extern int hpet_set_periodic_freq(unsigned long freq);
 extern int hpet_rtc_dropped_irq(void);
 extern int hpet_rtc_timer_init(void);
 extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
+extern int hpet_register_irq_handler(rtc_irq_handler handler);
+extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
 
 #endif /* CONFIG_HPET_EMULATE_RTC */
 

From f8f76481bc2803aea03ff213c7e1405b53f7e488 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Wed, 30 Jan 2008 13:33:31 +0100
Subject: [PATCH 692/890] rtc: use the IRQ callback interface in (old) RTC
 driver

the previous patch in the old RTC driver.  It also removes the direct
rtc_interrupt() call from arch/x86/kernel/hpetc.c so that there's finally no
(code) dependency to CONFIG_RTC in arch/x86/kernel/hpet.c.

Because of this, it's possible to compile the drivers/char/rtc.ko driver as
module and still use the HPET emulation functionality.  This is also expressed
in Kconfig.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Cc: Andi Kleen <ak@suse.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Robert Picco <Robert.Picco@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig       |  2 +-
 arch/x86/kernel/hpet.c |  2 --
 drivers/char/rtc.c     | 15 ++++++++++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 81af31e5a9f1..4e911d0a4962 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -390,7 +390,7 @@ config HPET_TIMER
 
 config HPET_EMULATE_RTC
 	def_bool y
-	depends on HPET_TIMER && RTC=y
+	depends on HPET_TIMER && (RTC=y || RTC=m)
 
 # Mark as embedded because too many people got it wrong.
 # The code disables itself when not needed.
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index d65ced59a18f..429d084e014d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -705,8 +705,6 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
 		if (irq_handler)
 			irq_handler(rtc_int_flag, dev_id);
-
-		rtc_interrupt(rtc_int_flag, dev_id);
 	}
 	return IRQ_HANDLED;
 }
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 3ac7952fe086..78b151c4d20f 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -110,6 +110,8 @@ static int rtc_has_irq = 1;
 #define hpet_set_rtc_irq_bit(arg)		0
 #define hpet_rtc_timer_init()			do { } while (0)
 #define hpet_rtc_dropped_irq()			0
+#define hpet_register_irq_handler(h)		0
+#define hpet_unregister_irq_handler(h)		0
 #ifdef RTC_IRQ
 static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 {
@@ -1027,7 +1029,15 @@ no_irq:
 
 #ifdef RTC_IRQ
 	if (is_hpet_enabled()) {
+		int err;
+
 		rtc_int_handler_ptr = hpet_rtc_interrupt;
+		err = hpet_register_irq_handler(rtc_interrupt);
+		if (err != 0) {
+			printk(KERN_WARNING "hpet_register_irq_handler failed "
+					"in rtc_init().");
+			return err;
+		}
 	} else {
 		rtc_int_handler_ptr = rtc_interrupt;
 	}
@@ -1050,6 +1060,7 @@ no_irq:
 	if (misc_register(&rtc_dev)) {
 #ifdef RTC_IRQ
 		free_irq(RTC_IRQ, NULL);
+		hpet_unregister_irq_handler(rtc_interrupt);
 		rtc_has_irq = 0;
 #endif
 		rtc_release_region();
@@ -1141,8 +1152,10 @@ static void __exit rtc_exit(void)
 #else
 	rtc_release_region();
 #ifdef RTC_IRQ
-	if (rtc_has_irq)
+	if (rtc_has_irq) {
 		free_irq(RTC_IRQ, NULL);
+		hpet_unregister_irq_handler(hpet_rtc_interrupt);
+	}
 #endif
 #endif /* CONFIG_SPARC32 */
 }

From 612a95b4e053b8a06319049191fd2dce9c970189 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 693/890] x86: remove iBCS support

ibcs2 support has never been supported on 2.6 kernels as far as I know,
and if it has it must have been an external patch.  Anyways, if anybody
applies an external patch they could as well readd the ibcs checking
code to the ELF loader in the same patch.  But there is no reason to
keep this code running in all Linux kernels.  This will save at least
two strcmps each ELF execution.

No deprecation period because it could not have been used anyway.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/binfmt_elf.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 786ee275ec0a..18ed6dd906c1 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -595,7 +595,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	int load_addr_set = 0;
 	char * elf_interpreter = NULL;
 	unsigned int interpreter_type = INTERPRETER_NONE;
-	unsigned char ibcs2_interpreter = 0;
 	unsigned long error;
 	struct elf_phdr *elf_ppnt, *elf_phdata;
 	unsigned long elf_bss, elf_brk;
@@ -713,14 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 				goto out_free_interp;
 
-			/* If the program interpreter is one of these two,
-			 * then assume an iBCS2 image. Otherwise assume
-			 * a native linux image.
-			 */
-			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
-			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
-				ibcs2_interpreter = 1;
-
 			/*
 			 * The early SET_PERSONALITY here is so that the lookup
 			 * for the interpreter happens in the namespace of the 
@@ -740,7 +731,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * switch really is going to happen - do this in
 			 * flush_thread().	- akpm
 			 */
-			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+			SET_PERSONALITY(loc->elf_ex, 0);
 
 			interpreter = open_exec(elf_interpreter);
 			retval = PTR_ERR(interpreter);
@@ -819,7 +810,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			goto out_free_dentry;
 	} else {
 		/* Executables without an interpreter also need a personality  */
-		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+		SET_PERSONALITY(loc->elf_ex, 0);
 	}
 
 	/* OK, we are done with that, now set up the arg stuff,
@@ -853,7 +844,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality.  */
-	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+	SET_PERSONALITY(loc->elf_ex, 0);
 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 

From e61bd94a36a9f2c394d75ebe3614fe0a612bcf2d Mon Sep 17 00:00:00 2001
From: Eduardo Pereira Habkost <ehabkost@redhat.com>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 694/890] x86: allow enabling PARAVIRT without any guest
 implementation

This will allow people to enable the paravirt_ops code even when no
guest support is enabled, for broader testing of the paravirt_ops code.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4e911d0a4962..5e0cde8294c9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -310,15 +310,6 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
 
 	  If in doubt, say "Y".
 
-config PARAVIRT
-	bool
-	depends on X86_32 && !(X86_VISWS || X86_VOYAGER)
-	help
-	  This changes the kernel so it can modify itself when it is run
-	  under a hypervisor, potentially improving performance significantly
-	  over full virtualization.  However, when run without a hypervisor
-	  the kernel is theoretically slower and slightly larger.
-
 menuconfig PARAVIRT_GUEST
 	bool "Paravirtualized guest support"
 	depends on X86_32
@@ -344,6 +335,15 @@ config VMI
 
 source "arch/x86/lguest/Kconfig"
 
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	depends on X86_32 && !(X86_VISWS || X86_VOYAGER)
+	help
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization.  However, when run without a hypervisor
+	  the kernel is theoretically slower and slightly larger.
+
 endif
 
 config ACPI_SRAT

From 42d545c9a4c0d3faeab658a40165c3da2dda91b2 Mon Sep 17 00:00:00 2001
From: Eduardo Pereira Habkost <ehabkost@redhat.com>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 695/890] x86: remove depends on X86_32 from PARAVIRT &
 PARAVIRT_GUEST

With this, the paravirt_ops code can be enabled on x86_64 also.

Each guest implementation (Xen, VMI, lguest) now depends on X86_32. The
dependencies can be dropped for each one when they start to support
x86_64.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig        | 4 ++--
 arch/x86/lguest/Kconfig | 1 +
 arch/x86/xen/Kconfig    | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5e0cde8294c9..f0887d12a5bb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -312,7 +312,6 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
 
 menuconfig PARAVIRT_GUEST
 	bool "Paravirtualized guest support"
-	depends on X86_32
 	help
 	  Say Y here to get to see options related to running Linux under
 	  various hypervisors.  This option alone does not add any kernel code.
@@ -326,6 +325,7 @@ source "arch/x86/xen/Kconfig"
 config VMI
 	bool "VMI Guest support"
 	select PARAVIRT
+	depends on X86_32
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  VMI provides a paravirtualized interface to the VMware ESX server
@@ -337,7 +337,7 @@ source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
-	depends on X86_32 && !(X86_VISWS || X86_VOYAGER)
+	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 19626ace0f50..964dfa36d367 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -1,6 +1,7 @@
 config LGUEST_GUEST
 	bool "Lguest guest support"
 	select PARAVIRT
+	depends on X86_32
 	depends on !X86_PAE
 	depends on !(X86_VISWS || X86_VOYAGER)
 	select VIRTIO
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index fbfa55ce0d55..4d5f2649bee4 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
 config XEN
 	bool "Xen guest support"
 	select PARAVIRT
+	depends on X86_32
 	depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the

From 8a45eb31d832af5441703bf447c97f786ff850a4 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 696/890] x86: constify function pointer tables

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/proc.c | 2 +-
 arch/x86/kernel/setup_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3900e46d66db..028213260148 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -188,7 +188,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 static void c_stop(struct seq_file *m, void *v)
 {
 }
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
 	.start	= c_start,
 	.next	= c_next,
 	.stop	= c_stop,
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f84e2662f1ca..116687ade180 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1251,7 +1251,7 @@ static void c_stop(struct seq_file *m, void *v)
 {
 }
 
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
 	.start = c_start,
 	.next =	c_next,
 	.stop =	c_stop,

From 11201e603d28a1cb7a4bb1d65f39e61629c97a28 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 697/890] x86: fix DEBUG_RODATA kconfig text

fix kconfig text and make DEBUG_RODATA default.

this helps debugging quite a bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0a82b889d76e..9bb61e1aed69 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -61,13 +61,13 @@ config DEBUG_PER_CPU_MAPS
 
 config DEBUG_RODATA
 	bool "Write protect kernel read-only data structures"
+	default y
 	depends on DEBUG_KERNEL
 	help
 	  Mark the kernel read-only data as write-protected in the pagetables,
 	  in order to catch accidental (and incorrect) writes to such const
-	  data. This option may have a slight performance impact because a
-	  portion of the kernel code won't be covered by a 2MB TLB anymore.
-	  If in doubt, say "N".
+	  data. This is recommended so that we can catch kernel bugs sooner.
+	  If in doubt, say "Y".
 
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"

From 093af8d7f0ba3c6be1485973508584ef081e9f93 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 698/890] x86_32: trim memory by updating e820

when MTRRs are not covering the whole e820 table, we need to trim the
RAM and need to update e820.

reuse some code on 64-bit as well.

here need to add early_get_cap and use it in early_cpu_detect, and move
mtrr_bp_init early.

The code successfully trimmed the memory map on Justin's system:

from:

 [    0.000000]  BIOS-e820: 0000000100000000 - 000000022c000000 (usable)

to:

 [    0.000000]   modified: 0000000100000000 - 0000000228000000 (usable)
 [    0.000000]   modified: 0000000228000000 - 000000022c000000 (reserved)

According to Justin it makes quite a difference:

|  When I boot the box without any trimming it acts like a 286 or 386,
|  takes about 10 minutes to boot (using raptor disks).

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |  2 +-
 arch/x86/kernel/cpu/common.c        | 30 ++++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/mtrr/main.c     | 22 +++++++++++++--------
 arch/x86/kernel/e820_32.c           | 11 +++++++++++
 arch/x86/kernel/setup_32.c          |  6 ++++++
 include/asm-x86/e820_32.h           |  3 +++
 6 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50d564dabb13..fe3031d56431 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -583,7 +583,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
-	disable_mtrr_trim [X86-64, Intel only]
+	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
 			MTRR settings.  This parameter disables that behavior,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 56cc341cc586..bba850b05d0e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -278,6 +278,33 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
 			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
 	}
 }
+static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
+{
+	u32 tfms, xlvl;
+	int ebx;
+
+	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+	if (have_cpuid_p()) {
+		/* Intel-defined flags: level 0x00000001 */
+		if (c->cpuid_level >= 0x00000001) {
+			u32 capability, excap;
+			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+			c->x86_capability[0] = capability;
+			c->x86_capability[4] = excap;
+		}
+
+		/* AMD-defined flags: level 0x80000001 */
+		xlvl = cpuid_eax(0x80000000);
+		if ((xlvl & 0xffff0000) == 0x80000000) {
+			if (xlvl >= 0x80000001) {
+				c->x86_capability[1] = cpuid_edx(0x80000001);
+				c->x86_capability[6] = cpuid_ecx(0x80000001);
+			}
+		}
+
+	}
+
+}
 
 /* Do minimum CPU detection early.
    Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
@@ -306,6 +333,8 @@ static void __init early_cpu_detect(void)
 		early_init_intel(c);
 		break;
 	}
+
+	early_get_cap(c);
 }
 
 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
@@ -485,7 +514,6 @@ void __init identify_boot_cpu(void)
 	identify_cpu(&boot_cpu_data);
 	sysenter_setup();
 	enable_sep_cpu();
-	mtrr_bp_init();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ccd36ed2187b..ac4b6338f3f4 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -624,7 +624,6 @@ static struct sysdev_driver mtrr_sysdev_driver = {
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_X86_64
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -643,13 +642,10 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
 #define Tom2Enabled (1U << 21)
 #define Tom2ForceMemTypeWB (1U << 22)
 
-static __init int amd_special_default_mtrr(unsigned long end_pfn)
+static __init int amd_special_default_mtrr(void)
 {
 	u32 l, h;
 
-	/* Doesn't apply to memory < 4GB */
-	if (end_pfn <= (0xffffffff >> PAGE_SHIFT))
-		return 0;
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
 		return 0;
 	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
@@ -687,9 +683,14 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	 * Make sure we only trim uncachable memory on machines that
 	 * support the Intel MTRR architecture:
 	 */
+	if (!is_cpu(INTEL) || disable_mtrr_trim)
+		return 0;
 	rdmsr(MTRRdefType_MSR, def, dummy);
 	def &= 0xff;
-	if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	if (amd_special_default_mtrr())
 		return 0;
 
 	/* Find highest cached pfn */
@@ -703,8 +704,14 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 			highest_addr = base + size;
 	}
 
-	if (amd_special_default_mtrr(end_pfn))
+	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
+	if (!highest_addr) {
+		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "**** WARNING: likely strange cpu\n");
+		printk(KERN_WARNING "**** MTRRs all blank, cpu in qemu?\n");
+		printk(KERN_WARNING "***************\n");
 		return 0;
+	}
 
 	if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
 		printk(KERN_WARNING "***************\n");
@@ -726,7 +733,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
 	return 0;
 }
-#endif
 
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 931934a7b353..4e16ef4a2659 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -749,3 +749,14 @@ static int __init parse_memmap(char *arg)
 	return 0;
 }
 early_param("memmap", parse_memmap);
+void __init update_e820(void)
+{
+	u8 nr_map;
+
+	nr_map = e820.nr_map;
+	if (sanitize_e820_map(e820.map, &nr_map))
+		return;
+	e820.nr_map = nr_map;
+	printk(KERN_INFO "modified physical RAM map:\n");
+	print_memory_map("modified");
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 26a56f714d34..83ba3ca5f431 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -48,6 +48,7 @@
 
 #include <video/edid.h>
 
+#include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <asm/e820.h>
 #include <asm/mpspec.h>
@@ -758,6 +759,11 @@ void __init setup_arch(char **cmdline_p)
 
 	max_low_pfn = setup_memory();
 
+	/* update e820 for memory not covered by WB MTRRs */
+	mtrr_bp_init();
+	if (mtrr_trim_uncached_memory(max_pfn))
+		max_low_pfn = setup_memory();
+
 #ifdef CONFIG_VMI
 	/*
 	 * Must be after max_low_pfn is determined, and before kernel
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index e2faf5f3a0bb..f1da7ebd1905 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -19,12 +19,15 @@
 #ifndef __ASSEMBLY__
 
 extern struct e820map e820;
+extern void update_e820(void);
 
 extern int e820_all_mapped(unsigned long start, unsigned long end,
 			   unsigned type);
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern void find_max_pfn(void);
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
+extern void add_memory_region(unsigned long long start,
+			      unsigned long long size, int type);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
 extern void print_memory_map(char *who);

From 3212bff370c2f22e4987c6679ba485654cefb178 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 699/890] x86: left over fix for leak of early_ioremp in
 dmi_scan

Signed-off-by: Yinghai Lu <yinghai@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/firmware/dmi_scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 5f9d0bc839ba..9008ed5ef4ce 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -353,6 +353,7 @@ void __init dmi_scan_machine(void)
 				return;
 			}
 		}
+		dmi_iounmap(p, 0x10000);
 	}
  out:	printk(KERN_INFO "DMI not present or invalid.\n");
 }

From dd5af90a7f3d79e04b7eace9a98644dbf2038f4d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: [PATCH 700/890] x86/non-x86: percpu, node ids, apic ids x86.git fixup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig             |  2 +-
 include/asm-generic/percpu.h | 12 ++----------
 init/main.c                  |  4 ++--
 kernel/module.c              |  8 ++++++++
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f0887d12a5bb..8e1b33c5405f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,7 +97,7 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default X86_64
 
-config ARCH_SETS_UP_PER_CPU_AREA
+config HAVE_SETUP_PER_CPU_AREA
 	def_bool X86_64
 
 config ARCH_SUPPORTS_OPROFILE
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index c41b1a731129..4b8d31cda1a0 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -47,7 +47,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #endif
 
 /*
- * A percpu variable may point to a discarded reghions. The following are
+ * A percpu variable may point to a discarded regions. The following are
  * established ways to produce a usable pointer from the percpu variable
  * offset.
  */
@@ -59,18 +59,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
 
 
-#ifdef CONFIG_ARCH_SETS_UP_PER_CPU_AREA
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
 #endif
 
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size)			\
-do {								\
-	unsigned int __i;					\
-	for_each_possible_cpu(__i)				\
-		memcpy((pcpudst)+per_cpu_offset(__i),		\
-		       (src), (size));				\
-} while (0)
 #else /* ! SMP */
 
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
diff --git a/init/main.c b/init/main.c
index 5843fe996703..3316dffe3e57 100644
--- a/init/main.c
+++ b/init/main.c
@@ -363,7 +363,7 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 
 #else
 
-#ifndef CONFIG_ARCH_SETS_UP_PER_CPU_AREA
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -384,7 +384,7 @@ static void __init setup_per_cpu_areas(void)
 		ptr += size;
 	}
 }
-#endif /* CONFIG_ARCH_SETS_UP_CPU_AREA */
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
 
 /* Called by boot processor to activate the rest. */
 static void __init smp_init(void)
diff --git a/kernel/module.c b/kernel/module.c
index f6a4e721fd49..bd60278ee703 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -430,6 +430,14 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 	return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
 }
 
+static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		memcpy(pcpudest + per_cpu_offset(cpu), from, size);
+}
+
 static int percpu_modinit(void)
 {
 	pcpu_num_used = 2;

From ac629a98bf98bfe66bf0a674b22be3474ee6a9a4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 701/890] x86: remove duplicated line about

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 116687ade180..fe47807cac00 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -338,7 +338,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_NUMA
 	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
 #endif
-	x86_bios_cpu_apicid_early_ptr = (void *)&x86_bios_cpu_apicid_init;
 #endif
 
 #ifdef CONFIG_ACPI

From 3effef1f3bbc2a63cc79c9eda789642ac06067b3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 702/890] x86: should use array directly for early_ptr

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index fe47807cac00..0d20ed7fb097 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -333,10 +333,10 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SMP
 	/* setup to use the early static init tables during kernel startup */
-	x86_cpu_to_apicid_early_ptr = (void *)&x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)&x86_bios_cpu_apicid_init;
+	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
+	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
 #ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)&x86_cpu_to_node_map_init;
+	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
 #endif
 #endif
 

From 85e2aeea80b453800c19e7713d85c8f91ed97b5b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 703/890] x86: use KSYM_NAME_LEN

Use KSYM_NAME_LEN instead of numeric value

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 4ea727869ded..efc66df728b6 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -107,7 +107,7 @@ void printk_address(unsigned long address, int reliable)
 	const char *symname;
 	char *modname;
 	char *delim = ":";
-	char namebuf[128];
+	char namebuf[KSYM_NAME_LEN];
 	char reliab[4] = "";
 
 	symname = kallsyms_lookup(address, &symsize, &offset,

From 45078cb5e2fb5a0e55c373755ed45b43d901f9e4 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 704/890] x86: remove struct cpu_model_info

No one uses struct cpu_model_info on x86_64 now.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 0d20ed7fb097..9ee2bc30d530 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -869,12 +869,6 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 		c->x86_vendor = X86_VENDOR_UNKNOWN;
 }
 
-struct cpu_model_info {
-	int vendor;
-	int family;
-	char *model_names[16];
-};
-
 /* Do some early cpuid on the boot CPU to get some parameter that are
    needed before check_bugs. Everything advanced is in identify_cpu
    below. */

From e6c4dc6c36b01d33a2e0c8a89828d8dd44fb4c1b Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 705/890] x86: GEODE add the "mfgptfix" boot time option to fix
 MFGPT timers

The new "mfgptfix" boot command line option may be usd to fix MFGPT
timers on AMD Geode platforms when the BIOS has incorrectly applied
a workaround. TinyBIOS version 0.98 is known to be affected, 0.99
fixes the problem by letting the user disable the workaround.

Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/kernel-parameters.txt |  5 +++++
 arch/x86/kernel/mfgpt_32.c          | 15 +++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fe3031d56431..5d171b7b8393 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1092,6 +1092,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			Multi-Function General Purpose Timers on AMD Geode
 			platforms.
 
+	mfgptfix	[X86-32] Fix MFGPT timers on AMD Geode platforms when
+			the BIOS has incorrectly applied a workaround. TinyBIOS
+			version 0.98 is known to be affected, 0.99 fixes the
+			problem by letting the user disable the workaround.
+
 	mga=		[HW,DRM]
 
 	mousedev.tap_time=
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3960ab7e1497..219f86eb6123 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -63,6 +63,21 @@ static int __init mfgpt_disable(char *s)
 }
 __setup("nomfgpt", mfgpt_disable);
 
+/* Reset the MFGPT timers. This is required by some broken BIOSes which already
+ * do the same and leave the system in an unstable state. TinyBIOS 0.98 is
+ * affected at least (0.99 is OK with MFGPT workaround left to off).
+ */
+static int __init mfgpt_fix(char *s)
+{
+	u32 val, dummy;
+
+	/* The following udocumented bit resets the MFGPT timers */
+	val = 0xFF; dummy = 0;
+	wrmsr(0x5140002B, val, dummy);
+	return 1;
+}
+__setup("mfgptfix", mfgpt_fix);
+
 /*
  * Check whether any MFGPTs are available for the kernel to use.  In most
  * cases, firmware that uses AMD's VSA code will claim all timers during

From 1ce357129a55a791318e23f5d7b8a9f1352858aa Mon Sep 17 00:00:00 2001
From: "travis@sgi.com" <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:33 +0100
Subject: [PATCH 706/890] x86: early cpu_to_node fix in numa_64.c

Both of these references to cpu_to_node() can potentially occur
before the "late" cpu_to_node map is setup.  Therefore, they
should be changed to use early_cpu_to_node().

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 0ca96d86cb71..f0e5cabe49d8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -251,7 +251,7 @@ void __init numa_init_array(void)
 
 	rr = first_node(node_online_map);
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_to_node(i) != NUMA_NO_NODE)
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
 		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
@@ -528,7 +528,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-	set_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
+	set_bit(cpu,
+		(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_set_node(int cpu, int node)

From 53391fa20cab6df6b476a5a0ad6be653c9de0c46 Mon Sep 17 00:00:00 2001
From: Yi Yang <yi.y.yang@intel.com>
Date: Wed, 30 Jan 2008 13:33:34 +0100
Subject: [PATCH 707/890] cpufreq: fix obvious condition statement error

The function __cpufreq_set_policy in file drivers/cpufreq/cpufreq.c
has a very obvious error:

        if (policy->min > data->min && policy->min > policy->max) {
                ret = -EINVAL;
                goto error_out;
        }

This condtion statement is wrong because it returns -EINVAL only if
policy->min is greater than policy->max (in this case,
"policy->min > data->min" is true for ever.). In fact, it should
return -EINVAL as well if policy->max is less than data->min.

The correct condition should be:

	if (policy->min > data->max || policy->max < data->min) {

The following test result testifies the above conclusion:

Before applying this patch:

[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
2394000 1596000
[root@yangyi-dev /]# echo 1596000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
1596000
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
1596000
[root@yangyi-dev /]# echo "2000000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
-bash: echo: write error: Invalid argument
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
1596000
[root@yangyi-dev /]# echo "0" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
1596000
[root@yangyi-dev /]# echo "1595000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
1596000
[root@yangyi-dev /]#

After applying this patch:

[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
2394000 1596000
[root@yangyi-dev /]# echo 1596000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
1596000
[root@yangyi-dev /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
1596000
[root@localhost /]# echo "2000000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
-bash: echo: write error: Invalid argument
[root@localhost /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
1596000
[root@localhost /]# echo "0" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-bash: echo: write error: Invalid argument
[root@localhost /]# echo "1595000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
-bash: echo: write error: Invalid argument
[root@localhost /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
1596000
[root@localhost /]# echo "1596000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@localhost /]# echo "2394000" > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
[root@localhost /]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
2394000
[root@localhost /]

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/cpufreq/cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5efd5550f4ca..b730d6709529 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1604,7 +1604,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 	memcpy(&policy->cpuinfo, &data->cpuinfo,
 				sizeof(struct cpufreq_cpuinfo));
 
-	if (policy->min > data->min && policy->min > policy->max) {
+	if (policy->min > data->max || policy->max < data->min) {
 		ret = -EINVAL;
 		goto error_out;
 	}

From 9a1b62fe858ba6780a9aeb4ab5f7751038a6c15d Mon Sep 17 00:00:00 2001
From: Minoru Usui <usui@mxm.nes.nec.co.jp>
Date: Wed, 30 Jan 2008 13:33:35 +0100
Subject: [PATCH 708/890] x86: fix NUMA emulation on 64-bit

I found a small bug of NUMA emulation code for x86_64. (CONFIG_NUMA_EMU)
If machine is non-NUMA, find_node_by_addr() should return
NUMA_NO_NODE, but current implementation code returns existent maximum
NUMA node number + 1.
This is not existent NUMA node number.

However, this behaviour does not affect NUMA emulation fortunately, because
acpi_fake_nodes() that is caller of find_node_by_addr() gets pxm
(proximity domain) by node_to_pxm() from non-existent NUMA node number
that was returned by find_node_by_addr().
node_to_pxm() returns PXM_INVAL that means illegal or non-existent
NUMA node number.

Signed-off-by: Minoru Usui <usui@mxm.nes.nec.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/srat_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 4aed38fa4a65..37308d6bb5d9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -416,7 +416,7 @@ static int __init find_node_by_addr(unsigned long addr)
 			break;
 		}
 	}
-	return i;
+	return ret;
 }
 
 /*

From cd7d72bb27a8c7502a602bdc299f1bb0a9357975 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:35 +0100
Subject: [PATCH 709/890] x86: improve MTRR trimming messages

improve the MTTR trimming messages and also trigger a WARN_ON()
so that kerneloops.org can pick it up and categorize it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mtrr/main.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ac4b6338f3f4..715919582657 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -706,20 +706,17 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
 	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
 	if (!highest_addr) {
-		printk(KERN_WARNING "***************\n");
-		printk(KERN_WARNING "**** WARNING: likely strange cpu\n");
-		printk(KERN_WARNING "**** MTRRs all blank, cpu in qemu?\n");
-		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
+		WARN_ON(1);
 		return 0;
 	}
 
 	if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
-		printk(KERN_WARNING "***************\n");
-		printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
-		printk(KERN_WARNING "**** MTRRs don't cover all of "
-		       "memory, trimmed %ld pages\n", end_pfn -
-		       (highest_addr >> PAGE_SHIFT));
-		printk(KERN_WARNING "***************\n");
+		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+			" all of memory, losing %LdMB of RAM.\n",
+			(((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+
+		WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr\n");
 		trim_start = highest_addr;

From 519efbc0b3b6004a3b98d66a446bce30852c8171 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann3 <andreas.herrmann3@amd.com>
Date: Wed, 30 Jan 2008 13:33:35 +0100
Subject: [PATCH 710/890] x86: fix cpu MHz reporting for AMD family 0x11

Fix cpu MHz reporting for AMD family 0x11 when powernow-k8 is
disabled.

Just adhere to the CONSTANT_TSC feature bit for AMD CPUs when deciding
whether cpu_khz needs calibration. The additional check for CPU family
is not needed and prevents calibration for future CPUs.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/time_64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 61b17f5ec867..0380795121a6 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -120,8 +120,7 @@ void __init time_init(void)
 
 	cpu_khz = tsc_khz;
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-		boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		boot_cpu_data.x86 == 16)
+		(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 		cpu_khz = calculate_cpu_khz();
 
 	if (unsynchronized_tsc())

From ade761496dcd0aea1c41da21d5a6ced4897f02e7 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Wed, 30 Jan 2008 13:33:35 +0100
Subject: [PATCH 711/890] x86: GEODE: update GPIO API to support setting
 multiple GPIOs at once

The existing Geode GPIO API only allows for updating one GPIO at once.  There
are instances where users want to update multiple GPIOs at once.  With the
current API, they are given two choices; either ignore the GPIO API:

      outl(0xc000, gpio_base + GPIO_OUTPUT_VAL);
      outl(0xc000, gpio_base + GPIO_OUTPUT_ENABLE);

Alternatively, call each GPIO update separately:

      geode_gpio_set(14, GPIO_OUTPUT_VAL);
      geode_gpio_set(15, GPIO_OUTPUT_VAL);
      geode_gpio_set(14, GPIO_OUTPUT_ENABLE);
      geode_gpio_set(15, GPIO_OUTPUT_ENABLE);

Neither are desirable.  This patch changes the GPIO API to allow for setting
of multiple GPIOs at once; rather than being passed an integer, we pass
a bitmask and provide a translation function.  The above code would now
look like this:

      geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_VAL);
      geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_ENABLE);

Since there are no upstream users of the GPIO API yet (afaik), best to
change this now.  This also adds a bit of sanity checking; it is no
longer possible to use a GPIO above 28.

Note the semantics of geode_gpio_isset() have changed:
geode_gpio_isset(geode_gpio(3)|geode_gpio(4), ...)
will only return true iff both GPIOs are set.

Signed-off-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/geode_32.c | 48 ++++++++++++++++++++++++++------------
 include/asm-x86/geode.h    | 12 +++++++---
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index f12d8c5d9809..9c7f7d395968 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -1,6 +1,7 @@
 /*
  * AMD Geode southbridge support code
  * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public License
@@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base);
 
 /* === GPIO API === */
 
-void geode_gpio_set(unsigned int gpio, unsigned int reg)
+void geode_gpio_set(u32 gpio, unsigned int reg)
 {
 	u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
 
 	if (!base)
 		return;
 
-	if (gpio < 16)
-		outl(1 << gpio, base + reg);
-	else
-		outl(1 << (gpio - 16), base + 0x80 + reg);
+	/* low bank register */
+	if (gpio & 0xFFFF)
+		outl(gpio & 0xFFFF, base + reg);
+	/* high bank register */
+	gpio >>= 16;
+	if (gpio)
+		outl(gpio, base + 0x80 + reg);
 }
 EXPORT_SYMBOL_GPL(geode_gpio_set);
 
-void geode_gpio_clear(unsigned int gpio, unsigned int reg)
+void geode_gpio_clear(u32 gpio, unsigned int reg)
 {
 	u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
 
 	if (!base)
 		return;
 
-	if (gpio < 16)
-		outl(1 << (gpio + 16), base + reg);
-	else
-		outl(1 << gpio, base + 0x80 + reg);
+	/* low bank register */
+	if (gpio & 0xFFFF)
+		outl((gpio & 0xFFFF) << 16, base + reg);
+	/* high bank register */
+	gpio &= (0xFFFF << 16);
+	if (gpio)
+		outl(gpio, base + 0x80 + reg);
 }
 EXPORT_SYMBOL_GPL(geode_gpio_clear);
 
-int geode_gpio_isset(unsigned int gpio, unsigned int reg)
+int geode_gpio_isset(u32 gpio, unsigned int reg)
 {
 	u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+	u32 val;
 
 	if (!base)
 		return 0;
 
-	if (gpio < 16)
-		return (inl(base + reg) & (1 << gpio)) ? 1 : 0;
-	else
-		return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0;
+	/* low bank register */
+	if (gpio & 0xFFFF) {
+		val = inl(base + reg) & (gpio & 0xFFFF);
+		if ((gpio & 0xFFFF) == val)
+			return 1;
+	}
+	/* high bank register */
+	gpio >>= 16;
+	if (gpio) {
+		val = inl(base + 0x80 + reg) & gpio;
+		if (gpio == val)
+			return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(geode_gpio_isset);
 
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 771af336734f..811fe14f70b2 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -121,9 +121,15 @@ extern int geode_get_dev_base(unsigned int dev);
 #define GPIO_MAP_Z		0xE8
 #define GPIO_MAP_W		0xEC
 
-extern void geode_gpio_set(unsigned int, unsigned int);
-extern void geode_gpio_clear(unsigned int, unsigned int);
-extern int geode_gpio_isset(unsigned int, unsigned int);
+static inline u32 geode_gpio(unsigned int nr)
+{
+	BUG_ON(nr > 28);
+	return 1 << nr;
+}
+
+extern void geode_gpio_set(u32, unsigned int);
+extern void geode_gpio_clear(u32, unsigned int);
+extern int geode_gpio_isset(u32, unsigned int);
 extern void geode_gpio_setup_event(unsigned int, int, int);
 extern void geode_gpio_set_irq(unsigned int, unsigned int);
 

From a608295935d237bdbe95eefdba1e3fa40676df31 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@telecomint.eu>
Date: Wed, 30 Jan 2008 13:33:35 +0100
Subject: [PATCH 712/890] x86: add generic GPIO support to x86

This patch adds the generic GPIO support to the x86
architecture. We do the same as for MIPS, we let
the machine override the gpio callbacks and provide
defaults one in mach-generic.

Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                    |  3 +++
 include/asm-x86/gpio.h              |  6 ++++++
 include/asm-x86/mach-generic/gpio.h | 15 +++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 include/asm-x86/gpio.h
 create mode 100644 include/asm-x86/mach-generic/gpio.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8e1b33c5405f..32e267186035 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,9 @@ config GENERIC_BUG
 config GENERIC_HWEIGHT
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool n
+
 config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
new file mode 100644
index 000000000000..ff87fca0caf9
--- /dev/null
+++ b/include/asm-x86/gpio.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_I386_GPIO_H
+#define _ASM_I386_GPIO_H
+
+#include <gpio.h>
+
+#endif /* _ASM_I386_GPIO_H */
diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h
new file mode 100644
index 000000000000..5305dcb96df2
--- /dev/null
+++ b/include/asm-x86/mach-generic/gpio.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_MACH_GENERIC_GPIO_H
+#define __ASM_MACH_GENERIC_GPIO_H
+
+int gpio_request(unsigned gpio, const char *label);
+void gpio_free(unsigned gpio);
+int gpio_direction_input(unsigned gpio);
+int gpio_direction_output(unsigned gpio, int value);
+int gpio_get_value(unsigned gpio);
+void gpio_set_value(unsigned gpio, int value);
+int gpio_to_irq(unsigned gpio);
+int irq_to_gpio(unsigned irq);
+
+#include <asm-generic/gpio.h>           /* cansleep wrappers */
+
+#endif /* __ASM_MACH_GENERIC_GPIO_H */

From 0acf8e3447b893ff921863c2a4258e210d584452 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@telecomint.eu>
Date: Wed, 30 Jan 2008 13:33:36 +0100
Subject: [PATCH 713/890] pci: add PCI identifiers for the RDC devices

This patch defines the PCI identifiers found in
the RDC R-321x System-on-Chip.

Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/pci_ids.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c69531348363..41f6f28690f6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2085,6 +2085,13 @@
 #define PCI_VENDOR_ID_BELKIN		0x1799
 #define PCI_DEVICE_ID_BELKIN_F5D7010V7	0x701f
 
+#define PCI_VENDOR_ID_RDC		0x17f3
+#define PCI_DEVICE_ID_RDC_R6020		0x6020
+#define PCI_DEVICE_ID_RDC_R6030		0x6030
+#define PCI_DEVICE_ID_RDC_R6040		0x6040
+#define PCI_DEVICE_ID_RDC_R6060		0x6060
+#define PCI_DEVICE_ID_RDC_R6061		0x6061
+
 #define PCI_VENDOR_ID_LENOVO		0x17aa
 
 #define PCI_VENDOR_ID_ARECA		0x17d3

From 5e3a77e9a9b7bfc1f69f51fe6d34aa649887980c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@telecomint.eu>
Date: Wed, 30 Jan 2008 13:33:36 +0100
Subject: [PATCH 714/890] x86: add support for the RDC R-321x SoC

This patch adds support for the RDC R-321x system-on-chip,
also known as R-861x-(G). It uses the generic GPIO API and
has support for the on-chip hardware watchdog.

Build-fix from: Randy Dunlap <randy.dunlap@oracle.com>

Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                            |  14 +-
 arch/x86/Makefile                           |   5 +
 arch/x86/mach-rdc321x/Makefile              |   5 +
 arch/x86/mach-rdc321x/gpio.c                |  91 +++++++
 arch/x86/mach-rdc321x/platform.c            |  68 +++++
 arch/x86/mach-rdc321x/wdt.c                 | 275 ++++++++++++++++++++
 include/asm-x86/mach-rdc321x/gpio.h         |  56 ++++
 include/asm-x86/mach-rdc321x/rdc321x_defs.h |   6 +
 include/asm-x86/timex.h                     |   2 +
 9 files changed, 521 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/mach-rdc321x/Makefile
 create mode 100644 arch/x86/mach-rdc321x/gpio.c
 create mode 100644 arch/x86/mach-rdc321x/platform.c
 create mode 100644 arch/x86/mach-rdc321x/wdt.c
 create mode 100644 include/asm-x86/mach-rdc321x/gpio.h
 create mode 100644 include/asm-x86/mach-rdc321x/rdc321x_defs.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32e267186035..92dc919c7640 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -291,6 +291,18 @@ config X86_ES7000
 	  Only choose this option if you have such a system, otherwise you
 	  should say N here.
 
+config X86_RDC321X
+	bool "RDC R-321x SoC"
+	depends on X86_32
+	select M486
+	select X86_REBOOTFIXUPS
+	select GENERIC_GPIO
+	select LEDS_GPIO
+	help
+	  This option is needed for RDC R-321x system-on-chip, also known
+	  as R-8610-(G).
+	  If you don't have one of these chips, you should say N here.
+
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	depends on X86_64 && PCI
@@ -637,7 +649,7 @@ config X86_REBOOTFIXUPS
 	  system.
 
 	  Currently, the only fixup is for the Geode machines using
-	  CS5530A and CS5536 chipsets.
+	  CS5530A and CS5536 chipsets and the RDC R-321x SoC.
 
 	  Say Y if you want to enable the fixup. Currently, it's safe to
 	  enable this option even if you don't need it.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e2ffccfd6af6..b08f18261df6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -139,6 +139,11 @@ mflags-$(CONFIG_X86_ES7000)	:= -Iinclude/asm-x86/mach-es7000
 fcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-es7000/
 mcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-default/
 
+# RDC R-321x subarch support
+mflags-$(CONFIG_X86_RDC321X)	:= -Iinclude/asm-x86/mach-rdc321x
+mcore-$(CONFIG_X86_RDC321X)	:= arch/x86/mach-default
+core-$(CONFIG_X86_RDC321X)	+= arch/x86/mach-rdc321x/
+
 # default subarch .h files
 mflags-y += -Iinclude/asm-x86/mach-default
 
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
new file mode 100644
index 000000000000..1faac8125e3d
--- /dev/null
+++ b/arch/x86/mach-rdc321x/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the RDC321x specific parts of the kernel
+#
+obj-$(CONFIG_X86_RDC321X)        := gpio.o platform.o wdt.o
+
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
new file mode 100644
index 000000000000..031269163bd6
--- /dev/null
+++ b/arch/x86/mach-rdc321x/gpio.c
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (C) 2007, OpenWrt.org, Florian Fainelli <florian@openwrt.org>
+ *  	RDC321x architecture specific GPIO support
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#include <linux/autoconf.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+#include <asm/mach-rdc321x/rdc321x_defs.h>
+
+static inline int rdc_gpio_is_valid(unsigned gpio)
+{
+	return (gpio <= RDC_MAX_GPIO);
+}
+
+static unsigned int rdc_gpio_read(unsigned gpio)
+{
+	unsigned int val;
+
+	val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48));
+	outl(val, RDC3210_CFGREG_ADDR);
+	udelay(10);
+	val = inl(RDC3210_CFGREG_DATA);
+	val |= (0x1 << (gpio & 0x1F));
+	outl(val, RDC3210_CFGREG_DATA);
+	udelay(10);
+	val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C));
+	outl(val, RDC3210_CFGREG_ADDR);
+	udelay(10);
+	val = inl(RDC3210_CFGREG_DATA);
+
+	return val;
+}
+
+static void rdc_gpio_write(unsigned int val)
+{
+	if (val) {
+		outl(val, RDC3210_CFGREG_DATA);
+		udelay(10);
+	}
+}
+
+int rdc_gpio_get_value(unsigned gpio)
+{
+	if (rdc_gpio_is_valid(gpio))
+		return (int)rdc_gpio_read(gpio);
+	else
+		return -EINVAL;
+}
+EXPORT_SYMBOL(rdc_gpio_get_value);
+
+void rdc_gpio_set_value(unsigned gpio, int value)
+{
+	unsigned int val;
+
+	if (!rdc_gpio_is_valid(gpio))
+		return;
+
+	val = rdc_gpio_read(gpio);
+
+	if (value)
+		val &= ~(0x1 << (gpio & 0x1F));
+	else
+		val |= (0x1 << (gpio & 0x1F));
+
+	rdc_gpio_write(val);
+}
+EXPORT_SYMBOL(rdc_gpio_set_value);
+
+int rdc_gpio_direction_input(unsigned gpio)
+{
+	return 0;
+}
+EXPORT_SYMBOL(rdc_gpio_direction_input);
+
+int rdc_gpio_direction_output(unsigned gpio, int value)
+{
+	return 0;
+}
+EXPORT_SYMBOL(rdc_gpio_direction_output);
+
+
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
new file mode 100644
index 000000000000..dda6024a5862
--- /dev/null
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -0,0 +1,68 @@
+/*
+ *  Generic RDC321x platform devices
+ *
+ *  Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the
+ *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/leds.h>
+
+#include <asm/gpio.h>
+
+/* LEDS */
+static struct gpio_led default_leds[] = {
+	{ .name = "rdc:dmz", .gpio = 1, },
+};
+
+static struct gpio_led_platform_data rdc321x_led_data = {
+	.num_leds = ARRAY_SIZE(default_leds),
+	.leds = default_leds,
+};
+
+static struct platform_device rdc321x_leds = {
+	.name = "leds-gpio",
+	.id = -1,
+	.dev = {
+		.platform_data = &rdc321x_led_data,
+	}
+};
+
+/* Watchdog */
+static struct platform_device rdc321x_wdt = {
+	.name = "rdc321x-wdt",
+	.id = -1,
+	.num_resources = 0,
+};
+
+static struct platform_device *rdc321x_devs[] = {
+	&rdc321x_leds,
+	&rdc321x_wdt
+};
+
+static int __init rdc_board_setup(void)
+{
+	return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
+}
+
+arch_initcall(rdc_board_setup);
diff --git a/arch/x86/mach-rdc321x/wdt.c b/arch/x86/mach-rdc321x/wdt.c
new file mode 100644
index 000000000000..ec5625ae7061
--- /dev/null
+++ b/arch/x86/mach-rdc321x/wdt.c
@@ -0,0 +1,275 @@
+/*
+ * RDC321x watchdog driver
+ *
+ * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ * This driver is highly inspired from the cpu5_wdt driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/timer.h>
+#include <linux/completion.h>
+#include <linux/jiffies.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/mach-rdc321x/rdc321x_defs.h>
+
+#define RDC_WDT_MASK	0x80000000 /* Mask */
+#define RDC_WDT_EN	0x00800000 /* Enable bit */
+#define RDC_WDT_WTI	0x00200000 /* Generate CPU reset/NMI/WDT on timeout */
+#define RDC_WDT_RST	0x00100000 /* Reset bit */
+#define RDC_WDT_WIF	0x00040000 /* WDT IRQ Flag */
+#define RDC_WDT_IRT	0x00000100 /* IRQ Routing table */
+#define RDC_WDT_CNT	0x00000001 /* WDT count */
+
+#define RDC_CLS_TMR	0x80003844 /* Clear timer */
+
+#define RDC_WDT_INTERVAL	(HZ/10+1)
+
+int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int ticks = 1000;
+
+/* some device data */
+
+static struct {
+	struct completion stop;
+	volatile int running;
+	struct timer_list timer;
+	volatile int queue;
+	int default_ticks;
+	unsigned long inuse;
+} rdc321x_wdt_device;
+
+/* generic helper functions */
+
+static void rdc321x_wdt_trigger(unsigned long unused)
+{
+	if (rdc321x_wdt_device.running)
+		ticks--;
+
+	/* keep watchdog alive */
+	outl(RDC_WDT_EN|inl(RDC3210_CFGREG_DATA), RDC3210_CFGREG_DATA);
+
+	/* requeue?? */
+	if (rdc321x_wdt_device.queue && ticks)
+		mod_timer(&rdc321x_wdt_device.timer,
+				jiffies + RDC_WDT_INTERVAL);
+	else {
+		/* ticks doesn't matter anyway */
+		complete(&rdc321x_wdt_device.stop);
+	}
+
+}
+
+static void rdc321x_wdt_reset(void)
+{
+	ticks = rdc321x_wdt_device.default_ticks;
+}
+
+static void rdc321x_wdt_start(void)
+{
+	if (!rdc321x_wdt_device.queue) {
+		rdc321x_wdt_device.queue = 1;
+
+		/* Clear the timer */
+		outl(RDC_CLS_TMR, RDC3210_CFGREG_ADDR);
+
+		/* Enable watchdog and set the timeout to 81.92 us */
+		outl(RDC_WDT_EN|RDC_WDT_CNT, RDC3210_CFGREG_DATA);
+
+		mod_timer(&rdc321x_wdt_device.timer,
+				jiffies + RDC_WDT_INTERVAL);
+	}
+
+	/* if process dies, counter is not decremented */
+	rdc321x_wdt_device.running++;
+}
+
+static int rdc321x_wdt_stop(void)
+{
+	if (rdc321x_wdt_device.running)
+		rdc321x_wdt_device.running = 0;
+
+	ticks = rdc321x_wdt_device.default_ticks;
+
+	return -EIO;
+}
+
+/* filesystem operations */
+
+static int rdc321x_wdt_open(struct inode *inode, struct file *file)
+{
+	if (test_and_set_bit(0, &rdc321x_wdt_device.inuse))
+		return -EBUSY;
+
+	return nonseekable_open(inode, file);
+}
+
+static int rdc321x_wdt_release(struct inode *inode, struct file *file)
+{
+	clear_bit(0, &rdc321x_wdt_device.inuse);
+	return 0;
+}
+
+static int rdc321x_wdt_ioctl(struct inode *inode, struct file *file,
+				unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	unsigned int value;
+	static struct watchdog_info ident = {
+		.options = WDIOF_CARDRESET,
+		.identity = "RDC321x WDT",
+	};
+
+	switch (cmd) {
+	case WDIOC_KEEPALIVE:
+		rdc321x_wdt_reset();
+		break;
+	case WDIOC_GETSTATUS:
+		/* Read the value from the DATA register */
+		value = inl(RDC3210_CFGREG_DATA);
+		if (copy_to_user(argp, &value, sizeof(int)))
+			return -EFAULT;
+		break;
+	case WDIOC_GETSUPPORT:
+		if (copy_to_user(argp, &ident, sizeof(ident)))
+			return -EFAULT;
+		break;
+	case WDIOC_SETOPTIONS:
+		if (copy_from_user(&value, argp, sizeof(int)))
+			return -EFAULT;
+		switch (value) {
+		case WDIOS_ENABLECARD:
+			rdc321x_wdt_start();
+			break;
+		case WDIOS_DISABLECARD:
+			return rdc321x_wdt_stop();
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -ENOTTY;
+	}
+	return 0;
+}
+
+static ssize_t rdc321x_wdt_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	if (!count)
+		return -EIO;
+
+	rdc321x_wdt_reset();
+
+	return count;
+}
+
+static const struct file_operations rdc321x_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.ioctl		= rdc321x_wdt_ioctl,
+	.open		= rdc321x_wdt_open,
+	.write		= rdc321x_wdt_write,
+	.release	= rdc321x_wdt_release,
+};
+
+static struct miscdevice rdc321x_wdt_misc = {
+	.minor	= WATCHDOG_MINOR,
+	.name	= "watchdog",
+	.fops	= &rdc321x_wdt_fops,
+};
+
+static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
+{
+	int err;
+
+	err = misc_register(&rdc321x_wdt_misc);
+	if (err < 0) {
+		printk(KERN_ERR PFX "watchdog misc_register failed\n");
+		return err;
+	}
+
+	/* Reset the watchdog */
+	outl(RDC_WDT_RST, RDC3210_CFGREG_DATA);
+
+	init_completion(&rdc321x_wdt_device.stop);
+	rdc321x_wdt_device.queue = 0;
+
+	clear_bit(0, &rdc321x_wdt_device.inuse);
+
+	setup_timer(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0);
+
+	rdc321x_wdt_device.default_ticks = ticks;
+
+	printk(KERN_INFO PFX "watchdog init success\n");
+
+	return 0;
+}
+
+static int rdc321x_wdt_remove(struct platform_device *pdev)
+{
+	if (rdc321x_wdt_device.queue) {
+		rdc321x_wdt_device.queue = 0;
+		wait_for_completion(&rdc321x_wdt_device.stop);
+	}
+
+	misc_deregister(&rdc321x_wdt_misc);
+
+	return 0;
+}
+
+static struct platform_driver rdc321x_wdt_driver = {
+	.probe = rdc321x_wdt_probe,
+	.remove = rdc321x_wdt_remove,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "rdc321x-wdt",
+	},
+};
+
+static int __init rdc321x_wdt_init(void)
+{
+	return platform_driver_register(&rdc321x_wdt_driver);
+}
+
+static void __exit rdc321x_wdt_exit(void)
+{
+	platform_driver_unregister(&rdc321x_wdt_driver);
+}
+
+module_init(rdc321x_wdt_init);
+module_exit(rdc321x_wdt_exit);
+
+MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
+MODULE_DESCRIPTION("RDC321x watchdog driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h
new file mode 100644
index 000000000000..db31b929b990
--- /dev/null
+++ b/include/asm-x86/mach-rdc321x/gpio.h
@@ -0,0 +1,56 @@
+#ifndef _RDC321X_GPIO_H
+#define _RDC321X_GPIO_H
+
+extern int rdc_gpio_get_value(unsigned gpio);
+extern void rdc_gpio_set_value(unsigned gpio, int value);
+extern int rdc_gpio_direction_input(unsigned gpio);
+extern int rdc_gpio_direction_output(unsigned gpio, int value);
+
+
+/* Wrappers for the arch-neutral GPIO API */
+
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+	/* Not yet implemented */
+	return 0;
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+	/* Not yet implemented */
+}
+
+static inline int gpio_direction_input(unsigned gpio)
+{
+	return rdc_gpio_direction_input(gpio);
+}
+
+static inline int gpio_direction_output(unsigned gpio, int value)
+{
+	return rdc_gpio_direction_output(gpio, value);
+}
+
+static inline int gpio_get_value(unsigned gpio)
+{
+	return rdc_gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned gpio, int value)
+{
+	rdc_gpio_set_value(gpio, value);
+}
+
+static inline int gpio_to_irq(unsigned gpio)
+{
+	return gpio;
+}
+
+static inline int irq_to_gpio(unsigned irq)
+{
+	return irq;
+}
+
+/* For cansleep */
+#include <asm-generic/gpio.h>
+
+#endif /* _RDC321X_GPIO_H_ */
diff --git a/include/asm-x86/mach-rdc321x/rdc321x_defs.h b/include/asm-x86/mach-rdc321x/rdc321x_defs.h
new file mode 100644
index 000000000000..838ba8f64fd3
--- /dev/null
+++ b/include/asm-x86/mach-rdc321x/rdc321x_defs.h
@@ -0,0 +1,6 @@
+#define PFX	"rdc321x: "
+
+/* General purpose configuration and data registers */
+#define RDC3210_CFGREG_ADDR     0x0CF8
+#define RDC3210_CFGREG_DATA     0x0CFC
+#define RDC_MAX_GPIO		0x3A
diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 39a21ab030f0..27cfd6c599ba 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h
@@ -7,6 +7,8 @@
 
 #ifdef CONFIG_X86_ELAN
 #  define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */
+#elif defined(CONFIG_X86_RDC321X)
+#  define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */
 #else
 #  define PIT_TICK_RATE 1193182 /* Underlying HZ */
 #endif

From ff472a114ca11f5354912745fbb94bb66ec10916 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@telecomint.eu>
Date: Wed, 30 Jan 2008 13:33:36 +0100
Subject: [PATCH 715/890] x86: add the RDC machine specific reboot fixup

The RDC R-321x SoC needs a reboot fixup which
uses its internal hardware watchdog set to
reset the CPU on next tick.

Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot_fixups_32.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index f452726c0fe2..dec0b5ec25c2 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci_dev *dev)
 	udelay(50); /* shouldn't get here but be safe and spin a while */
 }
 
+static void rdc321x_reset(struct pci_dev *dev)
+{
+	unsigned i;
+	/* Voluntary reset the watchdog timer */
+	outl(0x80003840, 0xCF8);
+	/* Generate a CPU reset on next tick */
+	i = inl(0xCFC);
+	/* Use the minimum timer resolution */
+	i |= 0x1600;
+	outl(i, 0xCFC);
+	outb(1, 0x92);
+}
+
 struct device_fixup {
 	unsigned int vendor;
 	unsigned int device;
@@ -40,6 +53,7 @@ static struct device_fixup fixups_table[] = {
 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
 { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
+{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
 };
 
 /*

From 1e35669d017eac5bea4739f927b28e64731504e1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:36 +0100
Subject: [PATCH 716/890] x86: fix section mismatch warning in mcheck/mce_64.c

Fix following warning:
WARNING: arch/x86/kernel/cpu/mcheck/built-in.o(.text+0x752): Section mismatch: reference to .cpuinit.text:mce_create_device in 'mce_cpu_callback'

mce_cpu_callback() is only used by mce_cpu_notofier.
The notifier is only used for hotplugable cpu's as it is
registered using register_hotcpu_notifier(),

Annotate them both __cpuinit to fix the warning.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 98b23d55fe6e..9a699ed03598 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -854,8 +854,8 @@ static void mce_remove_device(unsigned int cpu)
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
+				      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 
@@ -872,7 +872,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static struct notifier_block mce_cpu_notifier = {
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
 	.notifier_call = mce_cpu_callback,
 };
 

From b48ed48a7816aafedb260cafe872a55757b9489e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:36 +0100
Subject: [PATCH 717/890] x86: fix section mismatch warning in
 mcheck/mce_amd_64.c

Fix following warning:
WARNING: arch/x86/kernel/cpu/mcheck/built-in.o(.text+0x1584): Section mismatch: reference to .cpuinit.text:threshold_create_device in 'threshold_cpu_callback'

threshold_cpu_callback() is only used by threshold_cpu_notifier.
threshold_cpu_notifier is only used for cpu hot plug as it is registered
using register_hotcpu_notifier().

Mark them both __cpuinit to fix the warning.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 550502596ca3..77e666628818 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -645,7 +645,7 @@ static void threshold_remove_device(unsigned int cpu)
 }
 
 /* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
 					    unsigned long action, void *hcpu)
 {
 	/* cpu was unsigned int to begin with */
@@ -670,7 +670,7 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block threshold_cpu_notifier = {
+static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
 	.notifier_call = threshold_cpu_callback,
 };
 

From 4c02ad1efdd1293d6fdd453a2f27ad993458dcd1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 718/890] x86: fix section mismatch warning in process_*.c

Fix the following warning:
WARNING: arch/x86/kernel/built-in.o(.text+0x3): Section mismatch: reference to .cpuinit.data:force_mwait in 'mwait_usable'
[Seen on 64 bit only but similar pattern exist on 32 bit so fix it there too]

mwait_usable() were only used by a function annotated __cpuinit
so annotate mwait_usable() with __cpuinit to fix the warning.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_32.c | 2 +-
 arch/x86/kernel/process_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b72d7d132072..968371ab223a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -285,7 +285,7 @@ static void mwait_idle(void)
 	mwait_idle_with_hints(0, 0);
 }
 
-static int mwait_usable(const struct cpuinfo_x86 *c)
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 {
 	if (force_mwait)
 		return 1;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b4c470658a8a..137a86171c39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -281,7 +281,7 @@ static void mwait_idle(void)
 }
 
 
-static int mwait_usable(const struct cpuinfo_x86 *c)
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 {
 	if (force_mwait)
 		return 1;

From 87d7e98012e60d36cdb6c9c6a6cd1dec7c2b5a1c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 719/890] x86: fix section mismatch warning in acpi/boot.c

Fix following warning:
WARNING: arch/x86/kernel/built-in.o(.text+0x10ea0): Section mismatch: reference to .cpuinit.data:num_processors in 'acpi_unmap_lsapic'

The exported function acpi_unmap_lsapic() references
the variable num_processors that is annotated __cpuinitdata.

Remove the annotation of num_processors as we never know
when an exported function are called.
And drop the needless initialsation to 0.

Warning was seen on 64 bit but similar pattern were seen
in 32 bit - so fix it up there too.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/mpparse_32.c | 2 +-
 arch/x86/kernel/mpparse_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index cc06eae1b037..67009cdd5eca 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -68,7 +68,7 @@ unsigned int def_to_bigsmp = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
-unsigned int __cpuinitdata num_processors;
+unsigned int num_processors;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index d3260f8f17dc..72ab1403fed7 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -60,7 +60,7 @@ unsigned int boot_cpu_id = -1U;
 EXPORT_SYMBOL(boot_cpu_id);
 
 /* Internal processor count */
-unsigned int num_processors __cpuinitdata = 0;
+unsigned int num_processors;
 
 unsigned disabled_cpus __cpuinitdata;
 

From 85b74d6c119a7dae882569e57925eaafecc07859 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 720/890] x86: fix section mismatch warning in early-quirks.c

Fix following warnings:
WARNING: arch/x86/kernel/built-in.o(.text+0x139e1): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x139f5): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x13a0c): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x13a12): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x13a1a): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x13a36): Section mismatch: reference to .init.data:early_qrk in 'check_dev_quirk'
WARNING: arch/x86/kernel/built-in.o(.text+0x13a42): Section mismatch: reference to .init.data:

Warning was caused by access to the __initdata annotated variable
from the non-annotated static function check_dev_quirk().
check_dev_quirk() were only used from a function annotated
__init so add __init annotation to check_dev_quirk() to fix it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/early-quirks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3f88e437e843..9f51e1ea9e82 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -133,7 +133,7 @@ static struct chipset early_qrk[] __initdata = {
 	{}
 };
 
-static void check_dev_quirk(int num, int slot, int func)
+static void __init check_dev_quirk(int num, int slot, int func)
 {
 	u16 class;
 	u16 vendor;

From 1e296f578a8ef70b314315a2bfc7b036f947803f Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 721/890] x86: fix section mismatch warning in srat_64.c

Fix the following warnings:
WARNING: arch/x86/mm/built-in.o(.text+0x1abc): Section mismatch: reference to .init.data:nodes_parsed in 'unparse_node'
WARNING: arch/x86/mm/built-in.o(.text+0x1ac6): Section mismatch: reference to .cpuinit.data:apicid_to_node in 'unparse_node'
WARNING: arch/x86/mm/built-in.o(.text+0x1ad2): Section mismatch: reference to .cpuinit.data:apicid_to_node in 'unparse_node'

unparse_node are static and only used by acpi_scan_nodes which
is already annotated __init.
So we annotate unparse_node with __init.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/srat_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 37308d6bb5d9..65416f843e59 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -323,7 +323,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 	return 1;
 }
 
-static void unparse_node(int node)
+static void __init unparse_node(int node)
 {
 	int i;
 	node_clear(node, nodes_parsed);

From 3eaf5efae72b33bc6132e9590e5d4bf11532f8c4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 722/890] x86: fix section mismatch warning in topology.c

Fix following warning:
WARNING: arch/x86/kernel/built-in.o(__ksymtab+0x2b0): Section mismatch: reference to .cpuinit.text:arch_register_cpu in '__ksymtab_arch_register_cpu'

Annotating exported symbols are wrong.
Previously the warning were hidden by avoiding the export
in the non HOTPLUG_CPU case but the improved checks in
modpost caught it anyway.
Fix it by removing the __cpuinit annotation and rearrange the
code a bit to save one ifdef/endif pair.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/topology.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index a0d1719bda79..78cbb655aa79 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -33,7 +33,8 @@
 
 static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
-int __cpuinit arch_register_cpu(int num)
+#ifdef CONFIG_HOTPLUG_CPU
+int arch_register_cpu(int num)
 {
 	/*
 	 * CPU0 cannot be offlined due to several
@@ -44,21 +45,23 @@ int __cpuinit arch_register_cpu(int num)
 	 * Also certain PCI quirks require not to enable hotplug control
 	 * for all CPU's.
 	 */
-#ifdef CONFIG_HOTPLUG_CPU
 	if (num)
 		per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
-#endif
-
 	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
 }
+EXPORT_SYMBOL(arch_register_cpu);
 
-#ifdef CONFIG_HOTPLUG_CPU
 void arch_unregister_cpu(int num)
 {
 	return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
 }
-EXPORT_SYMBOL(arch_register_cpu);
 EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+int arch_register_cpu(int num)
+{
+	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
+}
+EXPORT_SYMBOL(arch_register_cpu);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 static int __init topology_init(void)

From adb8daed46356d1772e8bd8def1b70c8cb58ce12 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 723/890] x86: fix section mismatch warning in setup_64.c

Fix the following warning:
WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x7a3): Section mismatch: reference to .init.text:amd_detect_cmp in 'init_amd'

The function amd_detect_cmp were annotated __init and
was only used from init_amd() which are annotated __cpuinit.

Annotate amd_detect_cmp() with _cpuinit to fix it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 9ee2bc30d530..c9b7f44ea497 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -530,7 +530,7 @@ static int nearby_node(int apicid)
  * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
  * Assumes number of cores is a power of two.
  */
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
+static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	unsigned bits;

From c6c2d7a084d14a8a701be84872aa1b77d2945f46 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 724/890] x86: fix usage of .section .sched.text in assembler
 code

Without this patch the linker will generate a section
named .sched.text.1 which is unexpected.
This is because the gcc generated section has "ax" but the
assembler usage of .sched.text lacks the "ax" specifier.

It would be better to have a definition we could use from
assembler code but I did not find a suitable header
file for it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/lib/semaphore_32.S | 2 +-
 arch/x86/lib/thunk_64.S     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index e2c6e0d5e125..3899bd37fdf0 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -29,7 +29,7 @@
  * registers (%eax, %edx and %ecx) except %eax whish is either a return
  * value or just clobbered..
  */
-	.section .sched.text
+	.section .sched.text, "ax"
 ENTRY(__down_failed)
 	CFI_STARTPROC
 	FRAME
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 6ea73f3de567..8b92d428ab02 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -33,7 +33,7 @@
 	.endm
 	
 
-	.section .sched.text
+	.section .sched.text, "ax"
 #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
 	thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
 	thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed

From afe0bc63ae5fdc642a6181e1b514434f901e39ba Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 725/890] x86: unify whitespace and comments in
 arch/x86/boot/compressed/vmlinux_??.lds

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/vmlinux_32.lds | 6 +++---
 arch/x86/boot/compressed/vmlinux_64.lds | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
index cc4854f6c6c1..d98ab69a1276 100644
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ b/arch/x86/boot/compressed/vmlinux_32.lds
@@ -3,10 +3,10 @@ OUTPUT_ARCH(i386)
 ENTRY(startup_32)
 SECTIONS
 {
-        /* Be careful parts of head.S assume startup_32 is at
-         * address 0.
+	/* Be careful parts of head_32.S assume startup_32 is at
+	 * address 0.
 	 */
-	. =  0 	;
+	. = 0;
 	.text.head : {
 		_head = . ;
 		*(.text.head)
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
index 94c13e557fb4..8d63b3a22ce4 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux_64.lds
@@ -3,8 +3,8 @@ OUTPUT_ARCH(i386:x86-64)
 ENTRY(startup_64)
 SECTIONS
 {
-	/* Be careful parts of head.S assume startup_32 is at
- 	 * address 0.
+	/* Be careful parts of head_64.S assume startup_64 is at
+	 * address 0.
 	 */
 	. = 0;
 	.text :	{

From 004426eb295b5fff53ed606d5f7495f23674f2c8 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:37 +0100
Subject: [PATCH 726/890] x86: unify whitespace in
 arch/x86/boot/compressed/vmlinux_??.scr

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/vmlinux_32.scr | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/compressed/vmlinux_32.scr b/arch/x86/boot/compressed/vmlinux_32.scr
index 707a88f7f29e..a54952661995 100644
--- a/arch/x86/boot/compressed/vmlinux_32.scr
+++ b/arch/x86/boot/compressed/vmlinux_32.scr
@@ -2,9 +2,9 @@ SECTIONS
 {
   .data.compressed : {
 	input_len = .;
-	LONG(input_data_end - input_data) input_data = .; 
-	*(.data) 
+	LONG(input_data_end - input_data) input_data = .;
+	*(.data)
 	output_len = . - 4;
-	input_data_end = .; 
+	input_data_end = .;
 	}
 }

From 20514e9f6e6221884afada04fbbbde651d3d8654 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 727/890] x86: switch to .rodata.compressed in
 arch/x86/boot/compressed/vmlinux_??.scr

vmlinux_64 and vmlinux_32.scr are now identical

size shows an expected movement from .text to .rodata and 4 extra bytes
of padding.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/vmlinux_32.lds | 4 ++--
 arch/x86/boot/compressed/vmlinux_32.scr | 2 +-
 arch/x86/boot/compressed/vmlinux_64.lds | 8 ++++++--
 arch/x86/boot/compressed/vmlinux_64.scr | 2 +-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
index d98ab69a1276..bb3c48379c40 100644
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ b/arch/x86/boot/compressed/vmlinux_32.lds
@@ -12,8 +12,8 @@ SECTIONS
 		*(.text.head)
 		_ehead = . ;
 	}
-	.data.compressed : {
-		*(.data.compressed)
+	.rodata.compressed : {
+		*(.rodata.compressed)
 	}
 	.text :	{
 		_text = .; 	/* Text */
diff --git a/arch/x86/boot/compressed/vmlinux_32.scr b/arch/x86/boot/compressed/vmlinux_32.scr
index a54952661995..f02382ae5c48 100644
--- a/arch/x86/boot/compressed/vmlinux_32.scr
+++ b/arch/x86/boot/compressed/vmlinux_32.scr
@@ -1,6 +1,6 @@
 SECTIONS
 {
-  .data.compressed : {
+  .rodata.compressed : {
 	input_len = .;
 	LONG(input_data_end - input_data) input_data = .;
 	*(.data)
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
index 8d63b3a22ce4..f6e5b445f457 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux_64.lds
@@ -7,11 +7,15 @@ SECTIONS
 	 * address 0.
 	 */
 	. = 0;
-	.text :	{
+	.text.head : {
 		_head = . ;
 		*(.text.head)
 		_ehead = . ;
-		*(.text.compressed)
+	}
+	.rodata.compressed : {
+		*(.rodata.compressed)
+	}
+	.text :	{
 		_text = .; 	/* Text */
 		*(.text)
 		*(.text.*)
diff --git a/arch/x86/boot/compressed/vmlinux_64.scr b/arch/x86/boot/compressed/vmlinux_64.scr
index bd1429ce193e..f02382ae5c48 100644
--- a/arch/x86/boot/compressed/vmlinux_64.scr
+++ b/arch/x86/boot/compressed/vmlinux_64.scr
@@ -1,6 +1,6 @@
 SECTIONS
 {
-  .text.compressed : {
+  .rodata.compressed : {
 	input_len = .;
 	LONG(input_data_end - input_data) input_data = .;
 	*(.data)

From f18c0e792057059eab9a8ce929af424ff8a4a90f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 728/890] x86: unify arch/x86/boot/compressed/vmlinux_??.scr

The files are now identical so merge them.

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/Makefile                      |  2 +-
 .../boot/compressed/{vmlinux_32.scr => vmlinux.scr}    |  0
 arch/x86/boot/compressed/vmlinux_64.scr                | 10 ----------
 3 files changed, 1 insertion(+), 11 deletions(-)
 rename arch/x86/boot/compressed/{vmlinux_32.scr => vmlinux.scr} (100%)
 delete mode 100644 arch/x86/boot/compressed/vmlinux_64.scr

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index d6ae347d11f1..baffeafa798f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -59,5 +59,5 @@ LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
 endif
 
 
-$(obj)/piggy.o: $(obj)/vmlinux_$(BITS).scr $(obj)/vmlinux.bin.gz FORCE
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/vmlinux_32.scr b/arch/x86/boot/compressed/vmlinux.scr
similarity index 100%
rename from arch/x86/boot/compressed/vmlinux_32.scr
rename to arch/x86/boot/compressed/vmlinux.scr
diff --git a/arch/x86/boot/compressed/vmlinux_64.scr b/arch/x86/boot/compressed/vmlinux_64.scr
deleted file mode 100644
index f02382ae5c48..000000000000
--- a/arch/x86/boot/compressed/vmlinux_64.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
-  .rodata.compressed : {
-	input_len = .;
-	LONG(input_data_end - input_data) input_data = .;
-	*(.data)
-	output_len = . - 4;
-	input_data_end = .;
-	}
-}

From 6b3c0426a765dcae277ce9eb06d865a2f109cf48 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 729/890] x86: unify strings in
 arch/x86/boot/compressed/misc_??.c

There seems to be a preference for the 64 bit version so use that on 32 bit and
drop the stray leading "."

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c | 4 ++--
 arch/x86/boot/compressed/misc_64.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index 5a0281c81edf..a1979e322ef0 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -377,8 +377,8 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long end,
 #endif
 
 	makecrc();
-	putstr("Uncompressing Linux... ");
+	putstr("\nDecompressing Linux... ");
 	gunzip();
-	putstr("Ok, booting the kernel.\n");
+	putstr("done.\nBooting the kernel.\n");
 	return;
 }
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 8c1573b5d6c7..355e31bbaab3 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -372,7 +372,7 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
 		error("Destination address too large");
 
 	makecrc();
-	putstr(".\nDecompressing Linux...");
+	putstr("\nDecompressing Linux... ");
 	gunzip();
 	putstr("done.\nBooting the kernel.\n");
 	return;

From 818a08f8a5681d54532af8263099bb5561afa0c1 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 730/890] x86: unify whitespace and comments in
 arch/x86/boot/compressed/misc_??.c

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c | 20 +++++++++++++-------
 arch/x86/boot/compressed/misc_64.c | 13 +++++++------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index a1979e322ef0..52fc214546e9 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -1,7 +1,7 @@
 /*
  * misc.c
- * 
- * This is a collection of several routines from gzip-1.0.3 
+ *
+ * This is a collection of several routines from gzip-1.0.3
  * adapted for Linux.
  *
  * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
@@ -9,6 +9,11 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+/*
+ * we have to be careful, because no indirections are allowed here, and
+ * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening
+ */
 #undef CONFIG_PARAVIRT
 #include <linux/linkage.h>
 #include <linux/vmalloc.h>
@@ -261,7 +266,7 @@ static void putstr(const char *s)
 				y--;
 			}
 		} else {
-			vidmem [ ( x + cols * y ) * 2 ] = c;
+			vidmem [(x + cols * y) * 2] = c;
 			if ( ++x >= cols ) {
 				x = 0;
 				if ( ++y >= lines ) {
@@ -345,7 +350,8 @@ static void error(char *x)
 }
 
 asmlinkage void decompress_kernel(void *rmode, unsigned long end,
-			uch *input_data, unsigned long input_len, uch *output)
+				  uch *input_data, unsigned long input_len,
+				  uch *output)
 {
 	real_mode = rmode;
 
@@ -360,10 +366,10 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long end,
 	lines = RM_SCREEN_INFO.orig_video_lines;
 	cols = RM_SCREEN_INFO.orig_video_cols;
 
-	window = output;  	/* Output buffer (Normally at 1M) */
-	free_mem_ptr     = end;	/* Heap  */
+	window = output;		/* Output buffer (Normally at 1M) */
+	free_mem_ptr     = end;		/* Heap */
 	free_mem_end_ptr = end + HEAP_SIZE;
-	inbuf  = input_data;	/* Input buffer */
+	inbuf  = input_data;		/* Input buffer */
 	insize = input_len;
 	inptr  = 0;
 
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 355e31bbaab3..5029fb3c5740 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -1,7 +1,7 @@
 /*
  * misc.c
- * 
- * This is a collection of several routines from gzip-1.0.3 
+ *
+ * This is a collection of several routines from gzip-1.0.3
  * adapted for Linux.
  *
  * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
@@ -260,7 +260,7 @@ static void putstr(const char *s)
 				y--;
 			}
 		} else {
-			vidmem [ ( x + cols * y ) * 2 ] = c; 
+			vidmem [(x + cols * y) * 2] = c;
 			if ( ++x >= cols ) {
 				x = 0;
 				if ( ++y >= lines ) {
@@ -344,7 +344,8 @@ static void error(char *x)
 }
 
 asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
-	uch *input_data, unsigned long input_len, uch *output)
+				  uch *input_data, unsigned long input_len,
+				  uch *output)
 {
 	real_mode = rmode;
 
@@ -359,8 +360,8 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
 	lines = RM_SCREEN_INFO.orig_video_lines;
 	cols = RM_SCREEN_INFO.orig_video_cols;
 
-	window = output;  		/* Output buffer (Normally at 1M) */
-	free_mem_ptr     = heap;	/* Heap  */
+	window = output;		/* Output buffer (Normally at 1M) */
+	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + HEAP_SIZE;
 	inbuf  = input_data;		/* Input buffer */
 	insize = input_len;

From 8751f97486e071b73240e0a0c08e3b663766e45f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 731/890] x86: unify headers in
 arch/x86/boot/compressed/misc_??.c

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c | 1 -
 arch/x86/boot/compressed/misc_64.c | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index 52fc214546e9..ad0528a83151 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -16,7 +16,6 @@
  */
 #undef CONFIG_PARAVIRT
 #include <linux/linkage.h>
-#include <linux/vmalloc.h>
 #include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 5029fb3c5740..ca760fb69b95 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -22,6 +22,7 @@
 #include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/boot.h>
 
 /* WARNING!!
  * This code is compiled with -fPIC and it is relocated dynamically

From 4c83d6536e7bcc8c0c1319c66337f3f2c89cd06a Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 732/890] x86: unify variable names in
 arch/x86/boot/compressed/misc_??.c

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc_32.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index ad0528a83151..2b1c22de558a 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -348,7 +348,7 @@ static void error(char *x)
 		asm("hlt");
 }
 
-asmlinkage void decompress_kernel(void *rmode, unsigned long end,
+asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
 				  uch *input_data, unsigned long input_len,
 				  uch *output)
 {
@@ -366,15 +366,15 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long end,
 	cols = RM_SCREEN_INFO.orig_video_cols;
 
 	window = output;		/* Output buffer (Normally at 1M) */
-	free_mem_ptr     = end;		/* Heap */
-	free_mem_end_ptr = end + HEAP_SIZE;
+	free_mem_ptr     = heap;	/* Heap */
+	free_mem_end_ptr = heap + HEAP_SIZE;
 	inbuf  = input_data;		/* Input buffer */
 	insize = input_len;
 	inptr  = 0;
 
 	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
 		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
-	if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #ifndef CONFIG_RELOCATABLE
 	if ((u32)output != LOAD_PHYSICAL_ADDR)

From 778cb929adaef0eb4cce74403c0f918d2751cd8f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 733/890] x86: unify arch/x86/boot/compressed/misc_??.c

Remainder of unification can occur inplace.

size reports no change in arch/x86/boot/compressed/vmlinux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/Makefile             |   4 +-
 .../x86/boot/compressed/{misc_32.c => misc.c} |  32 +-
 arch/x86/boot/compressed/misc_64.c            | 381 ------------------
 3 files changed, 30 insertions(+), 387 deletions(-)
 rename arch/x86/boot/compressed/{misc_32.c => misc.c} (94%)
 delete mode 100644 arch/x86/boot/compressed/misc_64.c

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index baffeafa798f..fe24ceabd909 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc_$(BITS).o piggy.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -18,7 +18,7 @@ KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc.c
similarity index 94%
rename from arch/x86/boot/compressed/misc_32.c
rename to arch/x86/boot/compressed/misc.c
index 2b1c22de558a..8182e32c1b42 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -15,6 +15,11 @@
  * we just keep it from happening
  */
 #undef CONFIG_PARAVIRT
+#ifdef CONFIG_X86_64
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+#endif
+
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <asm/io.h>
@@ -190,10 +195,20 @@ static void *memcpy(void *dest, const void *src, unsigned n);
 
 static void putstr(const char *);
 
-static unsigned long free_mem_ptr;
-static unsigned long free_mem_end_ptr;
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
 
+static memptr free_mem_ptr;
+static memptr free_mem_end_ptr;
+
+#ifdef CONFIG_X86_64
+#define HEAP_SIZE             0x7000
+#else
 #define HEAP_SIZE             0x4000
+#endif
 
 static char *vidmem = (char *)0xb8000;
 static int vidport;
@@ -234,7 +249,7 @@ static void gzip_mark(void **ptr)
 
 static void gzip_release(void **ptr)
 {
-	free_mem_ptr = (unsigned long) *ptr;
+	free_mem_ptr = (memptr) *ptr;
 }
  
 static void scroll(void)
@@ -251,8 +266,10 @@ static void putstr(const char *s)
 	int x,y,pos;
 	char c;
 
+#ifdef CONFIG_X86_32
 	if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0)
 		return;
+#endif
 
 	x = RM_SCREEN_INFO.orig_x;
 	y = RM_SCREEN_INFO.orig_y;
@@ -348,7 +365,7 @@ static void error(char *x)
 		asm("hlt");
 }
 
-asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
+asmlinkage void decompress_kernel(void *rmode, memptr heap,
 				  uch *input_data, unsigned long input_len,
 				  uch *output)
 {
@@ -372,6 +389,12 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
 	insize = input_len;
 	inptr  = 0;
 
+#ifdef CONFIG_X86_64
+	if ((ulg)output & (__KERNEL_ALIGN - 1))
+		error("Destination address not 2M aligned");
+	if ((ulg)output >= 0xffffffffffUL)
+		error("Destination address too large");
+#else
 	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
 		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
 	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
@@ -379,6 +402,7 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
 #ifndef CONFIG_RELOCATABLE
 	if ((u32)output != LOAD_PHYSICAL_ADDR)
 		error("Wrong destination address");
+#endif
 #endif
 
 	makecrc();
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
deleted file mode 100644
index ca760fb69b95..000000000000
--- a/arch/x86/boot/compressed/misc_64.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * misc.c
- *
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
- *
- * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
- * puts by Nick Holloway 1993, better puts by Martin Mares 1995
- * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
- */
-
-/*
- * we have to be careful, because no indirections are allowed here, and
- * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
- * we just keep it from happening
- */
-#undef CONFIG_PARAVIRT
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-
-#include <linux/linkage.h>
-#include <linux/screen_info.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/boot.h>
-
-/* WARNING!!
- * This code is compiled with -fPIC and it is relocated dynamically
- * at run time, but no relocation processing is performed.
- * This means that it is not safe to place pointers in static structures.
- */
-
-/*
- * Getting to provable safe in place decompression is hard.
- * Worst case behaviours need to be analyzed.
- * Background information:
- *
- * The file layout is:
- *    magic[2]
- *    method[1]
- *    flags[1]
- *    timestamp[4]
- *    extraflags[1]
- *    os[1]
- *    compressed data blocks[N]
- *    crc[4] orig_len[4]
- *
- * resulting in 18 bytes of non compressed data overhead.
- *
- * Files divided into blocks
- * 1 bit (last block flag)
- * 2 bits (block type)
- *
- * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
- * The smallest block type encoding is always used.
- *
- * stored:
- *    32 bits length in bytes.
- *
- * fixed:
- *    magic fixed tree.
- *    symbols.
- *
- * dynamic:
- *    dynamic tree encoding.
- *    symbols.
- *
- *
- * The buffer for decompression in place is the length of the
- * uncompressed data, plus a small amount extra to keep the algorithm safe.
- * The compressed data is placed at the end of the buffer.  The output
- * pointer is placed at the start of the buffer and the input pointer
- * is placed where the compressed data starts.  Problems will occur
- * when the output pointer overruns the input pointer.
- *
- * The output pointer can only overrun the input pointer if the input
- * pointer is moving faster than the output pointer.  A condition only
- * triggered by data whose compressed form is larger than the uncompressed
- * form.
- *
- * The worst case at the block level is a growth of the compressed data
- * of 5 bytes per 32767 bytes.
- *
- * The worst case internal to a compressed block is very hard to figure.
- * The worst case can at least be boundined by having one bit that represents
- * 32764 bytes and then all of the rest of the bytes representing the very
- * very last byte.
- *
- * All of which is enough to compute an amount of extra data that is required
- * to be safe.  To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient.  To avoind problems internal to a block
- * adding an extra 32767 bytes (the worst case uncompressed block size) is
- * sufficient, to ensure that in the worst case the decompressed data for
- * block will stop the byte before the compressed data for a block begins.
- * To avoid problems with the compressed data's meta information an extra 18
- * bytes are needed.  Leading to the formula:
- *
- * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
- *
- * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
- * Adding 32768 instead of 32767 just makes for round numbers.
- * Adding the decompressor_size is necessary as it musht live after all
- * of the data as well.  Last I measured the decompressor is about 14K.
- * 10K of actual data and 4K of bss.
- *
- */
-
-/*
- * gzip declarations
- */
-
-#define OF(args)  args
-#define STATIC static
-
-#undef memset
-#undef memcpy
-#define memzero(s, n)     memset ((s), 0, (n))
-
-typedef unsigned char  uch;
-typedef unsigned short ush;
-typedef unsigned long  ulg;
-
-#define WSIZE 0x80000000	/* Window size must be at least 32k,
-				 * and a power of two
-				 * We don't actually have a window just
-				 * a huge output buffer so I report
-				 * a 2G windows size, as that should
-				 * always be larger than our output buffer.
-				 */
-
-static uch *inbuf;	/* input buffer */
-static uch *window;	/* Sliding window buffer, (and final output buffer) */
-
-static unsigned insize;  /* valid bytes in inbuf */
-static unsigned inptr;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt;  /* bytes in output buffer */
-
-/* gzip flag byte */
-#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
-#define COMMENT      0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
-#define RESERVED     0xC0 /* bit 6,7:   reserved */
-
-#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-		
-/* Diagnostic functions */
-#ifdef DEBUG
-#  define Assert(cond,msg) {if(!(cond)) error(msg);}
-#  define Trace(x) fprintf x
-#  define Tracev(x) {if (verbose) fprintf x ;}
-#  define Tracevv(x) {if (verbose>1) fprintf x ;}
-#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
-#else
-#  define Assert(cond,msg)
-#  define Trace(x)
-#  define Tracev(x)
-#  define Tracevv(x)
-#  define Tracec(c,x)
-#  define Tracecv(c,x)
-#endif
-
-static int  fill_inbuf(void);
-static void flush_window(void);
-static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-  
-/*
- * This is set up by the setup-routine at boot-time
- */
-static unsigned char *real_mode; /* Pointer to real-mode data */
-
-#define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
-#ifndef STANDARD_MEMORY_BIOS_CALL
-#define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
-#endif
-#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-
-extern unsigned char input_data[];
-extern int input_len;
-
-static long bytes_out = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-
-static void *memset(void *s, int c, unsigned n);
-static void *memcpy(void *dest, const void *src, unsigned n);
-
-static void putstr(const char *);
-
-static long free_mem_ptr;
-static long free_mem_end_ptr;
-
-#define HEAP_SIZE             0x7000
-
-static char *vidmem = (char *)0xb8000;
-static int vidport;
-static int lines, cols;
-
-#include "../../../../lib/inflate.c"
-
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr <= 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
- 
-static void scroll(void)
-{
-	int i;
-
-	memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
-	for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
-		vidmem[i] = ' ';
-}
-
-static void putstr(const char *s)
-{
-	int x,y,pos;
-	char c;
-
-	x = RM_SCREEN_INFO.orig_x;
-	y = RM_SCREEN_INFO.orig_y;
-
-	while ( ( c = *s++ ) != '\0' ) {
-		if ( c == '\n' ) {
-			x = 0;
-			if ( ++y >= lines ) {
-				scroll();
-				y--;
-			}
-		} else {
-			vidmem [(x + cols * y) * 2] = c;
-			if ( ++x >= cols ) {
-				x = 0;
-				if ( ++y >= lines ) {
-					scroll();
-					y--;
-				}
-			}
-		}
-	}
-
-	RM_SCREEN_INFO.orig_x = x;
-	RM_SCREEN_INFO.orig_y = y;
-
-	pos = (x + cols * y) * 2;	/* Update cursor position */
-	outb(14, vidport);
-	outb(0xff & (pos >> 9), vidport+1);
-	outb(15, vidport);
-	outb(0xff & (pos >> 1), vidport+1);
-}
-
-static void* memset(void* s, int c, unsigned n)
-{
-	int i;
-	char *ss = s;
-
-	for (i=0;i<n;i++) ss[i] = c;
-	return s;
-}
-
-static void* memcpy(void* dest, const void* src, unsigned n)
-{
-	int i;
-	const char *s = src;
-	char *d = dest;
-
-	for (i=0;i<n;i++) d[i] = s[i];
-	return dest;
-}
-
-/* ===========================================================================
- * Fill the input buffer. This is called only when the buffer is empty
- * and at least one byte is really needed.
- */
-static int fill_inbuf(void)
-{
-	error("ran out of input data");
-	return 0;
-}
-
-/* ===========================================================================
- * Write the output window window[0..outcnt-1] and update crc and bytes_out.
- * (Used for the decompressed data only.)
- */
-static void flush_window(void)
-{
-	/* With my window equal to my output buffer
-	 * I only need to compute the crc here.
-	 */
-	ulg c = crc;         /* temporary variable */
-	unsigned n;
-	uch *in, ch;
-
-	in = window;
-	for (n = 0; n < outcnt; n++) {
-		ch = *in++;
-		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-	}
-	crc = c;
-	bytes_out += (ulg)outcnt;
-	outcnt = 0;
-}
-
-static void error(char *x)
-{
-	putstr("\n\n");
-	putstr(x);
-	putstr("\n\n -- System halted");
-
-	while (1)
-		asm("hlt");
-}
-
-asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
-				  uch *input_data, unsigned long input_len,
-				  uch *output)
-{
-	real_mode = rmode;
-
-	if (RM_SCREEN_INFO.orig_video_mode == 7) {
-		vidmem = (char *) 0xb0000;
-		vidport = 0x3b4;
-	} else {
-		vidmem = (char *) 0xb8000;
-		vidport = 0x3d4;
-	}
-
-	lines = RM_SCREEN_INFO.orig_video_lines;
-	cols = RM_SCREEN_INFO.orig_video_cols;
-
-	window = output;		/* Output buffer (Normally at 1M) */
-	free_mem_ptr     = heap;	/* Heap */
-	free_mem_end_ptr = heap + HEAP_SIZE;
-	inbuf  = input_data;		/* Input buffer */
-	insize = input_len;
-	inptr  = 0;
-
-	if ((ulg)output & (__KERNEL_ALIGN - 1))
-		error("Destination address not 2M aligned");
-	if ((ulg)output >= 0xffffffffffUL)
-		error("Destination address too large");
-
-	makecrc();
-	putstr("\nDecompressing Linux... ");
-	gunzip();
-	putstr("done.\nBooting the kernel.\n");
-	return;
-}

From 07035f076febf5fd791edbdac0e6d3f2f0e00a01 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:38 +0100
Subject: [PATCH 734/890] x86: not set boot cpu in cpu_present_map again

in init/main.c boot_cpu_init() already does that before setup_arch

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_64.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c9b7f44ea497..4ccad185ab7e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -414,12 +414,6 @@ void __init setup_arch(char **cmdline_p)
 
 	early_quirks();
 
-	/*
-	 * set this early, so we dont allocate cpu0
-	 * if MADT list doesnt list BSP first
-	 * mpparse.c/MP_processor_info() allocates logical cpu numbers.
-	 */
-	cpu_set(0, cpu_present_map);
 #ifdef CONFIG_ACPI
 	/*
 	 * Read APIC and some other early information from ACPI tables.

From 23916d49155552767726f116daf32dfb2aa3af23 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 735/890] x86: not set boot cpu in cpu_online_map in
 smp_prepare_boot_cpu()

in init/main.c boot_cpu_init() does that before.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 4e14ecb90764..cc64b8085c2a 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -921,7 +921,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 void __init smp_prepare_boot_cpu(void)
 {
 	int me = smp_processor_id();
-	cpu_set(me, cpu_online_map);
+	/* already set me in cpu_online_map in boot_cpu_init() */
 	cpu_set(me, cpu_callout_map);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
 }

From dff244af014144e4ac87dfc5b6e450dc8832710e Mon Sep 17 00:00:00 2001
From: Alistair John Strachan <alistair@devzero.co.uk>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 736/890] x86: force enable HPET on (some?) ICH9 boards

Some consumer ICH9 boards (such as the Abit IP35 Pro) do not provide a BIOS
option for enabling the HPET. The same ICH workaround used for 6,7,8 can be
applied to 9. Here I enable the only PCI id that was visible on my system.

I have confirmed the HPETs work both from userspace and as a clocksource for
the running kernel (2.6.24 here) after applying this patch.

 Force enabled HPET at base address 0xfed00000
 hpet clockevent registered
 hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
 hpet0: 4 64-bit timers, 14318180 Hz

Signed-off-by: Alistair John Strachan <alistair@devzero.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/quirks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index fab30e134836..150ba29a0d33 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -162,6 +162,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
 			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
 			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
+			 ich_force_enable_hpet);
 
 
 static struct pci_dev *cached_dev;

From 261a5ec36befbe6bae069be14f45e618dcd35146 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 737/890] x86: change aper valid checking sequence

old sequence:
  size ==> >4G  ==> point to RAM

changed to:
  >4G ==> point to RAM ==> size

some bios even leave aper to unclear, so check size at last.

To avoid reporting:

  Node 0: Aperture @ 4a42000000 size 32 MB
  Aperture too small (32 MB)

with this change we will get:

  Node 0: Aperture @ 4a42000000 size 32 MB
  Aperture beyond 4G. Ignoring.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/aperture_64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 0b837bb3becb..608152a2a05e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -85,10 +85,6 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size)
 	if (!aper_base)
 		return 0;
 
-	if (aper_size < 64*1024*1024) {
-		printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
-		return 0;
-	}
 	if (aper_base + aper_size > 0x100000000UL) {
 		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
 		return 0;
@@ -97,6 +93,10 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size)
 		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
 		return 0;
 	}
+	if (aper_size < 64*1024*1024) {
+		printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
+		return 0;
+	}
 
 	return 1;
 }

From a89780f3b84f9a421e2608580b55f12b3ac4f9c2 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 738/890] xen: fix mismerge in masking pte flags

Looks like a mismerge/misapply dropped one of the cases of pte flag
masking for Xen.  Also, only mask the flags for present ptes.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3e9e095c295c..e6184735545f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -241,8 +241,10 @@ unsigned long long xen_pgd_val(pgd_t pgd)
 
 pte_t xen_make_pte(unsigned long long pte)
 {
-	if (pte & 1)
+	if (pte & _PAGE_PRESENT) {
 		pte = phys_to_machine(XPADDR(pte)).maddr;
+		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	}
 
 	return (pte_t){ .pte = pte };
 }
@@ -288,10 +290,10 @@ unsigned long xen_pgd_val(pgd_t pgd)
 
 pte_t xen_make_pte(unsigned long pte)
 {
-	if (pte & _PAGE_PRESENT)
+	if (pte & _PAGE_PRESENT) {
 		pte = phys_to_machine(XPADDR(pte)).maddr;
-
-	pte &= ~(_PAGE_PCD | _PAGE_PWT);
+		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	}
 
 	return (pte_t){ pte };
 }

From 6c435456dc91ace468b4e9d72ad0e13dafa22a45 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 739/890] x86: add mm parameter to paravirt_alloc_pd

Add mm to paravirt_alloc_pd, partly to make it consistent with
paravirt_alloc_pt, and because later changes will make use of it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmi_32.c     | 2 +-
 arch/x86/mm/init_32.c        | 4 ++--
 arch/x86/mm/pgtable_32.c     | 4 +++-
 include/asm-x86/paravirt.h   | 6 +++---
 include/asm-x86/pgalloc_32.h | 3 +--
 5 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 2ee5d8e0ada5..4525bc2c2e19 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -398,7 +398,7 @@ static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pd(u32 pfn)
+static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
 {
  	/*
 	 * This call comes in very early, before mem_map is setup.
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 04332c09ad1d..98d2acae4f64 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -65,7 +65,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-		paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		if (pmd_table != pmd_offset(pud, 0))
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
 	memset(&base[USER_PTRS_PER_PGD], 0,
 	       KERNEL_PGD_PTRS * sizeof(pgd_t));
 #else
-	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
+	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
 #endif
 }
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index be61a1d845a4..f85ee44720d2 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -330,13 +330,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (PTRS_PER_PMD == 1 || !pgd)
 		return pgd;
 
+	mm->pgd = pgd;		/* so that alloc_pd can use it */
+
  	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
 		pmd_t *pmd = pmd_cache_alloc(i);
 
 		if (!pmd)
 			goto out_oom;
 
-		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
+		paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
 	return pgd;
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 9f9ff57b3efb..d6236eb46466 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -221,7 +221,7 @@ struct pv_mmu_ops {
 
 	/* Hooks for allocating/releasing pagetable pages */
 	void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pd)(u32 pfn);
+	void (*alloc_pd)(struct mm_struct *mm, u32 pfn);
 	void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
 	void (*release_pt)(u32 pfn);
 	void (*release_pd)(u32 pfn);
@@ -903,9 +903,9 @@ static inline void paravirt_release_pt(unsigned pfn)
 	PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
 }
 
-static inline void paravirt_alloc_pd(unsigned pfn)
+static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn)
 {
-	PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
+	PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn);
 }
 
 static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index f2fc33ceb9f2..fbc6357f5eba 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -8,8 +8,7 @@
 #include <asm/paravirt.h>
 #else
 #define paravirt_alloc_pt(mm, pfn) do { } while (0)
-#define paravirt_alloc_pd(pfn) do { } while (0)
-#define paravirt_alloc_pd(pfn) do { } while (0)
+#define paravirt_alloc_pd(mm, pfn) do { } while (0)
 #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
 #define paravirt_release_pt(pfn) do { } while (0)
 #define paravirt_release_pd(pfn) do { } while (0)

From a5a19c63f4e55e32dc0bc3d936d7f94793d8b380 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 740/890] x86: demacro asm-x86/pgalloc_32.h

Convert macros into inline functions, for better type-checking.

This patch required a little bit of fiddling with headers in order to
make __(pte|pmd)_free_tlb inline rather than macros.
asm-generic/tlb.h includes asm/pgalloc.h, though it doesn't directly
use any pgalloc definitions.  I removed this include to avoid an
include cycle, but it may cause secondary compile failures by things
depending on the indirect inclusion; arch/x86/mm/hugetlbpage.c was one
such place; there may be others.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/hugetlbpage.c        |  3 +-
 arch/x86/mm/init_32.c            |  1 +
 include/asm-generic/tlb.h        |  1 -
 include/asm-x86/pgalloc_32.h     | 61 ++++++++++++++++++++------------
 include/asm-x86/pgtable-3level.h |  2 --
 include/linux/swap.h             |  1 +
 6 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 6c06d9c0488e..4fbafb4bc2f0 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -15,6 +15,7 @@
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 static unsigned long page_table_shareable(struct vm_area_struct *svma,
 				struct vm_area_struct *vma,
@@ -88,7 +89,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 
 	spin_lock(&mm->page_table_lock);
 	if (pud_none(*pud))
-		pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK);
+		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
 	else
 		put_page(virt_to_page(spte));
 	spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 98d2acae4f64..459b384acda9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -41,6 +41,7 @@
 #include <asm/apic.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 75f2bfab614f..6ce9f3ab928d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -15,7 +15,6 @@
 
 #include <linux/swap.h>
 #include <linux/quicklist.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
 /*
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index fbc6357f5eba..3482c3427897 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -3,6 +3,8 @@
 
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
+#include <asm/tlb.h>
+#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
@@ -14,19 +16,20 @@
 #define paravirt_release_pd(pfn) do { } while (0)
 #endif
 
-#define pmd_populate_kernel(mm, pmd, pte)			\
-do {								\
-	paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);		\
-	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));		\
-} while (0)
+static inline void pmd_populate_kernel(struct mm_struct *mm,
+				       pmd_t *pmd, pte_t *pte)
+{
+	paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
+	set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+}
 
-#define pmd_populate(mm, pmd, pte) 				\
-do {								\
-	paravirt_alloc_pt(mm, page_to_pfn(pte));		\
-	set_pmd(pmd, __pmd(_PAGE_TABLE +			\
-		((unsigned long long)page_to_pfn(pte) <<	\
-			(unsigned long long) PAGE_SHIFT)));	\
-} while (0)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+{
+	unsigned long pfn = page_to_pfn(pte);
+
+	paravirt_alloc_pt(mm, pfn);
+	set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+}
 
 /*
  * Allocate and free page tables.
@@ -48,20 +51,34 @@ static inline void pte_free(struct page *pte)
 }
 
 
-#define __pte_free_tlb(tlb,pte) 					\
-do {									\
-	paravirt_release_pt(page_to_pfn(pte));				\
-	tlb_remove_page((tlb),(pte));					\
-} while (0)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+	paravirt_release_pt(page_to_pfn(pte));
+	tlb_remove_page(tlb, pte);
+}
 
 #ifdef CONFIG_X86_PAE
 /*
  * In the PAE case we free the pmds as part of the pgd.
  */
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)			do { } while (0)
-#define __pmd_free_tlb(tlb,x)		do { } while (0)
-#define pud_populate(mm, pmd, pte)	BUG()
-#endif
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	BUG();
+	return (pmd_t *)2;
+}
+
+static inline void pmd_free(pmd_t *pmd)
+{
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	BUG();
+}
+#endif	/* CONFIG_X86_PAE */
 
 #endif /* _I386_PGALLOC_H */
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 5af7a44ed902..62bb06575d5a 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -157,6 +157,4 @@ static inline unsigned long pte_pfn(pte_t pte)
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
-#define __pmd_free_tlb(tlb, x)		do { } while (0)
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4f3838adbb30..2c3ce4c69b25 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -6,6 +6,7 @@
 #include <linux/mmzone.h>
 #include <linux/list.h>
 #include <linux/sched.h>
+#include <linux/pagemap.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>

From 1c70e9bd832642b712181e32d1bbf2436058a3df Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:39 +0100
Subject: [PATCH 741/890] xen: deal with pmd being allocated/freed

Deal properly with pmd-level pages being allocated and freed
dynamically.  We can handle them more or less the same as pte pages.

Also, deal with early_ioremap pagetable manipulations.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 845b4fd94463..de647bc6e74d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -658,6 +658,13 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
+/* Early release_pt assumes that all pts are pinned, since there's
+   only init_mm and anything attached to that is pinned. */
+static void xen_release_pt_init(u32 pfn)
+{
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+}
+
 static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
 {
 	struct mmuext_op op;
@@ -669,7 +676,7 @@ static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
 
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 {
 	struct page *page = pfn_to_page(pfn);
 
@@ -678,7 +685,7 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
 
 		if (!PageHighMem(page)) {
 			make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-			pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+			pin_pagetable_pfn(level, pfn);
 		} else
 			/* make sure there are no stray mappings of
 			   this page */
@@ -686,6 +693,16 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
 	}
 }
 
+static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE);
+}
+
+static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE);
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_pt(u32 pfn)
 {
@@ -788,6 +805,9 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
 	pv_mmu_ops.alloc_pt = xen_alloc_pt;
+	pv_mmu_ops.alloc_pd = xen_alloc_pd;
+	pv_mmu_ops.release_pt = xen_release_pt;
+	pv_mmu_ops.release_pd = xen_release_pt;
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1011,10 +1031,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update_defer = paravirt_nop,
 
 	.alloc_pt = xen_alloc_pt_init,
-	.release_pt = xen_release_pt,
-	.alloc_pd = paravirt_nop,
+	.release_pt = xen_release_pt_init,
+	.alloc_pd = xen_alloc_pt_init,
 	.alloc_pd_clone = paravirt_nop,
-	.release_pd = paravirt_nop,
+	.release_pd = xen_release_pt_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,

From 8fe3deef013bebdbed1f75ae59ef9707fb6e5cc7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:40 +0100
Subject: [PATCH 742/890] x86: preallocate pmds at pgd creation time

In PAE mode, an update to the pgd requires a cr3 reload to make sure
the processor notices the changes.  Since this also has the
side-effect of flushing the tlb, its an expensive operation which we
want to avoid where possible.

This patch mitigates the cost of installing the initial set of pmds on
process creation by preallocating them when the pgd is allocated.
This avoids up to three tlb flushes during exec, as it creates the new
process address space while the pagetable is in active use.

The pmds will be freed as part of the normal pagetable teardown in
free_pgtables, which is called in munmap and process exit.  However,
free_pgtables will only free parts of the pagetable which actually
contain mappings, so stray pmds may still be attached to the pgd at
pgd_free time.  We must mop them up to prevent a memory leak.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: William Irwin <wli@holomorphy.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable_32.c | 70 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f85ee44720d2..33ddddfc26b0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -294,6 +294,70 @@ static void pgd_dtor(void *pgd)
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < USER_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+ 	for (addr = i = 0; i < USER_PTRS_PER_PGD; i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(pgd);
+			return 0;
+		}
+
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
+}
+
+static void pgd_mop_up_pmds(pgd_t *pgd)
+{
+}
+#endif	/* CONFIG_X86_PAE */
+
 /* If we allocate a pmd for part of the kernel address space, then
    make sure its initialized with the appropriate kernel mappings.
    Otherwise use a cached zeroed pmd.  */
@@ -341,6 +405,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 		paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		quicklist_free(0, pgd_dtor, pgd);
+		pgd = NULL;
+	}
+
 	return pgd;
 
 out_oom:
@@ -367,6 +436,7 @@ void pgd_free(pgd_t *pgd)
 			pmd_cache_free(pmd, i);
 		}
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
+	pgd_mop_up_pmds(pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
 

From 508bebbb1f211fbf3f392feea44218045096f240 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:33:40 +0100
Subject: [PATCH 743/890] x86: allocate and initialize unshared pmds

If SHARED_KERNEL_PMD is false, then we need to allocate and initialize
the kernel pmd.  We can easily piggy-back this onto the existing pmd
prepopulation code.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable_32.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 33ddddfc26b0..3a6c9200058d 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -305,7 +305,7 @@ static void pgd_mop_up_pmds(pgd_t *pgdp)
 {
 	int i;
 
-	for(i = 0; i < USER_PTRS_PER_PGD; i++) {
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
 		pgd_t pgd = pgdp[i];
 
 		if (pgd_val(pgd) != 0) {
@@ -325,6 +325,10 @@ static void pgd_mop_up_pmds(pgd_t *pgdp)
  * processor notices the update.  Since this is expensive, and
  * all 4 top-level entries are used almost immediately in a
  * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
  */
 static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 {
@@ -333,7 +337,8 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 	int i;
 
 	pud = pud_offset(pgd, 0);
- 	for (addr = i = 0; i < USER_PTRS_PER_PGD; i++, pud++, addr += PUD_SIZE) {
+ 	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
 		pmd_t *pmd = pmd_alloc_one(mm, addr);
 
 		if (!pmd) {
@@ -341,6 +346,10 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 			return 0;
 		}
 
+		if (i >= USER_PTRS_PER_PGD)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+
 		pud_populate(mm, pud, pmd);
 	}
 
@@ -444,4 +453,3 @@ void check_pgt_cache(void)
 {
 	quicklist_trim(0, pgd_dtor, 25, 16);
 }
-

From 5a96f4a55c3b0bfd40771a973b173e1b94909559 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 30 Jan 2008 13:33:40 +0100
Subject: [PATCH 744/890] x86: fix recursion in
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c

remove the recursion from this function.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 77e666628818..32671da8184e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -555,7 +555,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 	int err = 0;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
-		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		err = threshold_create_bank(cpu, bank);
 		if (err)
@@ -638,7 +638,7 @@ static void threshold_remove_device(unsigned int cpu)
 	unsigned int bank;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
-		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		threshold_remove_bank(cpu, bank);
 	}

From 6371b495991debfd1417b17c2bc4f7d7bae05739 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:40 +0100
Subject: [PATCH 745/890] x86: change ioremap() to default to uncached

Prepare ioremap() to default to uncached. This will be the
safest - but first we have to fix CPA.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/io_32.h | 15 ++++++++++++---
 include/asm-x86/io_64.h | 23 +++++++++++++++++------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 2a04bd17eac5..db3978846379 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -111,18 +111,27 @@ extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsign
  * make bus memory CPU accessible via the readb/readw/readl/writeb/
  * writew/writel functions and the other mmio helpers. The returned
  * address is not guaranteed to be usable directly as a virtual
- * address. 
+ * address.
  *
  * If the area you are trying to map is a PCI BAR you should have a
  * look at pci_iomap().
  */
+extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
 
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *
+ioremap_cache(unsigned long offset, unsigned long size)
 {
 	return __ioremap(offset, size, 0);
 }
 
-extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+	return ioremap_nocache(offset, size);
+}
+
 extern void iounmap(volatile void __iomem *addr);
 
 /*
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index fef0ce2ced81..7dee3c6e9c39 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -152,11 +152,6 @@ static inline void * phys_to_virt(unsigned long address)
 
 extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
 
-static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, 0);
-}
-
 extern void *early_ioremap(unsigned long addr, unsigned long size);
 extern void early_iounmap(void *addr, unsigned long size);
 
@@ -165,8 +160,24 @@ extern void early_iounmap(void *addr, unsigned long size);
  * it's useful if some control registers are in such an area and write combining
  * or read caching is not desirable:
  */
-extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
+
+static inline void __iomem *
+ioremap_cache(unsigned long offset, unsigned long size)
+{
+	return __ioremap(offset, size, 0);
+}
+
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+	return ioremap_cache(offset, size);
+}
+
 extern void iounmap(volatile void __iomem *addr);
+
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 /*

From 9f4c815ce7ab53150f17c97a382f136a8fb68044 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:41 +0100
Subject: [PATCH 746/890] x86: clean up arch/x86/mm/pageattr_32.c

clean up arch/x86/mm/pageattr_32.c.

no code changed:

   text    data     bss     dec     hex filename
   1255      40       0    1295     50f pageattr_32.o.before
   1255      40       0    1295     50f pageattr_32.o.after

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 151 +++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 68 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 260073c07600..be4656403d77 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -1,28 +1,29 @@
-/* 
- * Copyright 2002 Andi Kleen, SuSE Labs. 
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
  * Thanks to Ben LaHaise for precious feedback.
- */ 
+ */
 
-#include <linux/mm.h>
-#include <linux/sched.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/mm.h>
+
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 #include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
 
 static DEFINE_SPINLOCK(cpa_lock);
 static struct list_head df_list = LIST_HEAD_INIT(df_list);
 
-
-pte_t *lookup_address(unsigned long address) 
-{ 
+pte_t *lookup_address(unsigned long address)
+{
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
+
 	if (pgd_none(*pgd))
 		return NULL;
 	pud = pud_offset(pgd, address);
@@ -33,21 +34,22 @@ pte_t *lookup_address(unsigned long address)
 		return NULL;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
-        return pte_offset_kernel(pmd, address);
-} 
 
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
-					pgprot_t ref_prot)
-{ 
-	int i; 
+	return pte_offset_kernel(pmd, address);
+}
+
+static struct page *
+split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+{
 	unsigned long addr;
 	struct page *base;
 	pte_t *pbase;
+	int i;
 
 	spin_unlock_irq(&cpa_lock);
 	base = alloc_pages(GFP_KERNEL, 0);
 	spin_lock_irq(&cpa_lock);
-	if (!base) 
+	if (!base)
 		return NULL;
 
 	/*
@@ -58,22 +60,24 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
 	page_private(base) = 0;
 
 	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK; 
+	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-               set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
-                                          addr == address ? prot : ref_prot));
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
+					   addr == address ? prot : ref_prot));
 	}
 	return base;
-} 
+}
 
 static void cache_flush_page(struct page *p)
-{ 
-	void *adr = page_address(p);
+{
+	void *addr = page_address(p);
 	int i;
+
 	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
-		clflush(adr+i);
+		clflush(addr + i);
 }
 
 static void flush_kernel_map(void *arg)
@@ -83,23 +87,27 @@ static void flush_kernel_map(void *arg)
 
 	/* High level code is not ready for clflush yet */
 	if (0 && cpu_has_clflush) {
-		list_for_each_entry (p, lh, lru)
+		list_for_each_entry(p, lh, lru)
 			cache_flush_page(p);
-	} else if (boot_cpu_data.x86_model >= 4)
-		wbinvd();
+	} else {
+		if (boot_cpu_data.x86_model >= 4)
+			wbinvd();
+	}
 
-	/* Flush all to work around Errata in early athlons regarding 
-	 * large page flushing. 
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
 	 */
-	__flush_tlb_all(); 	
+	__flush_tlb_all();
 }
 
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
-{ 
-	struct page *page;
+static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+{
 	unsigned long flags;
+	struct page *page;
 
-	set_pte_atomic(kpte, pte); 	/* change init_mm */
+	/* change init_mm */
+	set_pte_atomic(kpte, pte);
 	if (SHARED_KERNEL_PMD)
 		return;
 
@@ -108,6 +116,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 		pgd_t *pgd;
 		pud_t *pud;
 		pmd_t *pmd;
+
 		pgd = (pgd_t *)page_address(page) + pgd_index(address);
 		pud = pud_offset(pgd, address);
 		pmd = pmd_offset(pud, address);
@@ -116,9 +125,9 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/* 
- * No more special protections in this 2/4MB area - revert to a
- * large page again. 
+/*
+ * No more special protections in this 2/4MB area - revert to a large
+ * page again.
  */
 static inline void revert_page(struct page *kpte_page, unsigned long address)
 {
@@ -142,12 +151,11 @@ static inline void save_page(struct page *kpte_page)
 		list_add(&kpte_page->lru, &df_list);
 }
 
-static int
-__change_page_attr(struct page *page, pgprot_t prot)
-{ 
-	pte_t *kpte; 
-	unsigned long address;
+static int __change_page_attr(struct page *page, pgprot_t prot)
+{
 	struct page *kpte_page;
+	unsigned long address;
+	pte_t *kpte;
 
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
@@ -155,16 +163,17 @@ __change_page_attr(struct page *page, pgprot_t prot)
 	kpte = lookup_address(address);
 	if (!kpte)
 		return -EINVAL;
+
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
+	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
 		if (!pte_huge(*kpte)) {
-			set_pte_atomic(kpte, mk_pte(page, prot)); 
+			set_pte_atomic(kpte, mk_pte(page, prot));
 		} else {
-			pgprot_t ref_prot;
 			struct page *split;
+			pgprot_t ref_prot;
 
 			ref_prot =
 			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
@@ -172,16 +181,19 @@ __change_page_attr(struct page *page, pgprot_t prot)
 			split = split_large_page(address, prot, ref_prot);
 			if (!split)
 				return -ENOMEM;
-			set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
+
+			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
 			kpte_page = split;
 		}
 		page_private(kpte_page)++;
-	} else if (!pte_huge(*kpte)) {
-		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-		BUG_ON(page_private(kpte_page) == 0);
-		page_private(kpte_page)--;
-	} else
-		BUG();
+	} else {
+		if (!pte_huge(*kpte)) {
+			set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
+			BUG_ON(page_private(kpte_page) == 0);
+			page_private(kpte_page)--;
+		} else
+			BUG();
+	}
 
 	/*
 	 * If the pte was reserved, it means it was created at boot
@@ -197,7 +209,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
 		}
 	}
 	return 0;
-} 
+}
 
 static inline void flush_map(struct list_head *l)
 {
@@ -211,32 +223,33 @@ static inline void flush_map(struct list_head *l)
  * than write-back somewhere - some CPUs do not like it when mappings with
  * different caching policies exist. This changes the page attributes of the
  * in kernel linear mapping too.
- * 
+ *
  * The caller needs to ensure that there are no conflicting mappings elsewhere.
  * This function only deals with the kernel linear map.
- * 
+ *
  * Caller must call global_flush_tlb() after this.
  */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
-	int err = 0; 
-	int i; 
 	unsigned long flags;
+	int err = 0, i;
 
 	spin_lock_irqsave(&cpa_lock, flags);
-	for (i = 0; i < numpages; i++, page++) { 
+	for (i = 0; i < numpages; i++, page++) {
 		err = __change_page_attr(page, prot);
-		if (err) 
-			break; 
-	} 	
+		if (err)
+			break;
+	}
 	spin_unlock_irqrestore(&cpa_lock, flags);
+
 	return err;
 }
+EXPORT_SYMBOL(change_page_attr);
 
 void global_flush_tlb(void)
 {
-	struct list_head l;
 	struct page *pg, *next;
+	struct list_head l;
 
 	BUG_ON(irqs_disabled());
 
@@ -253,26 +266,28 @@ void global_flush_tlb(void)
 		__free_page(pg);
 	}
 }
+EXPORT_SYMBOL(global_flush_tlb);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 void kernel_map_pages(struct page *page, int numpages, int enable)
 {
 	if (PageHighMem(page))
 		return;
-	if (!enable)
+	if (!enable) {
 		debug_check_no_locks_freed(page_address(page),
 					   numpages * PAGE_SIZE);
+	}
 
-	/* the return value is ignored - the calls cannot fail,
+	/*
+	 * the return value is ignored - the calls cannot fail,
 	 * large pages are disabled at boot time.
 	 */
 	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
-	/* we should perform an IPI and flush all tlbs,
+
+	/*
+	 * we should perform an IPI and flush all tlbs,
 	 * but that can deadlock->flush only current cpu.
 	 */
 	__flush_tlb_all();
 }
 #endif
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);

From b4416a1be86b0c7bdde4e6ba526715c1a055746f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:41 +0100
Subject: [PATCH 747/890] x86: clean up arch/x86/mm/pageattr_64.c

clean up arch/x86/mm/pageattr_64.c.

no code changed:

   text    data     bss     dec     hex filename
   1751      16       0    1767     6e7 pageattr_64.o.before
   1751      16       0    1767     6e7 pageattr_64.o.after

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 145 +++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 63 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index c40afbaaf93d..14ab327cde0c 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -1,48 +1,54 @@
-/* 
- * Copyright 2002 Andi Kleen, SuSE Labs. 
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
  * Thanks to Ben LaHaise for precious feedback.
- */ 
+ */
 
-#include <linux/mm.h>
-#include <linux/sched.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/mm.h>
+
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/uaccess.h>
 #include <asm/io.h>
 
 pte_t *lookup_address(unsigned long address)
-{ 
+{
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+
 	if (pgd_none(*pgd))
 		return NULL;
 	pud = pud_offset(pgd, address);
 	if (!pud_present(*pud))
-		return NULL; 
+		return NULL;
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
-		return NULL; 
+		return NULL;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
+
 	pte = pte_offset_kernel(pmd, address);
 	if (pte && !pte_present(*pte))
-		pte = NULL; 
-	return pte;
-} 
+		pte = NULL;
 
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
-				     pgprot_t ref_prot)
-{ 
-	int i; 
+	return pte;
+}
+
+static struct page *
+split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+{
 	unsigned long addr;
-	struct page *base = alloc_pages(GFP_KERNEL, 0);
+	struct page *base;
 	pte_t *pbase;
-	if (!base) 
+	int i;
+
+	base = alloc_pages(GFP_KERNEL, 0);
+	if (!base)
 		return NULL;
 	/*
 	 * page_private is used to track the number of entries in
@@ -52,20 +58,21 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
 	page_private(base) = 0;
 
 	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK; 
+	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
+		pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
 				   addr == address ? prot : ref_prot);
 	}
 	return base;
-} 
+}
 
-void clflush_cache_range(void *adr, int size)
+void clflush_cache_range(void *addr, int size)
 {
 	int i;
+
 	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(adr+i);
+		clflush(addr+i);
 }
 
 static void flush_kernel_map(void *arg)
@@ -76,17 +83,20 @@ static void flush_kernel_map(void *arg)
 	/* When clflush is available always use it because it is
 	   much cheaper than WBINVD. */
 	/* clflush is still broken. Disable for now. */
-	if (1 || !cpu_has_clflush)
+	if (1 || !cpu_has_clflush) {
 		asm volatile("wbinvd" ::: "memory");
-	else list_for_each_entry(pg, l, lru) {
-		void *adr = page_address(pg);
-		clflush_cache_range(adr, PAGE_SIZE);
+	} else {
+		list_for_each_entry(pg, l, lru) {
+			void *addr = page_address(pg);
+
+			clflush_cache_range(addr, PAGE_SIZE);
+		}
 	}
 	__flush_tlb_all();
 }
 
 static inline void flush_map(struct list_head *l)
-{	
+{
 	on_each_cpu(flush_kernel_map, l, 1, 1);
 }
 
@@ -98,52 +108,56 @@ static inline void save_page(struct page *fpage)
 		list_add(&fpage->lru, &deferred_pages);
 }
 
-/* 
+/*
  * No more special protections in this 2/4MB area - revert to a
- * large page again. 
+ * large page again.
  */
 static void revert_page(unsigned long address, pgprot_t ref_prot)
 {
+	unsigned long pfn;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t large_pte;
-	unsigned long pfn;
 
 	pgd = pgd_offset_k(address);
 	BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd,address);
+	pud = pud_offset(pgd, address);
 	BUG_ON(pud_none(*pud));
 	pmd = pmd_offset(pud, address);
 	BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
 	pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
 	large_pte = pfn_pte(pfn, ref_prot);
 	large_pte = pte_mkhuge(large_pte);
+
 	set_pte((pte_t *)pmd, large_pte);
-}      
+}
 
 static int
 __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
-				   pgprot_t ref_prot)
-{ 
-	pte_t *kpte; 
+		   pgprot_t ref_prot)
+{
 	struct page *kpte_page;
 	pgprot_t ref_prot2;
+	pte_t *kpte;
 
 	kpte = lookup_address(address);
-	if (!kpte) return 0;
+	if (!kpte)
+		return 0;
+
 	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
-	if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
+	if (pgprot_val(prot) != pgprot_val(ref_prot)) {
 		if (!pte_huge(*kpte)) {
 			set_pte(kpte, pfn_pte(pfn, prot));
 		} else {
- 			/*
+			/*
 			 * split_large_page will take the reference for this
 			 * change_page_attr on the split page.
- 			 */
+			 */
 			struct page *split;
+
 			ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
 			split = split_large_page(address, prot, ref_prot2);
 			if (!split)
@@ -153,21 +167,23 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 			kpte_page = split;
 		}
 		page_private(kpte_page)++;
-	} else if (!pte_huge(*kpte)) {
-		set_pte(kpte, pfn_pte(pfn, ref_prot));
-		BUG_ON(page_private(kpte_page) == 0);
-		page_private(kpte_page)--;
-	} else
-		BUG();
+	} else {
+		if (!pte_huge(*kpte)) {
+			set_pte(kpte, pfn_pte(pfn, ref_prot));
+			BUG_ON(page_private(kpte_page) == 0);
+			page_private(kpte_page)--;
+		} else
+			BUG();
+	}
 
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
- 	BUG_ON(PageReserved(kpte_page));
+	BUG_ON(PageReserved(kpte_page));
 
 	save_page(kpte_page);
 	if (page_private(kpte_page) == 0)
 		revert_page(address, ref_prot);
 	return 0;
-} 
+}
 
 /*
  * Change the page attributes of an page in the linear mapping.
@@ -176,19 +192,19 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
  * than write-back somewhere - some CPUs do not like it when mappings with
  * different caching policies exist. This changes the page attributes of the
  * in kernel linear mapping too.
- * 
+ *
  * The caller needs to ensure that there are no conflicting mappings elsewhere.
  * This function only deals with the kernel linear map.
- * 
+ *
  * Caller must call global_flush_tlb() after this.
  */
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
-	int err = 0, kernel_map = 0;
-	int i; 
+	int err = 0, kernel_map = 0, i;
+
+	if (address >= __START_KERNEL_map &&
+			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
 
-	if (address >= __START_KERNEL_map
-	    && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
 		address = (unsigned long)__va(__pa(address));
 		kernel_map = 1;
 	}
@@ -198,7 +214,8 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
 		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+			err = __change_page_attr(address, pfn, prot,
+						PAGE_KERNEL);
 			if (err)
 				break;
 		}
@@ -207,14 +224,16 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 		if (__pa(address) < KERNEL_TEXT_SIZE) {
 			unsigned long addr2;
 			pgprot_t prot2;
+
 			addr2 = __START_KERNEL_map + __pa(address);
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
 			err = __change_page_attr(addr2, pfn, prot2,
 						 PAGE_KERNEL_EXEC);
-		} 
-	} 	
-	up_write(&init_mm.mmap_sem); 
+		}
+	}
+	up_write(&init_mm.mmap_sem);
+
 	return err;
 }
 
@@ -222,11 +241,13 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	unsigned long addr = (unsigned long)page_address(page);
+
 	return change_page_attr_addr(addr, numpages, prot);
 }
+EXPORT_SYMBOL(change_page_attr);
 
 void global_flush_tlb(void)
-{ 
+{
 	struct page *pg, *next;
 	struct list_head l;
 
@@ -248,8 +269,6 @@ void global_flush_tlb(void)
 			continue;
 		ClearPagePrivate(pg);
 		__free_page(pg);
-	} 
-} 
-
-EXPORT_SYMBOL(change_page_attr);
+	}
+}
 EXPORT_SYMBOL(global_flush_tlb);

From 0c42f392767d3592e1cf676857d398ef69be7c9c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:42 +0100
Subject: [PATCH 748/890] c_p_a(): do a simple self test at boot

When CONFIG_DEBUG_RODATA is enabled undo the ro mapping and redo it again.
This gives some simple testing for change_page_attr().

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug |  5 +++++
 arch/x86/mm/init_32.c  | 26 ++++++++++++++++++++++++++
 arch/x86/mm/init_64.c  | 10 ++++++++++
 3 files changed, 41 insertions(+)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9bb61e1aed69..38211ff0447f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -204,4 +204,9 @@ config DEBUG_BOOT_PARAMS
 	help
 	  This option will cause struct boot_params to be exported via debugfs.
 
+config CPA_DEBUG
+	bool "CPA self test code"
+	help
+	  Do change_page_attr self tests at boot.
+
 endmenu
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 459b384acda9..724a5eff6f34 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -799,6 +799,20 @@ void mark_rodata_ro(void)
 		change_page_attr(virt_to_page(start),
 		                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
 		printk("Write protecting the kernel text: %luk\n", size >> 10);
+
+#ifdef CONFIG_CPA_DEBUG
+		global_flush_tlb();
+
+		printk("Testing CPA: Reverting %lx-%lx\n", start, start+size);
+		change_page_attr(virt_to_page(start), size>>PAGE_SHIFT,
+				 PAGE_KERNEL_EXEC);
+		global_flush_tlb();
+
+		printk("Testing CPA: write protecting again\n");
+		change_page_attr(virt_to_page(start), size>>PAGE_SHIFT,
+				PAGE_KERNEL_RX);
+		global_flush_tlb();
+#endif
 	}
 #endif
 	start += size;
@@ -815,6 +829,18 @@ void mark_rodata_ro(void)
 	 * of who is the culprit.
 	 */
 	global_flush_tlb();
+
+#ifdef CONFIG_CPA_DEBUG
+	printk("Testing CPA: undo %lx-%lx\n", start, start + size);
+	change_page_attr(virt_to_page(start), size >> PAGE_SHIFT,
+				PAGE_KERNEL);
+	global_flush_tlb();
+
+	printk("Testing CPA: write protecting again\n");
+	change_page_attr(virt_to_page(start), size >> PAGE_SHIFT,
+				PAGE_KERNEL_RO);
+	global_flush_tlb();
+#endif
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8198840c3dcb..0fd9d7f77786 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -606,6 +606,16 @@ void mark_rodata_ro(void)
 	 * of who is the culprit.
 	 */
 	global_flush_tlb();
+
+#ifdef CONFIG_CPA_DEBUG
+	printk("Testing CPA: undo %lx-%lx\n", start, end);
+	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL);
+	global_flush_tlb();
+
+	printk("Testing CPA: again\n");
+	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
+	global_flush_tlb();
+#endif
 }
 #endif
 

From e66a95127dc4273d3573eb03657991f7bbd5dca3 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:33:42 +0100
Subject: [PATCH 749/890] x86: add dump_pagetable helper to X86_32

Similar to x86 64-bit.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 72 +++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 93ede2dde958..41c31968a74c 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -173,6 +173,44 @@ static void force_sig_info_fault(int si_signo, int si_code,
 	force_sig_info(si_signo, &info, tsk);
 }
 
+void dump_pagetable(unsigned long address)
+{
+	__typeof__(pte_val(__pte(0))) page;
+
+	page = read_cr3();
+	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+	printk("*pdpt = %016Lx ", page);
+	if ((page >> PAGE_SHIFT) < max_low_pfn
+	    && page & _PAGE_PRESENT) {
+		page &= PAGE_MASK;
+		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+		                                         & (PTRS_PER_PMD - 1)];
+		printk(KERN_CONT "*pde = %016Lx ", page);
+		page &= ~_PAGE_NX;
+	}
+#else
+	printk("*pde = %08lx ", page);
+#endif
+
+	/*
+	 * We must not directly access the pte in the highpte
+	 * case if the page table is located in highmem.
+	 * And let's rather not kmap-atomic the pte, just in case
+	 * it's allocated already.
+	 */
+	if ((page >> PAGE_SHIFT) < max_low_pfn
+	    && (page & _PAGE_PRESENT)
+	    && !(page & _PAGE_PSE)) {
+		page &= PAGE_MASK;
+		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+		                                         & (PTRS_PER_PTE - 1)];
+		printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
+	}
+
+	printk("\n");
+}
+
 void do_invalid_op(struct pt_regs *, unsigned long);
 
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
@@ -572,7 +610,6 @@ no_context:
 	bust_spinlocks(1);
 
 	if (oops_may_print()) {
-		__typeof__(pte_val(__pte(0))) page;
 
 #ifdef CONFIG_X86_PAE
 		if (error_code & PF_INSTR) {
@@ -593,38 +630,7 @@ no_context:
 		printk(" at virtual address %08lx\n", address);
 		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
 
-		page = read_cr3();
-		page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-#ifdef CONFIG_X86_PAE
-		printk("*pdpt = %016Lx ", page);
-		if ((page >> PAGE_SHIFT) < max_low_pfn
-		    && page & _PAGE_PRESENT) {
-			page &= PAGE_MASK;
-			page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-			                                         & (PTRS_PER_PMD - 1)];
-			printk(KERN_CONT "*pde = %016Lx ", page);
-			page &= ~_PAGE_NX;
-		}
-#else
-		printk("*pde = %08lx ", page);
-#endif
-
-		/*
-		 * We must not directly access the pte in the highpte
-		 * case if the page table is located in highmem.
-		 * And let's rather not kmap-atomic the pte, just in case
-		 * it's allocated already.
-		 */
-		if ((page >> PAGE_SHIFT) < max_low_pfn
-		    && (page & _PAGE_PRESENT)
-		    && !(page & _PAGE_PSE)) {
-			page &= PAGE_MASK;
-			page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-			                                         & (PTRS_PER_PTE - 1)];
-			printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-		}
-
-		printk("\n");
+		dump_pagetable(address);
 	}
 
 	tsk->thread.cr2 = address;

From 4c3c4b4513a361cc6ac5ee8677695260c4f0f25f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:42 +0100
Subject: [PATCH 750/890] x86: clean up pte_exec

- Rename it to pte_exec() from pte_exec_kernel(). There is nothing
kernel specific in there.
- Move it into the common file because _PAGE_NX is 0 on !PAE and then
pte_exec() will be always evaluate to true.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c           | 2 +-
 arch/x86/mm/init_32.c            | 2 +-
 include/asm-x86/pgtable-2level.h | 8 --------
 include/asm-x86/pgtable-3level.h | 8 --------
 include/asm-x86/pgtable.h        | 1 +
 5 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 41c31968a74c..f7972ae7da07 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -615,7 +615,7 @@ no_context:
 		if (error_code & PF_INSTR) {
 			pte_t *pte = lookup_address(address);
 
-			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+			if (pte && pte_present(*pte) && !pte_exec(*pte))
 				printk(KERN_CRIT "kernel tried to execute "
 					"NX-protected page - exploit attempt? "
 					"(uid: %d)\n", current->uid);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 724a5eff6f34..5080646da771 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -542,7 +542,7 @@ int __init set_kernel_exec(unsigned long vaddr, int enable)
 	pte = lookup_address(vaddr);
 	BUG_ON(!pte);
 
-	if (!pte_exec_kernel(*pte))
+	if (!pte_exec(*pte))
 		ret = 0;
 
 	if (enable)
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index ce486bdbbbb7..701404fab308 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -55,14 +55,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 #define pte_none(x)		(!(x).pte_low)
 #define pte_pfn(x)		(pte_val(x) >> PAGE_SHIFT)
 
-/*
- * All present pages are kernel-executable:
- */
-static inline int pte_exec_kernel(pte_t pte)
-{
-	return 1;
-}
-
 /*
  * Bits 0, 6 and 7 are taken, split up the 29 bits of offset
  * into this range:
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 62bb06575d5a..62a1ffbc8784 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -19,14 +19,6 @@
 #define pud_bad(pud)				0
 #define pud_present(pud)			1
 
-/*
- * All present pages with !NX bit are kernel-executable:
- */
-static inline int pte_exec_kernel(pte_t pte)
-{
-	return !(pte_val(pte) & _PAGE_NX);
-}
-
 /* Rules for using set_pte: the pte being assigned *must* be
  * either not present or in a state where the hardware will
  * not attempt to update the pte.  In places where this is
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 75a656e6b3f8..2f3d90807efb 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -139,6 +139,7 @@ static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED;
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
+static inline int pte_exec(pte_t pte)		{ return !(pte_val(pte) & _PAGE_NX); }
 
 static inline int pmd_large(pmd_t pte) {
 	return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==

From a5a5dc31794c3271c066835ad2c90c58a3805569 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:42 +0100
Subject: [PATCH 751/890] x86: add pte accessors for the global bit

Needed for some test code.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 2f3d90807efb..18a5f9e32834 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -139,6 +139,7 @@ static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED;
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
+static inline int pte_global(pte_t pte) 	{ return pte_val(pte) & _PAGE_GLOBAL; }
 static inline int pte_exec(pte_t pte)		{ return !(pte_val(pte) & _PAGE_NX); }
 
 static inline int pmd_large(pmd_t pte) {
@@ -155,6 +156,8 @@ static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_A
 static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
 static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
 static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
+static inline pte_t pte_mkglobal(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_GLOBAL); }
+static inline pte_t pte_clrglobal(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_GLOBAL); }
 
 extern pteval_t __supported_pte_mask;
 

From f0646e43acb18f0e00b00085dc88bc3f403e7930 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 752/890] x86: return the page table level in lookup_address()

based on this patch from Andi Kleen:

|  Subject: CPA: Return the page table level in lookup_address()
|  From: Andi Kleen <ak@suse.de>
|
|  Needed for the next change.
|
|  And change all the callers.

and ported it to x86.git.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c       | 3 ++-
 arch/x86/mm/init_32.c        | 3 ++-
 arch/x86/mm/pageattr_32.c    | 7 +++++--
 arch/x86/mm/pageattr_64.c    | 7 +++++--
 arch/x86/xen/mmu.c           | 9 ++++++---
 include/asm-x86/pgtable_32.h | 2 +-
 include/asm-x86/pgtable_64.h | 2 +-
 7 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index f7972ae7da07..f4f8c324715f 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -613,7 +613,8 @@ no_context:
 
 #ifdef CONFIG_X86_PAE
 		if (error_code & PF_INSTR) {
-			pte_t *pte = lookup_address(address);
+			int level;
+			pte_t *pte = lookup_address(address, &level);
 
 			if (pte && pte_present(*pte) && !pte_exec(*pte))
 				printk(KERN_CRIT "kernel tried to execute "
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 5080646da771..206e3f6800b9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -535,11 +535,12 @@ int __init set_kernel_exec(unsigned long vaddr, int enable)
 {
 	pte_t *pte;
 	int ret = 1;
+	int level;
 
 	if (!nx_enabled)
 		goto out;
 
-	pte = lookup_address(vaddr);
+	pte = lookup_address(vaddr, &level);
 	BUG_ON(!pte);
 
 	if (!pte_exec(*pte))
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index be4656403d77..523fd5b37df9 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -18,7 +18,7 @@
 static DEFINE_SPINLOCK(cpa_lock);
 static struct list_head df_list = LIST_HEAD_INIT(df_list);
 
-pte_t *lookup_address(unsigned long address)
+pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
@@ -32,8 +32,10 @@ pte_t *lookup_address(unsigned long address)
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
+	*level = 2;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
+	*level = 3;
 
 	return pte_offset_kernel(pmd, address);
 }
@@ -156,11 +158,12 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	struct page *kpte_page;
 	unsigned long address;
 	pte_t *kpte;
+	int level;
 
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
 
-	kpte = lookup_address(address);
+	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return -EINVAL;
 
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 14ab327cde0c..59cd066f6741 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -14,7 +14,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
-pte_t *lookup_address(unsigned long address)
+pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
@@ -29,8 +29,10 @@ pte_t *lookup_address(unsigned long address)
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		return NULL;
+	*level = 3;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
+	*level = 4;
 
 	pte = pte_offset_kernel(pmd, address);
 	if (pte && !pte_present(*pte))
@@ -140,8 +142,9 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 	struct page *kpte_page;
 	pgprot_t ref_prot2;
 	pte_t *kpte;
+	int level;
 
-	kpte = lookup_address(address);
+	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return 0;
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e6184735545f..45aa771e73a9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,8 @@
 
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
-	pte_t *pte = lookup_address(address);
+	int level;
+	pte_t *pte = lookup_address(address, &level);
 	unsigned offset = address & PAGE_MASK;
 
 	BUG_ON(pte == NULL);
@@ -70,8 +71,9 @@ void make_lowmem_page_readonly(void *vaddr)
 {
 	pte_t *pte, ptev;
 	unsigned long address = (unsigned long)vaddr;
+	int level;
 
-	pte = lookup_address(address);
+	pte = lookup_address(address, &level);
 	BUG_ON(pte == NULL);
 
 	ptev = pte_wrprotect(*pte);
@@ -84,8 +86,9 @@ void make_lowmem_page_readwrite(void *vaddr)
 {
 	pte_t *pte, ptev;
 	unsigned long address = (unsigned long)vaddr;
+	int level;
 
-	pte = lookup_address(address);
+	pte = lookup_address(address, &level);
 	BUG_ON(pte == NULL);
 
 	ptev = pte_mkwrite(*pte);
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index eb8cccfa6a49..9381bd37b9b1 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -182,7 +182,7 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
  * NOTE: the return type is pte_t but if the pmd is PSE then we return it
  * as a pte too.
  */
-extern pte_t *lookup_address(unsigned long address);
+extern pte_t *lookup_address(unsigned long address, int *level);
 
 /*
  * Make a given kernel text page executable/non-executable.
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 29fdeb8111bb..139da50cd510 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -240,7 +240,7 @@ extern struct list_head pgd_list;
 
 extern int kern_addr_valid(unsigned long addr); 
 
-pte_t *lookup_address(unsigned long addr);
+pte_t *lookup_address(unsigned long addr, int *level);
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)

From fa2d8369a1e0e5dd0fdaaba9d40ca5187f4cd20b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 753/890] x86: c_p_a(), add simple self test at boot

Since change_page_attr() is tricky code it is good to have some regression
test code. This patch maps and unmaps some random pages in the direct mapping
at boot and then dumps the state and does some simple sanity checks.

Add it with a CONFIG option.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile_32     |   1 +
 arch/x86/mm/Makefile_64     |   1 +
 arch/x86/mm/pageattr-test.c | 233 ++++++++++++++++++++++++++++++++++++
 3 files changed, 235 insertions(+)
 create mode 100644 arch/x86/mm/pageattr-test.c

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 6a5e981981f7..be2f55160bf8 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -4,6 +4,7 @@
 
 obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o
 
+obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index 09c997fe5856..cb723167631b 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -7,3 +7,4 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA) += srat_64.o
+obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
new file mode 100644
index 000000000000..4b22eb47f23a
--- /dev/null
+++ b/arch/x86/mm/pageattr-test.c
@@ -0,0 +1,233 @@
+/*
+ * self test for change_page_attr.
+ *
+ * Clears the global bit on random pages in the direct mapping, then reverts
+ * and compares page tables forwards and afterwards.
+ */
+
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/kdebug.h>
+
+enum {
+	NTEST = 400,
+#ifdef CONFIG_X86_64
+	LOWEST_LEVEL = 4,
+	LPS = (1 << PMD_SHIFT),
+#elif defined(CONFIG_X86_PAE)
+	LOWEST_LEVEL = 3,
+	LPS = (1 << PMD_SHIFT),
+#else
+	LOWEST_LEVEL = 3, /* lookup_address lies here */
+	LPS = (1 << 22),
+#endif
+	GPS = (1<<30)
+};
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#define max_mapped end_pfn_map
+#else
+#define max_mapped max_low_pfn
+#endif
+
+struct split_state {
+	long lpg, gpg, spg, exec;
+	long min_exec, max_exec;
+};
+
+static __init int print_split(struct split_state *s)
+{
+	int printed = 0;
+	long i, expected, missed = 0;
+	int err = 0;
+
+	s->lpg = s->gpg = s->spg = s->exec = 0;
+	s->min_exec = ~0UL;
+	s->max_exec = 0;
+	for (i = 0; i < max_mapped; ) {
+		int level;
+		pte_t *pte;
+		unsigned long adr = (unsigned long)__va(i << PAGE_SHIFT);
+
+		pte = lookup_address(adr, &level);
+		if (!pte) {
+			if (!printed) {
+				dump_pagetable(adr);
+				printk("CPA %lx no pte level %d\n", adr, level);
+				printed = 1;
+			}
+			missed++;
+			i++;
+			continue;
+		}
+
+		if (level == 2 && sizeof(long) == 8) {
+			s->gpg++;
+			i += GPS/PAGE_SIZE;
+		} else if (level != LOWEST_LEVEL) {
+			if (!(pte_val(*pte) & _PAGE_PSE)) {
+				printk("%lx level %d but not PSE %Lx\n",
+					adr, level, (u64)pte_val(*pte));
+				err = 1;
+			}
+			s->lpg++;
+			i += LPS/PAGE_SIZE;
+		} else {
+			s->spg++;
+			i++;
+		}
+		if (!(pte_val(*pte) & _PAGE_NX)) {
+			s->exec++;
+			if (adr < s->min_exec)
+				s->min_exec = adr;
+			if (adr > s->max_exec)
+				s->max_exec = adr;
+		}
+	}
+	printk("CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+		s->spg, s->lpg, s->gpg, s->exec,
+		s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+	expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
+	if (expected != i) {
+		printk("CPA max_mapped %lu but expected %lu\n",
+			max_mapped, expected);
+		return 1;
+	}
+	return err;
+}
+
+static __init int state_same(struct split_state *a, struct split_state *b)
+{
+	return a->lpg == b->lpg && a->gpg == b->gpg && a->spg == b->spg &&
+			a->exec == b->exec;
+}
+
+static unsigned long addr[NTEST] __initdata;
+static unsigned len[NTEST] __initdata;
+
+/* Change the global bit on random pages in the direct mapping */
+static __init int exercise_pageattr(void)
+{
+	int i, k;
+	pte_t *pte, pte0;
+	int level;
+	int err;
+	struct split_state sa, sb, sc;
+	int failed = 0;
+	unsigned long *bm;
+
+	printk("CPA exercising pageattr\n");
+
+	bm = vmalloc((max_mapped + 7) / 8);
+	if (!bm) {
+		printk("CPA Cannot vmalloc bitmap\n");
+		return -ENOMEM;
+	}
+	memset(bm, 0, (max_mapped + 7) / 8);
+
+	failed += print_split(&sa);
+	srandom32(100);
+	for (i = 0; i < NTEST; i++) {
+		unsigned long pfn = random32() % max_mapped;
+		addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
+		len[i] = random32() % 100;
+		len[i] = min_t(unsigned long, len[i], max_mapped - pfn - 1);
+		if (len[i] == 0)
+			len[i] = 1;
+
+		pte = NULL;
+		pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */
+		for (k = 0; k < len[i]; k++) {
+			pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
+			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+				addr[i] = 0;
+				break;
+			}
+			if (k == 0)
+				pte0 = *pte;
+			else if (pgprot_val(pte_pgprot(*pte)) !=
+					pgprot_val(pte_pgprot(pte0))) {
+				len[i] = k;
+				break;
+			}
+			if (test_bit(pfn + k, bm)) {
+				len[i] = k;
+				break;
+			}
+			__set_bit(pfn + k, bm);
+		}
+		if (!addr[i] || !pte || !k) {
+			addr[i] = 0;
+			continue;
+		}
+
+		err = change_page_attr(virt_to_page(addr[i]), len[i],
+			    pte_pgprot(pte_clrhuge(pte_clrglobal(pte0))));
+		if (err < 0) {
+			printk("CPA %d failed %d\n", i, err);
+			failed++;
+		}
+
+		pte = lookup_address(addr[i], &level);
+		if (!pte || pte_global(*pte) || pte_huge(*pte)) {
+			printk("CPA %lx: bad pte %Lx\n", addr[i],
+				pte ? (u64)pte_val(*pte) : 0ULL);
+			failed++;
+		}
+		if (level != LOWEST_LEVEL) {
+			printk("CPA %lx: unexpected level %d\n", addr[i],
+					level);
+			failed++;
+		}
+
+	}
+	vfree(bm);
+	global_flush_tlb();
+
+	failed += print_split(&sb);
+
+	printk("CPA reverting everything\n");
+	for (i = 0; i < NTEST; i++) {
+		if (!addr[i])
+			continue;
+		pte = lookup_address(addr[i], &level);
+		if (!pte) {
+			printk("CPA lookup of %lx failed\n", addr[i]);
+			failed++;
+			continue;
+		}
+		err = change_page_attr(virt_to_page(addr[i]), len[i],
+					  pte_pgprot(pte_mkglobal(*pte)));
+		if (err < 0) {
+			printk("CPA reverting failed: %d\n", err);
+			failed++;
+		}
+		pte = lookup_address(addr[i], &level);
+		if (!pte || !pte_global(*pte)) {
+			printk("CPA %lx: bad pte after revert %Lx\n", addr[i],
+			       pte ? (u64)pte_val(*pte) : 0ULL);
+			failed++;
+		}
+
+	}
+	global_flush_tlb();
+
+	failed += print_split(&sc);
+	if (!state_same(&sa, &sc))
+		failed++;
+
+	if (failed)
+		printk("CPA selftests NOT PASSED. Please report.\n");
+	else
+		printk("CPA selftests PASSED\n");
+
+	return 0;
+}
+
+module_init(exercise_pageattr);

From 851339b1ffc2b215c7c4e448465e78a4c2f643f2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 754/890] x86: clean up arch/x86/mm/pageattr-test.c

fix 15 checkpatch warnings.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c | 115 +++++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 53 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 4b22eb47f23a..c58fab061762 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -4,36 +4,36 @@
  * Clears the global bit on random pages in the direct mapping, then reverts
  * and compares page tables forwards and afterwards.
  */
-
-#include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/mm.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
 
 enum {
-	NTEST = 400,
+	NTEST			= 400,
 #ifdef CONFIG_X86_64
-	LOWEST_LEVEL = 4,
-	LPS = (1 << PMD_SHIFT),
+	LOWEST_LEVEL		= 4,
+	LPS			= (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
-	LOWEST_LEVEL = 3,
-	LPS = (1 << PMD_SHIFT),
+	LOWEST_LEVEL		= 3,
+	LPS			= (1 << PMD_SHIFT),
 #else
-	LOWEST_LEVEL = 3, /* lookup_address lies here */
-	LPS = (1 << 22),
+	LOWEST_LEVEL		= 3, /* lookup_address lies here */
+	LPS			= (1 << 22),
 #endif
-	GPS = (1<<30)
+	GPS			= (1<<30)
 };
 
 #ifdef CONFIG_X86_64
-#include <asm/proto.h>
-#define max_mapped end_pfn_map
+# include <asm/proto.h>
+# define max_mapped		end_pfn_map
 #else
-#define max_mapped max_low_pfn
+# define max_mapped		max_low_pfn
 #endif
 
 struct split_state {
@@ -43,23 +43,24 @@ struct split_state {
 
 static __init int print_split(struct split_state *s)
 {
-	int printed = 0;
 	long i, expected, missed = 0;
+	int printed = 0;
 	int err = 0;
 
 	s->lpg = s->gpg = s->spg = s->exec = 0;
 	s->min_exec = ~0UL;
 	s->max_exec = 0;
 	for (i = 0; i < max_mapped; ) {
+		unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT);
 		int level;
 		pte_t *pte;
-		unsigned long adr = (unsigned long)__va(i << PAGE_SHIFT);
 
-		pte = lookup_address(adr, &level);
+		pte = lookup_address(addr, &level);
 		if (!pte) {
 			if (!printed) {
-				dump_pagetable(adr);
-				printk("CPA %lx no pte level %d\n", adr, level);
+				dump_pagetable(addr);
+				printk(KERN_INFO "CPA %lx no pte level %d\n",
+					addr, level);
 				printed = 1;
 			}
 			missed++;
@@ -72,8 +73,9 @@ static __init int print_split(struct split_state *s)
 			i += GPS/PAGE_SIZE;
 		} else if (level != LOWEST_LEVEL) {
 			if (!(pte_val(*pte) & _PAGE_PSE)) {
-				printk("%lx level %d but not PSE %Lx\n",
-					adr, level, (u64)pte_val(*pte));
+				printk(KERN_ERR
+					"%lx level %d but not PSE %Lx\n",
+					addr, level, (u64)pte_val(*pte));
 				err = 1;
 			}
 			s->lpg++;
@@ -84,18 +86,20 @@ static __init int print_split(struct split_state *s)
 		}
 		if (!(pte_val(*pte) & _PAGE_NX)) {
 			s->exec++;
-			if (adr < s->min_exec)
-				s->min_exec = adr;
-			if (adr > s->max_exec)
-				s->max_exec = adr;
+			if (addr < s->min_exec)
+				s->min_exec = addr;
+			if (addr > s->max_exec)
+				s->max_exec = addr;
 		}
 	}
-	printk("CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+	printk(KERN_INFO
+		"CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
 		s->spg, s->lpg, s->gpg, s->exec,
 		s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+
 	expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
 	if (expected != i) {
-		printk("CPA max_mapped %lu but expected %lu\n",
+		printk(KERN_ERR "CPA max_mapped %lu but expected %lu\n",
 			max_mapped, expected);
 		return 1;
 	}
@@ -105,56 +109,62 @@ static __init int print_split(struct split_state *s)
 static __init int state_same(struct split_state *a, struct split_state *b)
 {
 	return a->lpg == b->lpg && a->gpg == b->gpg && a->spg == b->spg &&
-			a->exec == b->exec;
+		a->exec == b->exec;
 }
 
-static unsigned long addr[NTEST] __initdata;
-static unsigned len[NTEST] __initdata;
+static unsigned long __initdata addr[NTEST];
+static unsigned int __initdata len[NTEST];
 
 /* Change the global bit on random pages in the direct mapping */
 static __init int exercise_pageattr(void)
 {
-	int i, k;
-	pte_t *pte, pte0;
-	int level;
-	int err;
 	struct split_state sa, sb, sc;
-	int failed = 0;
 	unsigned long *bm;
+	pte_t *pte, pte0;
+	int failed = 0;
+	int level;
+	int i, k;
+	int err;
 
-	printk("CPA exercising pageattr\n");
+	printk(KERN_INFO "CPA exercising pageattr\n");
 
 	bm = vmalloc((max_mapped + 7) / 8);
 	if (!bm) {
-		printk("CPA Cannot vmalloc bitmap\n");
+		printk(KERN_ERR "CPA Cannot vmalloc bitmap\n");
 		return -ENOMEM;
 	}
 	memset(bm, 0, (max_mapped + 7) / 8);
 
 	failed += print_split(&sa);
 	srandom32(100);
+
 	for (i = 0; i < NTEST; i++) {
 		unsigned long pfn = random32() % max_mapped;
+
 		addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
 		len[i] = random32() % 100;
 		len[i] = min_t(unsigned long, len[i], max_mapped - pfn - 1);
+
 		if (len[i] == 0)
 			len[i] = 1;
 
 		pte = NULL;
 		pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */
+
 		for (k = 0; k < len[i]; k++) {
 			pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
 			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
 				addr[i] = 0;
 				break;
 			}
-			if (k == 0)
+			if (k == 0) {
 				pte0 = *pte;
-			else if (pgprot_val(pte_pgprot(*pte)) !=
+			} else {
+				if (pgprot_val(pte_pgprot(*pte)) !=
 					pgprot_val(pte_pgprot(pte0))) {
-				len[i] = k;
-				break;
+					len[i] = k;
+					break;
+				}
 			}
 			if (test_bit(pfn + k, bm)) {
 				len[i] = k;
@@ -170,19 +180,19 @@ static __init int exercise_pageattr(void)
 		err = change_page_attr(virt_to_page(addr[i]), len[i],
 			    pte_pgprot(pte_clrhuge(pte_clrglobal(pte0))));
 		if (err < 0) {
-			printk("CPA %d failed %d\n", i, err);
+			printk(KERN_ERR "CPA %d failed %d\n", i, err);
 			failed++;
 		}
 
 		pte = lookup_address(addr[i], &level);
 		if (!pte || pte_global(*pte) || pte_huge(*pte)) {
-			printk("CPA %lx: bad pte %Lx\n", addr[i],
+			printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i],
 				pte ? (u64)pte_val(*pte) : 0ULL);
 			failed++;
 		}
 		if (level != LOWEST_LEVEL) {
-			printk("CPA %lx: unexpected level %d\n", addr[i],
-					level);
+			printk(KERN_ERR "CPA %lx: unexpected level %d\n",
+				addr[i], level);
 			failed++;
 		}
 
@@ -192,26 +202,26 @@ static __init int exercise_pageattr(void)
 
 	failed += print_split(&sb);
 
-	printk("CPA reverting everything\n");
+	printk(KERN_INFO "CPA reverting everything\n");
 	for (i = 0; i < NTEST; i++) {
 		if (!addr[i])
 			continue;
 		pte = lookup_address(addr[i], &level);
 		if (!pte) {
-			printk("CPA lookup of %lx failed\n", addr[i]);
+			printk(KERN_ERR "CPA lookup of %lx failed\n", addr[i]);
 			failed++;
 			continue;
 		}
 		err = change_page_attr(virt_to_page(addr[i]), len[i],
 					  pte_pgprot(pte_mkglobal(*pte)));
 		if (err < 0) {
-			printk("CPA reverting failed: %d\n", err);
+			printk(KERN_ERR "CPA reverting failed: %d\n", err);
 			failed++;
 		}
 		pte = lookup_address(addr[i], &level);
 		if (!pte || !pte_global(*pte)) {
-			printk("CPA %lx: bad pte after revert %Lx\n", addr[i],
-			       pte ? (u64)pte_val(*pte) : 0ULL);
+			printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n",
+				addr[i], pte ? (u64)pte_val(*pte) : 0ULL);
 			failed++;
 		}
 
@@ -223,11 +233,10 @@ static __init int exercise_pageattr(void)
 		failed++;
 
 	if (failed)
-		printk("CPA selftests NOT PASSED. Please report.\n");
+		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
 	else
-		printk("CPA selftests PASSED\n");
+		printk(KERN_INFO "CPA selftests PASSED\n");
 
 	return 0;
 }
-
 module_init(exercise_pageattr);

From 718f94974d59f57a57480d2448beee05720ab237 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 755/890] x86: cleanup boot_ioremap_32.c

Coding style cleanup before modifying the file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/boot_ioremap_32.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c
index b20f74a2770f..7ede1e3e1f42 100644
--- a/arch/x86/mm/boot_ioremap_32.c
+++ b/arch/x86/mm/boot_ioremap_32.c
@@ -1,7 +1,7 @@
 /*
  * arch/i386/mm/boot_ioremap.c
- * 
- * Re-map functions for early boot-time before paging_init() when the 
+ *
+ * Re-map functions for early boot-time before paging_init() when the
  * boot-time pagetables are still in use
  *
  * Written by Dave Hansen <haveblue@us.ibm.com>
@@ -23,15 +23,15 @@
 #include <linux/init.h>
 #include <linux/stddef.h>
 
-/* 
- * I'm cheating here.  It is known that the two boot PTE pages are 
+/*
+ * I'm cheating here.  It is known that the two boot PTE pages are
  * allocated next to each other.  I'm pretending that they're just
- * one big array. 
+ * one big array.
  */
 
 #define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
 
-static unsigned long boot_pte_index(unsigned long vaddr) 
+static unsigned long boot_pte_index(unsigned long vaddr)
 {
 	return __pa(vaddr) >> PAGE_SHIFT;
 }
@@ -47,7 +47,7 @@ static inline boot_pte_t* boot_vaddr_to_pte(void *address)
  * phys_addr and virtual_source, and who also has a preference
  * about which virtual address from which to steal ptes
  */
-static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, 
+static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
 		    void* virtual_source)
 {
 	boot_pte_t* pte;
@@ -70,7 +70,7 @@ static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
 /*
  * This only applies to things which need to ioremap before paging_init()
  * bt_ioremap() and plain ioremap() are both useless at this point.
- * 
+ *
  * When used, we're still using the boot-time pagetables, which only
  * have 2 PTE pages mapping the first 8MB
  *
@@ -82,18 +82,18 @@ __init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long last_addr, offset;
 	unsigned int nrpages;
-	
+
 	last_addr = phys_addr + size - 1;
 
 	/* page align the requested address */
 	offset = phys_addr & ~PAGE_MASK;
 	phys_addr &= PAGE_MASK;
 	size = PAGE_ALIGN(last_addr) - phys_addr;
-	
+
 	nrpages = size >> PAGE_SHIFT;
 	if (nrpages > BOOT_IOREMAP_PAGES)
 		return NULL;
-	
+
 	__boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
 
 	return &boot_ioremap_space[offset];

From fbd3bfd87f91e6a454f3f61f67ee745f0cb59aa4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 756/890] x86: use __PAGE_KERNEL_EXEC in ioremap_64.c

This patch replaces the manual permission setup for pages in ioremap_64.c with
the pre-defined __PAGE_KERNEL_EXEC value.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 6fbd3c7932e0..d6cf14976827 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -84,8 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
-	pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
-			  | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
+	pgprot = __pgprot(__PAGE_KERNEL_EXEC | _PAGE_GLOBAL | flags);
 	/*
 	 * Mappings have to be page-aligned
 	 */

From 4138cc3418f5eaa7524ff8e927102863f1ba0ea5 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Wed, 30 Jan 2008 13:33:43 +0100
Subject: [PATCH 757/890] x86: set strong uncacheable where UC is really
 desired

Also use _PAGE_PWT for all the mappings which need uncache mapping.
Instead of existing PAT2 which is UC- (and can be overwritten by MTRRs),
we now use PAT3 which is strong uncacheable.

This makes it consistent with pgprot_noncached()

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 2 +-
 arch/x86/mm/ioremap_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 0b278315d737..ef0f6a452ee1 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(__ioremap);
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
 	unsigned long last_addr;
-	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
+	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
 	if (!p) 
 		return p; 
 
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index d6cf14976827..2815ab60009b 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -138,7 +138,7 @@ EXPORT_SYMBOL(__ioremap);
 
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_PCD);
+	return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 

From 0947b2f31ca1ea1211d3cde2dbd8fcec579ef395 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:44 +0100
Subject: [PATCH 758/890] i386 boot: replace boot_ioremap with enhanced
 bt_ioremap - enhance bt_ioremap

This patch makes it possible for bt_ioremap() to be used before
paging_init(), via providing an early implementation of set_fixmap()
that can be used before paging_init().

This way boot_ioremap() can be replaced by bt_ioremap().

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup_32.c |  1 +
 arch/x86/mm/init_32.c      |  2 +
 arch/x86/mm/ioremap_32.c   | 87 +++++++++++++++++++++++++++++++++++++-
 include/asm-x86/io_32.h    |  3 ++
 4 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 83ba3ca5f431..35db426de300 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -697,6 +697,7 @@ void __init setup_arch(char **cmdline_p)
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	pre_setup_arch_hook();
 	early_cpu_init();
+	bt_ioremap_init();
 
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 206e3f6800b9..f4e1894367a6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -429,9 +429,11 @@ static void __init pagetable_init (void)
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
 	 */
+	bt_ioremap_clear();
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
 	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
 	page_table_range_init(vaddr, end, pgd_base);
+	bt_ioremap_reset();
 
 	permanent_kmaps_init(pgd_base);
 
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index ef0f6a452ee1..fd1f5b6cfa20 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -208,6 +208,89 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+static __initdata int after_paging_init;
+static __initdata unsigned long bm_pte[1024]
+				__attribute__((aligned(PAGE_SIZE)));
+
+static inline unsigned long * __init bt_ioremap_pgd(unsigned long addr)
+{
+	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+}
+
+static inline unsigned long * __init bt_ioremap_pte(unsigned long addr)
+{
+	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+}
+
+void __init bt_ioremap_init(void)
+{
+	unsigned long *pgd;
+
+	pgd = bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	memset(bm_pte, 0, sizeof(bm_pte));
+	BUG_ON(pgd != bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+}
+
+void __init bt_ioremap_clear(void)
+{
+	unsigned long *pgd;
+
+	pgd = bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	*pgd = 0;
+	__flush_tlb_all();
+}
+
+void __init bt_ioremap_reset(void)
+{
+	enum fixed_addresses idx;
+	unsigned long *pte, phys, addr;
+
+	after_paging_init = 1;
+	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
+		addr = fix_to_virt(idx);
+		pte = bt_ioremap_pte(addr);
+		if (!*pte & _PAGE_PRESENT) {
+			phys = *pte & PAGE_MASK;
+			set_fixmap(idx, phys);
+		}
+	}
+}
+
+static void __init __bt_set_fixmap(enum fixed_addresses idx,
+				   unsigned long phys, pgprot_t flags)
+{
+	unsigned long *pte, addr = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	pte = bt_ioremap_pte(addr);
+	if (pgprot_val(flags))
+		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+	else
+		*pte = 0;
+	__flush_tlb_one(addr);
+}
+
+static inline void __init bt_set_fixmap(enum fixed_addresses idx,
+					unsigned long phys)
+{
+	if (after_paging_init)
+		set_fixmap(idx, phys);
+	else
+		__bt_set_fixmap(idx, phys, PAGE_KERNEL);
+}
+
+static inline void __init bt_clear_fixmap(enum fixed_addresses idx)
+{
+	if (after_paging_init)
+		clear_fixmap(idx);
+	else
+		__bt_set_fixmap(idx, 0, __pgprot(0));
+}
+
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long offset, last_addr;
@@ -244,7 +327,7 @@ void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 	 */
 	idx = FIX_BTMAP_BEGIN;
 	while (nrpages > 0) {
-		set_fixmap(idx, phys_addr);
+		bt_set_fixmap(idx, phys_addr);
 		phys_addr += PAGE_SIZE;
 		--idx;
 		--nrpages;
@@ -267,7 +350,7 @@ void __init bt_iounmap(void *addr, unsigned long size)
 
 	idx = FIX_BTMAP_BEGIN;
 	while (nrpages > 0) {
-		clear_fixmap(idx);
+		bt_clear_fixmap(idx);
 		--idx;
 		--nrpages;
 	}
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index db3978846379..f7b733d9ace9 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -139,6 +139,9 @@ extern void iounmap(volatile void __iomem *addr);
  * mappings, before the real ioremap() is functional.
  * A boot-time mapping is currently limited to at most 16 pages.
  */
+extern void bt_ioremap_init(void);
+extern void bt_ioremap_clear(void);
+extern void bt_ioremap_reset(void);
 extern void *bt_ioremap(unsigned long offset, unsigned long size);
 extern void bt_iounmap(void *addr, unsigned long size);
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);

From 4716e79c9946044a53a65418cfba04836f6a5c36 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:44 +0100
Subject: [PATCH 759/890] x86: replace boot_ioremap() with enhanced
 bt_ioremap() - remove boot_ioremap()

This patch replaces boot_ioremap invokation with bt_ioremap and
removes the boot_ioremap implementation.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig              |   6 --
 arch/x86/kernel/srat_32.c     |   8 +--
 arch/x86/mm/Makefile_32       |   1 -
 arch/x86/mm/boot_ioremap_32.c | 100 ----------------------------------
 include/asm-x86/efi.h         |   8 +--
 5 files changed, 5 insertions(+), 118 deletions(-)
 delete mode 100644 arch/x86/mm/boot_ioremap_32.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 92dc919c7640..fb3eea3e38ee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1009,12 +1009,6 @@ config IRQBALANCE
 	  The default yes will allow the kernel to do irq load balancing.
 	  Saying no will keep the kernel from doing irq load balancing.
 
-# turning this on wastes a bunch of space.
-# Summit needs it only when NUMA is on
-config BOOT_IOREMAP
-	def_bool y
-	depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
-
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2a8713ec0f9a..24bfd4a9e62a 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -57,8 +57,6 @@ static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 static int num_memory_chunks;		/* total number of memory chunks */
 static u8 __initdata apicid_to_pxm[MAX_APICID];
 
-extern void * boot_ioremap(unsigned long, unsigned long);
-
 /* Identify CPU proximity domains */
 static void __init parse_cpu_affinity_structure(char *p)
 {
@@ -299,7 +297,7 @@ int __init get_memcfg_from_srat(void)
 	}
 
 	rsdt = (struct acpi_table_rsdt *)
-	    boot_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
+	    bt_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
 
 	if (!rsdt) {
 		printk(KERN_WARNING
@@ -339,11 +337,11 @@ int __init get_memcfg_from_srat(void)
 	for (i = 0; i < tables; i++) {
 		/* Map in header, then map in full table length. */
 		header = (struct acpi_table_header *)
-			boot_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
+			bt_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
 		if (!header)
 			break;
 		header = (struct acpi_table_header *)
-			boot_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
+			bt_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
 		if (!header)
 			break;
 
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index be2f55160bf8..1aeba3bf34bd 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -8,4 +8,3 @@ obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap_32.o
diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c
deleted file mode 100644
index 7ede1e3e1f42..000000000000
--- a/arch/x86/mm/boot_ioremap_32.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * arch/i386/mm/boot_ioremap.c
- *
- * Re-map functions for early boot-time before paging_init() when the
- * boot-time pagetables are still in use
- *
- * Written by Dave Hansen <haveblue@us.ibm.com>
- */
-
-
-/*
- * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE
- * keeps that from happening.  If anyone has a better way, I'm listening.
- *
- * boot_pte_t is defined only if this all works correctly
- */
-
-#undef CONFIG_X86_PAE
-#undef CONFIG_PARAVIRT
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <linux/init.h>
-#include <linux/stddef.h>
-
-/*
- * I'm cheating here.  It is known that the two boot PTE pages are
- * allocated next to each other.  I'm pretending that they're just
- * one big array.
- */
-
-#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
-
-static unsigned long boot_pte_index(unsigned long vaddr)
-{
-	return __pa(vaddr) >> PAGE_SHIFT;
-}
-
-static inline boot_pte_t* boot_vaddr_to_pte(void *address)
-{
-	boot_pte_t* boot_pg = (boot_pte_t*)pg0;
-	return &boot_pg[boot_pte_index((unsigned long)address)];
-}
-
-/*
- * This is only for a caller who is clever enough to page-align
- * phys_addr and virtual_source, and who also has a preference
- * about which virtual address from which to steal ptes
- */
-static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
-		    void* virtual_source)
-{
-	boot_pte_t* pte;
-	int i;
-	char *vaddr = virtual_source;
-
-	pte = boot_vaddr_to_pte(virtual_source);
-	for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
-		set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
-		__flush_tlb_one((unsigned long) &vaddr[i*PAGE_SIZE]);
-	}
-}
-
-/* the virtual space we're going to remap comes from this array */
-#define BOOT_IOREMAP_PAGES 4
-#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE)
-static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
-		       __attribute__ ((aligned (PAGE_SIZE)));
-
-/*
- * This only applies to things which need to ioremap before paging_init()
- * bt_ioremap() and plain ioremap() are both useless at this point.
- *
- * When used, we're still using the boot-time pagetables, which only
- * have 2 PTE pages mapping the first 8MB
- *
- * There is no unmap.  The boot-time PTE pages aren't used after boot.
- * If you really want the space back, just remap it yourself.
- * boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE)
- */
-__init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
-{
-	unsigned long last_addr, offset;
-	unsigned int nrpages;
-
-	last_addr = phys_addr + size - 1;
-
-	/* page align the requested address */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr) - phys_addr;
-
-	nrpages = size >> PAGE_SHIFT;
-	if (nrpages > BOOT_IOREMAP_PAGES)
-		return NULL;
-
-	__boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
-
-	return &boot_ioremap_space[offset];
-}
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 6d54502755aa..10fcf20bdc73 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -32,13 +32,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 	efi_call_virt(f, a1, a2, a3, a4, a5)
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
 	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
-/*
- * We require an early boot_ioremap mapping mechanism initially
- */
-extern void *boot_ioremap(unsigned long, unsigned long);
 
-#define efi_early_ioremap(addr, size)		boot_ioremap(addr, size)
-#define efi_early_iounmap(vaddr, size)
+#define efi_early_ioremap(addr, size)		bt_ioremap(addr, size)
+#define efi_early_iounmap(vaddr, size)		bt_iounmap(vaddr, size)
 
 #define efi_ioremap(addr, size)			ioremap(addr, size)
 

From beacfaac3f23b30814aafee37a055257c7062ef3 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:44 +0100
Subject: [PATCH 760/890] x86 32-bit boot: rename bt_ioremap() to
 early_ioremap()

This patch renames bt_ioremap to early_ioremap, which is used in
x86_64. This makes it easier to merge i386 and x86_64 usage.

[ mingo@elte.hu: fix ]

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi.c      | 24 ++++++++++++------------
 arch/x86/kernel/efi_32.c   |  2 +-
 arch/x86/kernel/setup_32.c |  6 +++---
 arch/x86/kernel/srat_32.c  |  6 +++---
 arch/x86/mm/init_32.c      |  4 ++--
 arch/x86/mm/ioremap_32.c   | 38 +++++++++++++++++++-------------------
 include/asm-x86/dmi.h      |  7 ++-----
 include/asm-x86/efi.h      |  8 --------
 include/asm-x86/io_32.h    | 16 ++++++++--------
 9 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 2939b015c2ed..5d492f99d967 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -256,12 +256,12 @@ void __init efi_init(void)
 	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
 	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
 
-	efi.systab = efi_early_ioremap((unsigned long)efi_phys.systab,
-				       sizeof(efi_system_table_t));
+	efi.systab = early_ioremap((unsigned long)efi_phys.systab,
+				   sizeof(efi_system_table_t));
 	if (efi.systab == NULL)
 		printk(KERN_ERR "Couldn't map the EFI system table!\n");
 	memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
-	efi_early_iounmap(efi.systab, sizeof(efi_system_table_t));
+	early_iounmap(efi.systab, sizeof(efi_system_table_t));
 	efi.systab = &efi_systab;
 
 	/*
@@ -278,14 +278,14 @@ void __init efi_init(void)
 	/*
 	 * Show what we know for posterity
 	 */
-	c16 = tmp = efi_early_ioremap(efi.systab->fw_vendor, 2);
+	c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
 	if (c16) {
 		for (i = 0; i < sizeof(vendor) && *c16; ++i)
 			vendor[i] = *c16++;
 		vendor[i] = '\0';
 	} else
 		printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
-	efi_early_iounmap(tmp, 2);
+	early_iounmap(tmp, 2);
 
 	printk(KERN_INFO "EFI v%u.%.02u by %s \n",
 	       efi.systab->hdr.revision >> 16,
@@ -294,7 +294,7 @@ void __init efi_init(void)
 	/*
 	 * Let's see what config tables the firmware passed to us.
 	 */
-	config_tables = efi_early_ioremap(
+	config_tables = early_ioremap(
 		efi.systab->tables,
 		efi.systab->nr_tables * sizeof(efi_config_table_t));
 	if (config_tables == NULL)
@@ -328,7 +328,7 @@ void __init efi_init(void)
 		}
 	}
 	printk("\n");
-	efi_early_iounmap(config_tables,
+	early_iounmap(config_tables,
 			  efi.systab->nr_tables * sizeof(efi_config_table_t));
 
 	/*
@@ -337,8 +337,8 @@ void __init efi_init(void)
 	 * address of several of the EFI runtime functions, needed to
 	 * set the firmware into virtual mode.
 	 */
-	runtime = efi_early_ioremap((unsigned long)efi.systab->runtime,
-				    sizeof(efi_runtime_services_t));
+	runtime = early_ioremap((unsigned long)efi.systab->runtime,
+				sizeof(efi_runtime_services_t));
 	if (runtime != NULL) {
 		/*
 		 * We will only need *early* access to the following
@@ -357,11 +357,11 @@ void __init efi_init(void)
 	} else
 		printk(KERN_ERR "Could not map the EFI runtime service "
 		       "table!\n");
-	efi_early_iounmap(runtime, sizeof(efi_runtime_services_t));
+	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	/* Map the EFI memory map */
-	memmap.map = efi_early_ioremap((unsigned long)memmap.phys_map,
-				       memmap.nr_map * memmap.desc_size);
+	memmap.map = early_ioremap((unsigned long)memmap.phys_map,
+				   memmap.nr_map * memmap.desc_size);
 	if (memmap.map == NULL)
 		printk(KERN_ERR "Could not map the EFI memory map!\n");
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index afd2c3b039d6..114b896d7573 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -117,7 +117,7 @@ void __init efi_map_memmap(void)
 {
 	memmap.map = NULL;
 
-	memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
+	memmap.map = early_ioremap((unsigned long) memmap.phys_map,
 			(memmap.nr_map * memmap.desc_size));
 	if (memmap.map == NULL)
 		printk(KERN_ERR "Could not remap the EFI memmap!\n");
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 35db426de300..c038b09b1723 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -577,9 +577,9 @@ static void __init relocate_initrd(void)
 		if (clen > MAX_MAP_CHUNK-slop)
 			clen = MAX_MAP_CHUNK-slop;
 		mapaddr = ramdisk_image & PAGE_MASK;
-		p = bt_ioremap(mapaddr, clen+slop);
+		p = early_ioremap(mapaddr, clen+slop);
 		memcpy(q, p+slop, clen);
-		bt_iounmap(p, clen+slop);
+		early_iounmap(p, clen+slop);
 		q += clen;
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
@@ -697,7 +697,7 @@ void __init setup_arch(char **cmdline_p)
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	pre_setup_arch_hook();
 	early_cpu_init();
-	bt_ioremap_init();
+	early_ioremap_init();
 
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 24bfd4a9e62a..2bf6903cb444 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -297,7 +297,7 @@ int __init get_memcfg_from_srat(void)
 	}
 
 	rsdt = (struct acpi_table_rsdt *)
-	    bt_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
+	    early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
 
 	if (!rsdt) {
 		printk(KERN_WARNING
@@ -337,11 +337,11 @@ int __init get_memcfg_from_srat(void)
 	for (i = 0; i < tables; i++) {
 		/* Map in header, then map in full table length. */
 		header = (struct acpi_table_header *)
-			bt_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
+			early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
 		if (!header)
 			break;
 		header = (struct acpi_table_header *)
-			bt_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
+			early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
 		if (!header)
 			break;
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f4e1894367a6..e00c1d7128b1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -429,11 +429,11 @@ static void __init pagetable_init (void)
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
 	 */
-	bt_ioremap_clear();
+	early_ioremap_clear();
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
 	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
 	page_table_range_init(vaddr, end, pgd_base);
-	bt_ioremap_reset();
+	early_ioremap_reset();
 
 	permanent_kmaps_init(pgd_base);
 
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index fd1f5b6cfa20..43b5e9fd6370 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -212,36 +212,36 @@ static __initdata int after_paging_init;
 static __initdata unsigned long bm_pte[1024]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init bt_ioremap_pgd(unsigned long addr)
+static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
 {
 	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
 }
 
-static inline unsigned long * __init bt_ioremap_pte(unsigned long addr)
+static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
 {
 	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
 }
 
-void __init bt_ioremap_init(void)
+void __init early_ioremap_init(void)
 {
 	unsigned long *pgd;
 
-	pgd = bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
 	memset(bm_pte, 0, sizeof(bm_pte));
-	BUG_ON(pgd != bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+	BUG_ON(pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
 }
 
-void __init bt_ioremap_clear(void)
+void __init early_ioremap_clear(void)
 {
 	unsigned long *pgd;
 
-	pgd = bt_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = 0;
 	__flush_tlb_all();
 }
 
-void __init bt_ioremap_reset(void)
+void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
 	unsigned long *pte, phys, addr;
@@ -249,7 +249,7 @@ void __init bt_ioremap_reset(void)
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
-		pte = bt_ioremap_pte(addr);
+		pte = early_ioremap_pte(addr);
 		if (!*pte & _PAGE_PRESENT) {
 			phys = *pte & PAGE_MASK;
 			set_fixmap(idx, phys);
@@ -257,7 +257,7 @@ void __init bt_ioremap_reset(void)
 	}
 }
 
-static void __init __bt_set_fixmap(enum fixed_addresses idx,
+static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
 	unsigned long *pte, addr = __fix_to_virt(idx);
@@ -266,7 +266,7 @@ static void __init __bt_set_fixmap(enum fixed_addresses idx,
 		BUG();
 		return;
 	}
-	pte = bt_ioremap_pte(addr);
+	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
 		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
 	else
@@ -274,24 +274,24 @@ static void __init __bt_set_fixmap(enum fixed_addresses idx,
 	__flush_tlb_one(addr);
 }
 
-static inline void __init bt_set_fixmap(enum fixed_addresses idx,
+static inline void __init early_set_fixmap(enum fixed_addresses idx,
 					unsigned long phys)
 {
 	if (after_paging_init)
 		set_fixmap(idx, phys);
 	else
-		__bt_set_fixmap(idx, phys, PAGE_KERNEL);
+		__early_set_fixmap(idx, phys, PAGE_KERNEL);
 }
 
-static inline void __init bt_clear_fixmap(enum fixed_addresses idx)
+static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 {
 	if (after_paging_init)
 		clear_fixmap(idx);
 	else
-		__bt_set_fixmap(idx, 0, __pgprot(0));
+		__early_set_fixmap(idx, 0, __pgprot(0));
 }
 
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long offset, last_addr;
 	unsigned int nrpages;
@@ -327,7 +327,7 @@ void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 	 */
 	idx = FIX_BTMAP_BEGIN;
 	while (nrpages > 0) {
-		bt_set_fixmap(idx, phys_addr);
+		early_set_fixmap(idx, phys_addr);
 		phys_addr += PAGE_SIZE;
 		--idx;
 		--nrpages;
@@ -335,7 +335,7 @@ void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 	return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
 }
 
-void __init bt_iounmap(void *addr, unsigned long size)
+void __init early_iounmap(void *addr, unsigned long size)
 {
 	unsigned long virt_addr;
 	unsigned long offset;
@@ -350,7 +350,7 @@ void __init bt_iounmap(void *addr, unsigned long size)
 
 	idx = FIX_BTMAP_BEGIN;
 	while (nrpages > 0) {
-		bt_clear_fixmap(idx);
+		early_clear_fixmap(idx);
 		--idx;
 		--nrpages;
 	}
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 5008c365e6e4..1241e6ad1935 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -5,9 +5,6 @@
 
 #ifdef CONFIG_X86_32
 
-/* Use early IO mappings for DMI because it's initialized early */
-#define dmi_ioremap bt_ioremap
-#define dmi_iounmap bt_iounmap
 #define dmi_alloc alloc_bootmem
 
 #else /* CONFIG_X86_32 */
@@ -28,9 +25,9 @@ static inline void *dmi_alloc(unsigned len)
 	return dmi_alloc_data + idx;
 }
 
+#endif
+
 #define dmi_ioremap early_ioremap
 #define dmi_iounmap early_iounmap
 
 #endif
-
-#endif
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 10fcf20bdc73..8380131ca542 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -33,9 +33,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
 	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
-#define efi_early_ioremap(addr, size)		bt_ioremap(addr, size)
-#define efi_early_iounmap(vaddr, size)		bt_iounmap(vaddr, size)
-
 #define efi_ioremap(addr, size)			ioremap(addr, size)
 
 #define end_pfn_map				max_low_pfn
@@ -91,13 +88,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
-#define efi_early_ioremap(addr, size)		early_ioremap(addr, size)
-#define efi_early_iounmap(vaddr, size)		early_iounmap(vaddr, size)
-
 extern void *efi_ioremap(unsigned long offset, unsigned long size);
 
-extern int efi_time;
-
 #endif /* CONFIG_X86_32 */
 
 extern void efi_reserve_bootmem(void);
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index f7b733d9ace9..059a1fee4de3 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -135,20 +135,20 @@ static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
 extern void iounmap(volatile void __iomem *addr);
 
 /*
- * bt_ioremap() and bt_iounmap() are for temporary early boot-time
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
  * mappings, before the real ioremap() is functional.
  * A boot-time mapping is currently limited to at most 16 pages.
  */
-extern void bt_ioremap_init(void);
-extern void bt_ioremap_clear(void);
-extern void bt_ioremap_reset(void);
-extern void *bt_ioremap(unsigned long offset, unsigned long size);
-extern void bt_iounmap(void *addr, unsigned long size);
+extern void early_ioremap_init(void);
+extern void early_ioremap_clear(void);
+extern void early_ioremap_reset(void);
+extern void *early_ioremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 /* Use early IO mappings for DMI because it's initialized early */
-#define dmi_ioremap bt_ioremap
-#define dmi_iounmap bt_iounmap
+#define dmi_ioremap early_ioremap
+#define dmi_iounmap early_iounmap
 #define dmi_alloc alloc_bootmem
 
 /*

From 64a8f852a20e90bf3018d3ace5b7f514f39db4cd Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:44 +0100
Subject: [PATCH 761/890] x86: early_ioremap_reset fix

This patch fixes a bug of early_ioremap_reset.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 43b5e9fd6370..bfd7b8b2fe60 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -247,7 +247,7 @@ void __init early_ioremap_reset(void)
 	unsigned long *pte, phys, addr;
 
 	after_paging_init = 1;
-	for (idx = FIX_BTMAP_BEGIN; idx <= FIX_BTMAP_END; idx--) {
+	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
 		if (!*pte & _PAGE_PRESENT) {

From 1b42f51630c7eebce6fb780b480731eb81afd325 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:45 +0100
Subject: [PATCH 762/890] x86: enhance early_ioremap()

 - allow nesting of up to 4 levels

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c    | 36 ++++++++++++++++++++++++++----------
 include/asm-x86/fixmap_32.h |  9 +++++----
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index bfd7b8b2fe60..63f76ecae44c 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -291,23 +291,28 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 		__early_set_fixmap(idx, 0, __pgprot(0));
 }
 
+
+int __initdata early_ioremap_nested;
+
 void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long offset, last_addr;
-	unsigned int nrpages;
-	enum fixed_addresses idx;
+	unsigned int nrpages, nesting;
+	enum fixed_addresses idx0, idx;
+
+	WARN_ON(system_state != SYSTEM_BOOTING);
+
+	nesting = early_ioremap_nested;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
 	if (!size || last_addr < phys_addr)
 		return NULL;
 
-	/*
-	 * Don't remap the low PCI/ISA area, it's always mapped..
-	 */
-	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
-		return phys_to_virt(phys_addr);
+	if (nesting >= FIX_BTMAPS_NESTING)
+		return NULL;
 
+	early_ioremap_nested++;
 	/*
 	 * Mappings have to be page-aligned
 	 */
@@ -325,14 +330,16 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 	/*
 	 * Ok, go for it..
 	 */
-	idx = FIX_BTMAP_BEGIN;
+	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
+	idx = idx0;
 	while (nrpages > 0) {
 		early_set_fixmap(idx, phys_addr);
 		phys_addr += PAGE_SIZE;
 		--idx;
 		--nrpages;
 	}
-	return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+
+	return (void*) (offset + fix_to_virt(idx0));
 }
 
 void __init early_iounmap(void *addr, unsigned long size)
@@ -341,17 +348,26 @@ void __init early_iounmap(void *addr, unsigned long size)
 	unsigned long offset;
 	unsigned int nrpages;
 	enum fixed_addresses idx;
+	unsigned int nesting;
+
+	nesting = --early_ioremap_nested;
 
 	virt_addr = (unsigned long)addr;
 	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
 		return;
+
 	offset = virt_addr & ~PAGE_MASK;
 	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
 
-	idx = FIX_BTMAP_BEGIN;
+	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
 	while (nrpages > 0) {
 		early_clear_fixmap(idx);
 		--idx;
 		--nrpages;
 	}
 }
+
+void __this_fixmap_does_not_exist(void)
+{
+	WARN_ON(1);
+}
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 249e753ac805..0e990218a09c 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -65,7 +65,7 @@ enum fixed_addresses {
 #endif
 #ifdef CONFIG_X86_VISWS_APIC
 	FIX_CO_CPU,	/* Cobalt timer */
-	FIX_CO_APIC,	/* Cobalt APIC Redirection Table */ 
+	FIX_CO_APIC,	/* Cobalt APIC Redirection Table */
 	FIX_LI_PCIA,	/* Lithium PCI Bridge A */
 	FIX_LI_PCIB,	/* Lithium PCI Bridge B */
 #endif
@@ -74,7 +74,7 @@ enum fixed_addresses {
 #endif
 #ifdef CONFIG_X86_CYCLONE_TIMER
 	FIX_CYCLONE_TIMER, /*cyclone timer register*/
-#endif 
+#endif
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -91,9 +91,10 @@ enum fixed_addresses {
 #endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
-#define NR_FIX_BTMAPS	16
+#define NR_FIX_BTMAPS		16
+#define FIX_BTMAPS_NESTING	4
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
-	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	FIX_WP_TEST,
 	__end_of_fixed_addresses
 };

From a8efa1cd51d6f6407df5f42d8f1a7e9fc7178d1a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:45 +0100
Subject: [PATCH 763/890] x86: increase the number of boot-mappings

increase max early_ioremap() remapping size from 64K to 256K.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/fixmap_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 0e990218a09c..46bedb7b5426 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -91,7 +91,7 @@ enum fixed_addresses {
 #endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
-#define NR_FIX_BTMAPS		16
+#define NR_FIX_BTMAPS		64
 #define FIX_BTMAPS_NESTING	4
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,

From bd796ed0232a036f5ab14ac68d0a05f791ebcc3b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:45 +0100
Subject: [PATCH 764/890] x86: add debug warnings to early_ioremap()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 63f76ecae44c..4bcd5e065df4 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -306,12 +306,15 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
+	if (!size || last_addr < phys_addr) {
+		WARN_ON(1);
 		return NULL;
+	}
 
-	if (nesting >= FIX_BTMAPS_NESTING)
+	if (nesting >= FIX_BTMAPS_NESTING) {
+		WARN_ON(1);
 		return NULL;
-
+	}
 	early_ioremap_nested++;
 	/*
 	 * Mappings have to be page-aligned
@@ -324,8 +327,10 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 	 * Mappings have to fit in the FIX_BTMAP area.
 	 */
 	nrpages = size >> PAGE_SHIFT;
-	if (nrpages > NR_FIX_BTMAPS)
+	if (nrpages > NR_FIX_BTMAPS) {
+		WARN_ON(1);
 		return NULL;
+	}
 
 	/*
 	 * Ok, go for it..
@@ -351,11 +356,13 @@ void __init early_iounmap(void *addr, unsigned long size)
 	unsigned int nesting;
 
 	nesting = --early_ioremap_nested;
+	WARN_ON(nesting < 0);
 
 	virt_addr = (unsigned long)addr;
-	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
+	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
+		WARN_ON(1);
 		return;
-
+	}
 	offset = virt_addr & ~PAGE_MASK;
 	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
 

From d18d6d65efc2f5a3ff9a41528fbcb716b2e32615 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:45 +0100
Subject: [PATCH 765/890] x86: early_ioremap(), debugging

add early_ioremap() debug printouts via the early_ioremap_debug
boot option.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 4bcd5e065df4..30855c44818d 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -208,6 +208,17 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+
+int __initdata early_ioremap_debug;
+
+static int __init early_ioremap_debug_setup(char *str)
+{
+	early_ioremap_debug = 1;
+
+	return 1;
+}
+__setup("early_ioremap_debug", early_ioremap_debug_setup);
+
 static __initdata int after_paging_init;
 static __initdata unsigned long bm_pte[1024]
 				__attribute__((aligned(PAGE_SIZE)));
@@ -226,6 +237,9 @@ void __init early_ioremap_init(void)
 {
 	unsigned long *pgd;
 
+	if (early_ioremap_debug)
+		printk("early_ioremap_init()\n");
+
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
 	memset(bm_pte, 0, sizeof(bm_pte));
@@ -236,6 +250,9 @@ void __init early_ioremap_clear(void)
 {
 	unsigned long *pgd;
 
+	if (early_ioremap_debug)
+		printk("early_ioremap_clear()\n");
+
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = 0;
 	__flush_tlb_all();
@@ -303,6 +320,11 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
 	nesting = early_ioremap_nested;
+	if (early_ioremap_debug) {
+		printk("early_ioremap(%08lx, %08lx) [%d] => ",
+				phys_addr, size, nesting);
+		dump_stack();
+	}
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -343,6 +365,8 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 		--idx;
 		--nrpages;
 	}
+	if (early_ioremap_debug)
+		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
 
 	return (void*) (offset + fix_to_virt(idx0));
 }
@@ -358,6 +382,11 @@ void __init early_iounmap(void *addr, unsigned long size)
 	nesting = --early_ioremap_nested;
 	WARN_ON(nesting < 0);
 
+	if (early_ioremap_debug) {
+		printk("early_iounmap(%p, %08lx) [%d]\n", addr, size, nesting);
+		dump_stack();
+	}
+
 	virt_addr = (unsigned long)addr;
 	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
 		WARN_ON(1);

From 793b24a2dd0d695c748efba7b2295e7f9fdd3139 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:45 +0100
Subject: [PATCH 766/890] x86: make early_ioremap_debug early_param

This patch makes "early_ioremap_debug" a early parameter, because
"early_ioreamp/early_iounmap" is only used during early boot stage.

Signed-off-by: Huang Ying <ying.huang@intel.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 30855c44818d..7f7daf50efed 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -215,9 +215,9 @@ static int __init early_ioremap_debug_setup(char *str)
 {
 	early_ioremap_debug = 1;
 
-	return 1;
+	return 0;
 }
-__setup("early_ioremap_debug", early_ioremap_debug_setup);
+early_param("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
 static __initdata unsigned long bm_pte[1024]

From d690b2afd5a7a02816386aa704c8c0b1aca8d2de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:47 +0100
Subject: [PATCH 767/890] x86: add early_ioremap() leak detection

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 7f7daf50efed..b743de841f68 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -311,6 +311,22 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 
 int __initdata early_ioremap_nested;
 
+static int __init check_early_ioremap_leak(void)
+{
+	if (!early_ioremap_nested)
+		return 0;
+
+	printk(KERN_WARNING
+		"Debug warning: early ioremap leak of %d areas detected.\n",
+			early_ioremap_nested);
+	printk(KERN_WARNING
+		"please boot with early_ioremap_debug and report the dmesg.\n");
+	WARN_ON(1);
+
+	return 1;
+}
+late_initcall(check_early_ioremap_leak);
+
 void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long offset, last_addr;

From d701fda8601fe267fbd3648f108f0e751305101b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:48 +0100
Subject: [PATCH 768/890] x86: fix early_ioremap()/btmap

fix a long-standing weakness of the early-ioremap allocator: it
uses a single pgd entry for the boot mappings, and was not properly
protecting itself against crossing a 2MB (4MB) boundary.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/fixmap_32.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 46bedb7b5426..fde140fd6d95 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -90,10 +90,18 @@ enum fixed_addresses {
 	FIX_PARAVIRT_BOOTMAP,
 #endif
 	__end_of_permanent_fixed_addresses,
-	/* temporary boot-time mappings, used before ioremap() is functional */
+	/*
+	 * 256 temporary boot-time mappings, used by early_ioremap(),
+	 * before ioremap() is functional.
+	 *
+	 * We round it up to the next 512 pages boundary so that we
+	 * can have a single pgd entry and a single pte table:
+	 */
 #define NR_FIX_BTMAPS		64
 #define FIX_BTMAPS_NESTING	4
-	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_END =
+		__end_of_permanent_fixed_addresses + 512 -
+			(__end_of_permanent_fixed_addresses & 511),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	FIX_WP_TEST,
 	__end_of_fixed_addresses

From 927222b102186a6cc3e43e25062fcd18c800435e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:49 +0100
Subject: [PATCH 769/890] x86: fix EISA ioremap

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8534cb53ff60..3cf72977d012 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1142,11 +1142,11 @@ void __init trap_init(void)
 	int i;
 
 #ifdef CONFIG_EISA
-	void __iomem *p = ioremap(0x0FFFD9, 4);
+	void __iomem *p = early_ioremap(0x0FFFD9, 4);
 	if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
 		EISA_bus = 1;
 	}
-	iounmap(p);
+	early_iounmap(p, 4);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC

From 0e3a95492989e452a33e5df9b51365da574b854d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:49 +0100
Subject: [PATCH 770/890] x86: early_ioremap_init(), enhance warnings

enhance the debug warning in early_ioremap_init().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index b743de841f68..f8e6c4709cc2 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -243,7 +243,22 @@ void __init early_ioremap_init(void)
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
 	memset(bm_pte, 0, sizeof(bm_pte));
-	BUG_ON(pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+	/*
+	 * The boot-ioremap range spans multiple pgds, for which
+	 * we are not prepared:
+	 */
+	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		printk("pgd %p != %p\n",
+			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		printk("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
 }
 
 void __init early_ioremap_clear(void)

From 5b016432a768d94c707aee02a202c95d163f27c4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:50 +0100
Subject: [PATCH 771/890] x86: cpa: use wbinvd() macro instead of inline
 assembly in 64bit c_p_a()

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 59cd066f6741..b21a874c2dd3 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -86,7 +86,7 @@ static void flush_kernel_map(void *arg)
 	   much cheaper than WBINVD. */
 	/* clflush is still broken. Disable for now. */
 	if (1 || !cpu_has_clflush) {
-		asm volatile("wbinvd" ::: "memory");
+		wbinvd();
 	} else {
 		list_for_each_entry(pg, l, lru) {
 			void *addr = page_address(pg);

From a3ae91b0a02a4ca0088f30d572c848f5d522573a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:50 +0100
Subject: [PATCH 772/890] x86: cpa: remove unnecessary masking of address

virt_to_page does not care about the bits below the page granuality.
So don't mask them.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index b21a874c2dd3..4d172881af70 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -148,7 +148,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 	if (!kpte)
 		return 0;
 
-	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) {

From c93c82bbea20bef627dbe8e34a4bc8e526217104 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:50 +0100
Subject: [PATCH 773/890] x86: shrink __PAGE_KERNEL/__PAGE_KERNEL_EXEC on non
 PAE kernels

No need to make it 64bit there.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c     | 4 ++--
 include/asm-x86/pgtable.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e00c1d7128b1..08ba3a14c72c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -332,9 +332,9 @@ static void __init set_highmem_pages_init(int bad_ppro)
 #define set_highmem_pages_init(bad_ppro) do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
-unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
 EXPORT_SYMBOL(__PAGE_KERNEL);
-unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
+pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
 #ifdef CONFIG_NUMA
 extern void __init remap_numa_kva(void);
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 18a5f9e32834..0c3ecca0f79c 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -69,7 +69,7 @@
 #define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
 
 #ifndef __ASSEMBLY__
-extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #endif	/* __ASSEMBLY__ */
 #else
 #define __PAGE_KERNEL_EXEC						\

From c6ca18eb759c43ae20d953ef90c3de4557f9342a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:51 +0100
Subject: [PATCH 774/890] x86: add pte_pgprot to 32-bit

64bit already had it.

Needed for later patches.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 2 ++
 include/asm-x86/pgtable_64.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 0c3ecca0f79c..b4ee5939afe5 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -187,6 +187,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return __pte(val);
 }
 
+#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 139da50cd510..df1fcbd12e43 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -120,8 +120,6 @@ static inline void native_pgd_clear(pgd_t * pgd)
 
 #define pte_same(a, b)		((a).pte == (b).pte)
 
-#define pte_pgprot(a)	(__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
-
 #endif /* !__ASSEMBLY__ */
 
 #define PMD_SIZE	(_AC(1,UL) << PMD_SHIFT)

From aaa0e89023656c078c67cbd0460fc2ccbf3323c6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:51 +0100
Subject: [PATCH 775/890] x86: don't drop NX bit in pte modifier functions on
 32-bit

The pte_* modifier functions that cleared bits dropped the NX bit on 32bit
PAE because they only worked in int, but NX is in bit 63. Fix that
by adding appropiate casts so that the arithmetic happens as long long
on PAE kernels.

I decided to just use 64bit arithmetic instead of open coding like
pte_modify() because gcc should generate good enough code for that now.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index b4ee5939afe5..2f93723a3e18 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -147,17 +147,17 @@ static inline int pmd_large(pmd_t pte) {
 		(_PAGE_PSE|_PAGE_PRESENT);
 }
 
-static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
-static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
-static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_RW); }
-static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_NX); }
+static inline pte_t pte_mkclean(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); }
+static inline pte_t pte_mkexec(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); }
 static inline pte_t pte_mkdirty(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_DIRTY); }
 static inline pte_t pte_mkyoung(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_ACCESSED); }
 static inline pte_t pte_mkwrite(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_RW); }
 static inline pte_t pte_mkhuge(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_PSE); }
-static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_PSE); }
+static inline pte_t pte_clrhuge(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
 static inline pte_t pte_mkglobal(pte_t pte)	{ return __pte(pte_val(pte) | _PAGE_GLOBAL); }
-static inline pte_t pte_clrglobal(pte_t pte)	{ return __pte(pte_val(pte) & ~_PAGE_GLOBAL); }
+static inline pte_t pte_clrglobal(pte_t pte)	{ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); }
 
 extern pteval_t __supported_pte_mask;
 

From 6ba9b7d8f0fe786954015ce5c0ef1837d5df56b7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:52 +0100
Subject: [PATCH 776/890] x86: fix c_p_a() boot crash

fix:

> hm, i just found a failing 64-bit .config while testing your CPA
> patchset:
>
>  [    1.916541] CPA mapping 4k 0 large 2048 gb 0 x 0[0-0] miss 0
>  [    1.919874] Unable to handle kernel paging request at 000000000335aea8 RIP:
>  [    1.919874]  [<ffffffff8021d2d3>] change_page_attr+0x3/0x61
>  [    1.919874] PGD 0
>  [    1.919874] Oops: 0000 [1]
>  [    1.919874] CPU 0

This handles addresses which don't have a mem_map entry.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index c58fab061762..91e05a26004d 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -177,7 +177,7 @@ static __init int exercise_pageattr(void)
 			continue;
 		}
 
-		err = change_page_attr(virt_to_page(addr[i]), len[i],
+		err = change_page_attr_addr(addr[i], len[i],
 			    pte_pgprot(pte_clrhuge(pte_clrglobal(pte0))));
 		if (err < 0) {
 			printk(KERN_ERR "CPA %d failed %d\n", i, err);
@@ -212,7 +212,7 @@ static __init int exercise_pageattr(void)
 			failed++;
 			continue;
 		}
-		err = change_page_attr(virt_to_page(addr[i]), len[i],
+		err = change_page_attr_addr(addr[i], len[i],
 					  pte_pgprot(pte_mkglobal(*pte)));
 		if (err < 0) {
 			printk(KERN_ERR "CPA reverting failed: %d\n", err);

From 3c868823413d76bdd80c643603be8ab09dcb4d65 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:52 +0100
Subject: [PATCH 777/890] x86: c_p_a() fix: reorder TLB / cache flushes to
 follow Intel recommendation

Intel recommends to first flush the TLBs and then the caches
on caching attribute changes. c_p_a() previously did it the
other way round. Reorder that.

The procedure is still not fully compliant to the Intel documentation
because Intel recommends a all CPU synchronization step between
the TLB flushes and the cache flushes.

However on all new Intel CPUs this is now meaningless anyways
because they support Self-Snoop and can skip the cache flush
step anyway.

[ mingo@elte.hu: decoupled from clflush and ported it to x86.git ]

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 12 ++++++------
 arch/x86/mm/pageattr_64.c |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 523fd5b37df9..5cb5c7101f41 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -87,6 +87,12 @@ static void flush_kernel_map(void *arg)
 	struct list_head *lh = (struct list_head *)arg;
 	struct page *p;
 
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
 	/* High level code is not ready for clflush yet */
 	if (0 && cpu_has_clflush) {
 		list_for_each_entry(p, lh, lru)
@@ -95,12 +101,6 @@ static void flush_kernel_map(void *arg)
 		if (boot_cpu_data.x86_model >= 4)
 			wbinvd();
 	}
-
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
 }
 
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 4d172881af70..3ccdb1401e67 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -82,6 +82,8 @@ static void flush_kernel_map(void *arg)
 	struct list_head *l = (struct list_head *)arg;
 	struct page *pg;
 
+	__flush_tlb_all();
+
 	/* When clflush is available always use it because it is
 	   much cheaper than WBINVD. */
 	/* clflush is still broken. Disable for now. */
@@ -94,7 +96,6 @@ static void flush_kernel_map(void *arg)
 			clflush_cache_range(addr, PAGE_SIZE);
 		}
 	}
-	__flush_tlb_all();
 }
 
 static inline void flush_map(struct list_head *l)

From 895bdc29956e5c9e9e101b2b1c2f0ed34130f945 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:52 +0100
Subject: [PATCH 778/890] x86: c_p_a() make it more robust against use of PAT
 bits

Use the page table level instead of the PSE bit to check if the PTE
is for a 4K page or not. This makes the code more robust when the PAT
bit is changed because the PAT bit on 4K pages is in the same position
as the PSE bit.

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 4 ++--
 arch/x86/mm/pageattr_64.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 5cb5c7101f41..66688a630839 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -172,7 +172,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	BUG_ON(PageCompound(kpte_page));
 
 	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
-		if (!pte_huge(*kpte)) {
+		if (level == 3) {
 			set_pte_atomic(kpte, mk_pte(page, prot));
 		} else {
 			struct page *split;
@@ -190,7 +190,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		}
 		page_private(kpte_page)++;
 	} else {
-		if (!pte_huge(*kpte)) {
+		if (level == 3) {
 			set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
 			BUG_ON(page_private(kpte_page) == 0);
 			page_private(kpte_page)--;
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 3ccdb1401e67..73dbbb4048e6 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -153,7 +153,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) {
-		if (!pte_huge(*kpte)) {
+		if (level == 4) {
 			set_pte(kpte, pfn_pte(pfn, prot));
 		} else {
 			/*
@@ -172,7 +172,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 		}
 		page_private(kpte_page)++;
 	} else {
-		if (!pte_huge(*kpte)) {
+		if (level == 4) {
 			set_pte(kpte, pfn_pte(pfn, ref_prot));
 			BUG_ON(page_private(kpte_page) == 0);
 			page_private(kpte_page)--;

From 1e8e23bc469a11fc190a6bace857075d83dc2555 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:53 +0100
Subject: [PATCH 779/890] x86: introduce canon_pgprot()

Introduce canon_pgprot()

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 2f93723a3e18..2726ddc457fd 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -189,6 +189,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
 
+#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */

From 934d15854d04e8ca2495d8f5698164df990d5d66 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:33:53 +0100
Subject: [PATCH 780/890] x86: remove set_kernel_exec()

The SMP trampoline always runs in real mode, so making it executable
in the page tables doesn't make much sense because it executes
before page tables are set up. That was the only user of
set_kernel_exec(). Remove set_kernel_exec().

Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot_32.c | 11 -----------
 arch/x86/mm/init_32.c        | 29 -----------------------------
 include/asm-x86/pgtable_32.h | 12 ------------
 3 files changed, 52 deletions(-)

diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 7a62dced61ca..5787a0c3e296 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -107,7 +107,6 @@ u8 apicid_2_node[MAX_APICID];
 extern const unsigned char trampoline_data [];
 extern const unsigned char trampoline_end  [];
 static unsigned char *trampoline_base;
-static int trampoline_exec;
 
 static void map_cpu_to_logical_apicid(void);
 
@@ -139,10 +138,6 @@ void __init smp_alloc_memory(void)
 	 */
 	if (__pa(trampoline_base) >= 0x9F000)
 		BUG();
-	/*
-	 * Make the SMP trampoline executable:
-	 */
-	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
 }
 
 /*
@@ -1290,12 +1285,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	setup_ioapic_dest();
 #endif
 	zap_low_mappings();
-#ifndef CONFIG_HOTPLUG_CPU
-	/*
-	 * Disable executability of the SMP trampoline:
-	 */
-	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
 }
 
 void __init smp_intr_init(void)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 08ba3a14c72c..016c8ccd1d8d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -529,35 +529,6 @@ static void __init set_nx(void)
 	}
 }
 
-/*
- * Enables/disables executability of a given kernel page and
- * returns the previous setting.
- */
-int __init set_kernel_exec(unsigned long vaddr, int enable)
-{
-	pte_t *pte;
-	int ret = 1;
-	int level;
-
-	if (!nx_enabled)
-		goto out;
-
-	pte = lookup_address(vaddr, &level);
-	BUG_ON(!pte);
-
-	if (!pte_exec(*pte))
-		ret = 0;
-
-	if (enable)
-		pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
-	else
-		pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
-	pte_update_defer(&init_mm, vaddr, pte);
-	__flush_tlb_all();
-out:
-	return ret;
-}
-
 #endif
 
 /*
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 9381bd37b9b1..72eb06c2bac9 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -184,18 +184,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
  */
 extern pte_t *lookup_address(unsigned long address, int *level);
 
-/*
- * Make a given kernel text page executable/non-executable.
- * Returns the previous executability setting of that page (which
- * is used to restore the previous state). Used by the SMP bootup code.
- * NOTE: this is an __init function for security reasons.
- */
-#ifdef CONFIG_X86_PAE
- extern int set_kernel_exec(unsigned long vaddr, int enable);
-#else
- static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
-#endif
-
 #if defined(CONFIG_HIGHPTE)
 #define pte_offset_map(dir, address) \
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))

From 1a2b441231ddc12b785940000320894bfa02bd82 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 30 Jan 2008 13:33:54 +0100
Subject: [PATCH 781/890] x86: fix early_ioremap() on 64-bit

Fix early_ioremap() on x86-64

I had ACPI failures on several machines since a few days. Symptom
was NUMA nodes not getting detected or worse cores not getting detected.
They all came from ACPI not being able to read various of its tables. I finally
bisected it down to Jeremy's "put _PAGE_GLOBAL into PAGE_KERNEL" change.
With that the fix was fairly obvious. The problem was that early_ioremap()
didn't use a "_all" flush that would affect the global PTEs too. So
with global bits getting used everywhere now an early_ioremap would
not actually flush a mapping if something else was mapped previously
on that slot (which can happen with early_iounmap inbetween)

This patch changes all flushes in init_64.c to be __flush_tlb_all()
and fixes the problem here.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fd9d7f77786..c250580a9432 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -228,7 +228,7 @@ __meminit void *early_ioremap(unsigned long addr, unsigned long size)
 		addr &= PMD_MASK;
 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
 			set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
-		__flush_tlb();
+		__flush_tlb_all();
 		return (void *)vaddr;
 	next:
 		;
@@ -249,7 +249,7 @@ __meminit void early_iounmap(void *addr, unsigned long size)
 	pmd = level2_kernel_pgt + pmd_index(vaddr);
 	for (i = 0; i < pmds; i++)
 		pmd_clear(pmd + i);
-	__flush_tlb();
+	__flush_tlb_all();
 }
 
 static void __meminit
@@ -317,7 +317,7 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
 		spin_unlock(&init_mm.page_table_lock);
 		unmap_low_page(pmd);
 	}
-	__flush_tlb();
+	__flush_tlb_all();
 } 
 
 static void __init find_early_table_space(unsigned long end)

From cd58289667293593b04fd315ec7f2f37589134cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:54 +0100
Subject: [PATCH 782/890] x86: fix more non-global TLB flushes

fix more __flush_tlb() instances, out of caution.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 58438bafedca..a317336cdeaa 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@ static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
 	pgd_clear(pgd);
-	__flush_tlb();
+	__flush_tlb_all();
 }
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 

From a2172e2586f6662af996e47f417bb718c37cf8d2 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:33:55 +0100
Subject: [PATCH 783/890] x86: fix some bugs about EFI runtime code mapping

This patch fixes some bugs of making EFI runtime code executable.

- Use change_page_attr in i386 too. Because the runtime code may be
  mapped not through ioremap.

- If there is no _PAGE_NX in __supported_pte_mask, the change_page_attr
  is not called.

- Make efi_ioremap map pages as PAGE_KERNEL_EXEC_NOCACHE, because EFI runtime
  code may be mapped through efi_ioremap.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi.c    | 35 ++++++++++++++++++++++++++++++-----
 arch/x86/kernel/efi_64.c | 26 ++++++--------------------
 include/asm-x86/efi.h    |  1 -
 3 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 5d492f99d967..834ecfb41e97 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -40,6 +40,8 @@
 #include <asm/setup.h>
 #include <asm/efi.h>
 #include <asm/time.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	1
 #define PFX 		"EFI: "
@@ -379,6 +381,32 @@ void __init efi_init(void)
 #endif
 }
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static void __init runtime_code_page_mkexec(void)
+{
+	efi_memory_desc_t *md;
+	unsigned long end;
+	void *p;
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return;
+
+	/* Make EFI runtime service code area executable */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
+		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+		if (md->type == EFI_RUNTIME_SERVICES_CODE &&
+		    (end >> PAGE_SHIFT) <= end_pfn_map)
+			change_page_attr_addr(md->virt_addr,
+					      md->num_pages,
+					      PAGE_KERNEL_EXEC_NOCACHE);
+	}
+	__flush_tlb_all();
+}
+#else
+static inline void __init runtime_code_page_mkexec(void) { }
+#endif
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -399,9 +427,9 @@ void __init efi_enter_virtual_mode(void)
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
+		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 		if ((md->attribute & EFI_MEMORY_WB) &&
-		    (((md->phys_addr + (md->num_pages<<EFI_PAGE_SHIFT)) >>
-		      PAGE_SHIFT) < end_pfn_map))
+		    ((end >> PAGE_SHIFT) <= end_pfn_map))
 			md->virt_addr = (unsigned long)__va(md->phys_addr);
 		else
 			md->virt_addr = (unsigned long)
@@ -410,7 +438,6 @@ void __init efi_enter_virtual_mode(void)
 		if (!md->virt_addr)
 			printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
 			       (unsigned long long)md->phys_addr);
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 		if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
 		    ((unsigned long)efi_phys.systab < end))
 			efi.systab = (efi_system_table_t *)(unsigned long)
@@ -448,9 +475,7 @@ void __init efi_enter_virtual_mode(void)
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
 	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
-#ifdef CONFIG_X86_64
 	runtime_code_page_mkexec();
-#endif
 }
 
 /*
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 1f8bbd9644d7..9f8a75594398 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -33,7 +33,6 @@
 #include <asm/e820.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
 #include <asm/proto.h>
 #include <asm/efi.h>
 
@@ -55,7 +54,7 @@ static void __init early_mapping_set_exec(unsigned long start,
 		else
 			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
 					    __supported_pte_mask));
-		if (pte_huge(*kpte))
+		if (level == 4)
 			start = (start + PMD_SIZE) & PMD_MASK;
 		else
 			start = (start + PAGE_SIZE) & PAGE_MASK;
@@ -67,6 +66,9 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
 	efi_memory_desc_t *md;
 	void *p;
 
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return;
+
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
@@ -116,22 +118,6 @@ void __init efi_reserve_bootmem(void)
 				memmap.nr_map * memmap.desc_size);
 }
 
-void __init runtime_code_page_mkexec(void)
-{
-	efi_memory_desc_t *md;
-	void *p;
-
-	/* Make EFI runtime service code area executable */
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-		if (md->type == EFI_RUNTIME_SERVICES_CODE)
-			change_page_attr_addr(md->virt_addr,
-					      md->num_pages,
-					      PAGE_KERNEL_EXEC);
-	}
-	__flush_tlb_all();
-}
-
 void __iomem * __init efi_ioremap(unsigned long offset,
 				  unsigned long size)
 {
@@ -146,8 +132,8 @@ void __iomem * __init efi_ioremap(unsigned long offset,
 		return NULL;
 
 	for (i = 0; i < pages; i++) {
-		set_fixmap_nocache(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
-				   offset);
+		__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
+			     offset, PAGE_KERNEL_EXEC_NOCACHE);
 		offset += PAGE_SIZE;
 		pages_mapped++;
 	}
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 8380131ca542..9e08ae84795d 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -95,6 +95,5 @@ extern void *efi_ioremap(unsigned long offset, unsigned long size);
 extern void efi_reserve_bootmem(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
-extern void runtime_code_page_mkexec(void);
 
 #endif

From 78c94abaea55df7003f3ad0e5b6c78ee1cc860bb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:55 +0100
Subject: [PATCH 784/890] x86: simplify the 32-bit cpa code

simplify the 32-bit cpa code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 179 ++++++++++++--------------------------
 1 file changed, 56 insertions(+), 123 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 66688a630839..570a37bf1401 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -15,9 +15,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
-static DEFINE_SPINLOCK(cpa_lock);
-static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -48,9 +45,7 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	pte_t *pbase;
 	int i;
 
-	spin_unlock_irq(&cpa_lock);
 	base = alloc_pages(GFP_KERNEL, 0);
-	spin_lock_irq(&cpa_lock);
 	if (!base)
 		return NULL;
 
@@ -58,9 +53,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	 * page_private is used to track the number of entries in
 	 * the page table page that have non standard attributes.
 	 */
-	SetPagePrivate(base);
-	page_private(base) = 0;
-
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
@@ -73,36 +65,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	return base;
 }
 
-static void cache_flush_page(struct page *p)
-{
-	void *addr = page_address(p);
-	int i;
-
-	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr + i);
-}
-
-static void flush_kernel_map(void *arg)
-{
-	struct list_head *lh = (struct list_head *)arg;
-	struct page *p;
-
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
-
-	/* High level code is not ready for clflush yet */
-	if (0 && cpu_has_clflush) {
-		list_for_each_entry(p, lh, lru)
-			cache_flush_page(p);
-	} else {
-		if (boot_cpu_data.x86_model >= 4)
-			wbinvd();
-	}
-}
-
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	unsigned long flags;
@@ -127,36 +89,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/*
- * No more special protections in this 2/4MB area - revert to a large
- * page again.
- */
-static inline void revert_page(struct page *kpte_page, unsigned long address)
-{
-	pgprot_t ref_prot;
-	pte_t *linear;
-
-	ref_prot =
-	((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-		? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
-	linear = (pte_t *)
-		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
-	set_pmd_pte(linear,  address,
-		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
-			    ref_prot));
-}
-
-static inline void save_page(struct page *kpte_page)
-{
-	if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
-		list_add(&kpte_page->lru, &df_list);
-}
-
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
+	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
+	pgprot_t oldprot;
 	pte_t *kpte;
 	int level;
 
@@ -167,58 +105,41 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	if (!kpte)
 		return -EINVAL;
 
+	oldprot = pte_pgprot(*kpte);
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
-		if (level == 3) {
-			set_pte_atomic(kpte, mk_pte(page, prot));
-		} else {
-			struct page *split;
-			pgprot_t ref_prot;
-
-			ref_prot =
-			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
-			split = split_large_page(address, prot, ref_prot);
-			if (!split)
-				return -ENOMEM;
-
-			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
-			kpte_page = split;
-		}
-		page_private(kpte_page)++;
-	} else {
-		if (level == 3) {
-			set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-			BUG_ON(page_private(kpte_page) == 0);
-			page_private(kpte_page)--;
-		} else
-			BUG();
-	}
-
 	/*
-	 * If the pte was reserved, it means it was created at boot
-	 * time (not via split_large_page) and in turn we must not
-	 * replace it with a largepage.
+	 * Better fail early if someone sets the kernel text to NX.
+	 * Does not cover __inittext
 	 */
+	BUG_ON(address >= (unsigned long)&_text &&
+		address < (unsigned long)&_etext &&
+	       (pgprot_val(prot) & _PAGE_NX));
 
-	save_page(kpte_page);
-	if (!PageReserved(kpte_page)) {
-		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
-			paravirt_release_pt(page_to_pfn(kpte_page));
-			revert_page(kpte_page, address);
-		}
+	if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
+		ref_prot = PAGE_KERNEL_EXEC;
+
+	ref_prot = canon_pgprot(ref_prot);
+	prot = canon_pgprot(prot);
+
+	if (level == 3) {
+		set_pte_atomic(kpte, mk_pte(page, prot));
+	} else {
+		struct page *split;
+		split = split_large_page(address, prot, ref_prot);
+		if (!split)
+			return -ENOMEM;
+
+		/*
+		 * There's a small window here to waste a bit of RAM:
+		 */
+		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
 	}
 	return 0;
 }
 
-static inline void flush_map(struct list_head *l)
-{
-	on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
 /*
  * Change the page attributes of an page in the linear mapping.
  *
@@ -234,40 +155,52 @@ static inline void flush_map(struct list_head *l)
  */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
-	unsigned long flags;
 	int err = 0, i;
 
-	spin_lock_irqsave(&cpa_lock, flags);
 	for (i = 0; i < numpages; i++, page++) {
 		err = __change_page_attr(page, prot);
 		if (err)
 			break;
 	}
-	spin_unlock_irqrestore(&cpa_lock, flags);
 
 	return err;
 }
 EXPORT_SYMBOL(change_page_attr);
 
+int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
+{
+	int i;
+	unsigned long pfn = (addr >> PAGE_SHIFT);
+
+	for (i = 0; i < numpages; i++) {
+		if (!pfn_valid(pfn + i)) {
+			break;
+		} else {
+			int level;
+			pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
+			BUG_ON(pte && !pte_none(*pte));
+		}
+	}
+	return change_page_attr(virt_to_page(addr), i, prot);
+}
+
+static void flush_kernel_map(void *arg)
+{
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
+	if (boot_cpu_data.x86_model >= 4)
+		wbinvd();
+}
+
 void global_flush_tlb(void)
 {
-	struct page *pg, *next;
-	struct list_head l;
-
 	BUG_ON(irqs_disabled());
 
-	spin_lock_irq(&cpa_lock);
-	list_replace_init(&df_list, &l);
-	spin_unlock_irq(&cpa_lock);
-	flush_map(&l);
-	list_for_each_entry_safe(pg, next, &l, lru) {
-		list_del(&pg->lru);
-		clear_bit(PG_arch_1, &pg->flags);
-		if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
-			continue;
-		ClearPagePrivate(pg);
-		__free_page(pg);
-	}
+	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 EXPORT_SYMBOL(global_flush_tlb);
 

From 97f99fedf27f337e2d3d95ca01e321beb26edc3d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:55 +0100
Subject: [PATCH 785/890] x86: simplify 32-bit cpa largepage splitting

simplify 32-bit cpa largepage splitting: do a pure split and repeat
the pte lookup to get the new pte modified.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 570a37bf1401..1011b21f8db0 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -38,7 +38,7 @@ pte_t *lookup_address(unsigned long address, int *level)
 }
 
 static struct page *
-split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+split_large_page(unsigned long address, pgprot_t ref_prot)
 {
 	unsigned long addr;
 	struct page *base;
@@ -58,10 +58,9 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	pbase = (pte_t *)page_address(base);
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
-					   addr == address ? prot : ref_prot));
-	}
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
 	return base;
 }
 
@@ -101,6 +100,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
 
+repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return -EINVAL;
@@ -128,7 +128,8 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
 		struct page *split;
-		split = split_large_page(address, prot, ref_prot);
+
+		split = split_large_page(address, ref_prot);
 		if (!split)
 			return -ENOMEM;
 
@@ -136,6 +137,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		 * There's a small window here to waste a bit of RAM:
 		 */
 		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
+		goto repeat;
 	}
 	return 0;
 }

From bb5c2dbd57d93a36b0386dd783dd95e0cbaaa23f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:56 +0100
Subject: [PATCH 786/890] x86: further cpa largepage-split cleanups

further cpa largepage-split cleanups: make the splitup isolated
functionality, without leaking details back into __change_page_attr().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 89 +++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 40 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 1011b21f8db0..14c923b3b07f 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,33 +37,6 @@ pte_t *lookup_address(unsigned long address, int *level)
 	return pte_offset_kernel(pmd, address);
 }
 
-static struct page *
-split_large_page(unsigned long address, pgprot_t ref_prot)
-{
-	unsigned long addr;
-	struct page *base;
-	pte_t *pbase;
-	int i;
-
-	base = alloc_pages(GFP_KERNEL, 0);
-	if (!base)
-		return NULL;
-
-	/*
-	 * page_private is used to track the number of entries in
-	 * the page table page that have non standard attributes.
-	 */
-	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
-	pbase = (pte_t *)page_address(base);
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
-
-	return base;
-}
-
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	unsigned long flags;
@@ -88,14 +61,58 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+static int
+split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+{
+	int i, level;
+	unsigned long addr;
+	pte_t *pbase, *tmp;
+	struct page *base;
+
+	base = alloc_pages(GFP_KERNEL, 0);
+	if (!base)
+		return -ENOMEM;
+
+	down_write(&init_mm.mmap_sem);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	address = __pa(address);
+	addr = address & LARGE_PAGE_MASK;
+	pbase = (pte_t *)page_address(base);
+	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
+	/*
+	 * Install the new, split up pagetable:
+	 */
+	set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	base = NULL;
+
+out_unlock:
+	up_write(&init_mm.mmap_sem);
+
+	if (base)
+		__free_pages(base, 0);
+
+	return 0;
+}
+
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
 	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
+	int level, err = 0;
 	pgprot_t oldprot;
 	pte_t *kpte;
-	int level;
 
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
@@ -127,19 +144,11 @@ repeat:
 	if (level == 3) {
 		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
-		struct page *split;
-
-		split = split_large_page(address, ref_prot);
-		if (!split)
-			return -ENOMEM;
-
-		/*
-		 * There's a small window here to waste a bit of RAM:
-		 */
-		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
-		goto repeat;
+		err = split_large_page(kpte, address, ref_prot);
+		if (!err)
+			goto repeat;
 	}
-	return 0;
+	return err;
 }
 
 /*

From 5508a7489659f1eed108d3ae7c2d36c8794ee330 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:56 +0100
Subject: [PATCH 787/890] x86: cpa self-test fixes

cpa self-test fixes. change_page_attr_addr() was buggy, it
passed in a virtual address as a physical one.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c |  8 --------
 arch/x86/mm/pageattr_32.c   | 10 +++++++---
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 91e05a26004d..a12dabbd5c33 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -106,12 +106,6 @@ static __init int print_split(struct split_state *s)
 	return err;
 }
 
-static __init int state_same(struct split_state *a, struct split_state *b)
-{
-	return a->lpg == b->lpg && a->gpg == b->gpg && a->spg == b->spg &&
-		a->exec == b->exec;
-}
-
 static unsigned long __initdata addr[NTEST];
 static unsigned int __initdata len[NTEST];
 
@@ -229,8 +223,6 @@ static __init int exercise_pageattr(void)
 	global_flush_tlb();
 
 	failed += print_split(&sc);
-	if (!state_same(&sa, &sc))
-		failed++;
 
 	if (failed)
 		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 14c923b3b07f..ad0868bfa374 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -79,8 +79,10 @@ split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte)
+	if (tmp != kpte) {
+		WARN_ON_ONCE(1);
 		goto out_unlock;
+	}
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
@@ -181,17 +183,19 @@ EXPORT_SYMBOL(change_page_attr);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
 {
 	int i;
-	unsigned long pfn = (addr >> PAGE_SHIFT);
+	unsigned long pfn = (__pa(addr) >> PAGE_SHIFT);
 
 	for (i = 0; i < numpages; i++) {
 		if (!pfn_valid(pfn + i)) {
+			WARN_ON_ONCE(1);
 			break;
 		} else {
 			int level;
 			pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
-			BUG_ON(pte && !pte_none(*pte));
+			BUG_ON(pte && pte_none(*pte));
 		}
 	}
+
 	return change_page_attr(virt_to_page(addr), i, prot);
 }
 

From 7afe15b9d888050435cd154906828df88d4e667d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:57 +0100
Subject: [PATCH 788/890] x86: simplify cpa largepage split, #3

simplify cpa largepage split: push the reference protection bits
into the largepage-splitting function.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index ad0868bfa374..0966023dfd70 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,13 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-static int
-split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	int i, level;
+	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
+	int i, level;
 
 	base = alloc_pages(GFP_KERNEL, 0);
 	if (!base)
@@ -109,11 +109,9 @@ out_unlock:
 
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
-	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
 	int level, err = 0;
-	pgprot_t oldprot;
 	pte_t *kpte;
 
 	BUG_ON(PageHighMem(page));
@@ -124,7 +122,6 @@ repeat:
 	if (!kpte)
 		return -EINVAL;
 
-	oldprot = pte_pgprot(*kpte);
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
@@ -137,16 +134,10 @@ repeat:
 		address < (unsigned long)&_etext &&
 	       (pgprot_val(prot) & _PAGE_NX));
 
-	if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-		ref_prot = PAGE_KERNEL_EXEC;
-
-	ref_prot = canon_pgprot(ref_prot);
-	prot = canon_pgprot(prot);
-
 	if (level == 3) {
-		set_pte_atomic(kpte, mk_pte(page, prot));
+		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 	} else {
-		err = split_large_page(kpte, address, ref_prot);
+		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;
 	}

From 9a3dc7804e9856668caef41efc54179e61ffccc0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:57 +0100
Subject: [PATCH 789/890] x86: cpa: simplify locking

further simplify cpa locking: since the largepage-split is a
slowpath, use the pgd_lock for the whole operation, intead
of the mmap_sem.

This also makes it suitable for DEBUG_PAGEALLOC purposes again.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 0966023dfd70..9cf2fea54eb5 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,9 +37,8 @@ pte_t *lookup_address(unsigned long address, int *level)
 	return pte_offset_kernel(pmd, address);
 }
 
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
 	struct page *page;
 
 	/* change init_mm */
@@ -47,7 +46,6 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	for (page = pgd_list; page; page = (struct page *)page->index) {
 		pgd_t *pgd;
 		pud_t *pud;
@@ -58,12 +56,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 		pmd = pmd_offset(pud, address);
 		set_pte_atomic((pte_t *)pmd, pte);
 	}
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	unsigned long flags;
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
@@ -73,7 +71,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	down_write(&init_mm.mmap_sem);
+	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -95,11 +93,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	/*
 	 * Install the new, split up pagetable:
 	 */
-	set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 
 out_unlock:
-	up_write(&init_mm.mmap_sem);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 
 	if (base)
 		__free_pages(base, 0);

From 12d6f21eacc21d84a809829543f2fe45c7e37319 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:58 +0100
Subject: [PATCH 790/890] x86: do not PSE on CONFIG_DEBUG_PAGEALLOC=y

get more testing of the c_p_a() code done by not turning off
PSE on DEBUG_PAGEALLOC.

this simplifies the early pagetable setup code, and tests
the largepage-splitup code quite heavily.

In the end, all the largepages will be split up pretty quickly,
so there's no difference to how DEBUG_PAGEALLOC worked before.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/common.c |  7 -------
 arch/x86/mm/pageattr_32.c    | 12 +++++++++++-
 include/asm-x86/cacheflush.h |  5 -----
 include/linux/mm.h           | 14 +++++++++++++-
 init/main.c                  |  5 +++++
 5 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bba850b05d0e..db28aa9e2f69 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -641,13 +641,6 @@ void __init early_cpu_init(void)
 	nexgen_init_cpu();
 	umc_init_cpu();
 	early_cpu_detect();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	/* pse is not compatible with on-the-fly unmapping,
-	 * disable it even if the cpus claim to support it.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PSE);
-#endif
 }
 
 /* Make sure %fs is initialized properly in idle threads */
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 9cf2fea54eb5..dd49b16b3a0e 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,17 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned long flags;
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
 	int i, level;
 
-	base = alloc_pages(GFP_KERNEL, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	gfp_flags = GFP_ATOMIC;
+#endif
+	base = alloc_pages(gfp_flags, 0);
 	if (!base)
 		return -ENOMEM;
 
@@ -218,6 +222,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 					   numpages * PAGE_SIZE);
 	}
 
+	/*
+	 * If page allocator is not up yet then do not call c_p_a():
+	 */
+	if (!debug_pagealloc_enabled)
+		return;
+
 	/*
 	 * the return value is ignored - the calls cannot fail,
 	 * large pages are disabled at boot time.
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 9411a2d3f19c..fccb563e2305 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -29,11 +29,6 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 void clflush_cache_range(void *addr, int size);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-/* internal debugging function */
-void kernel_map_pages(struct page *page, int numpages, int enable);
-#endif
-
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3c22d971afa7..1bba6789a50a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm,
 }
 #endif /* CONFIG_PROC_FS */
 
-#ifndef CONFIG_DEBUG_PAGEALLOC
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern int debug_pagealloc_enabled;
+
+extern void kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void enable_debug_pagealloc(void)
+{
+	debug_pagealloc_enabled = 1;
+}
+#else
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable) {}
+static inline void enable_debug_pagealloc(void)
+{
+}
 #endif
 
 extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
diff --git a/init/main.c b/init/main.c
index 3316dffe3e57..cb81ed116f62 100644
--- a/init/main.c
+++ b/init/main.c
@@ -318,6 +318,10 @@ static int __init unknown_bootoption(char *param, char *val)
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+int __read_mostly debug_pagealloc_enabled = 0;
+#endif
+
 static int __init init_setup(char *str)
 {
 	unsigned int i;
@@ -552,6 +556,7 @@ asmlinkage void __init start_kernel(void)
 	preempt_disable();
 	build_all_zonelists();
 	page_alloc_init();
+	enable_debug_pagealloc();
 	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
 	parse_args("Booting kernel", static_command_line, __start___param,

From 55ce29ba16f82a31424a98988cf37c3babe1b7c8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:58 +0100
Subject: [PATCH 791/890] x86: cpa self-test, WARN_ON()

add a WARN_ON() to the cpa-self-test failure branch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index a12dabbd5c33..0dce0e248a42 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -224,10 +224,12 @@ static __init int exercise_pageattr(void)
 
 	failed += print_split(&sc);
 
-	if (failed)
+	if (failed) {
 		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
-	else
+		WARN_ON(1);
+	} else {
 		printk(KERN_INFO "CPA selftests PASSED\n");
+	}
 
 	return 0;
 }

From bbb09f5cfcde7e4cf1466111d671ac4a62931148 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:59 +0100
Subject: [PATCH 792/890] x86: prepare for the unification of the cpa code

prepare for the unification of the cpa code, by unifying the
lookup_address() logic between 32-bit and 64-bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c | 4 ++--
 arch/x86/mm/pageattr_32.c   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dce0e248a42..6cc106b388a5 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -20,10 +20,10 @@ enum {
 	LOWEST_LEVEL		= 4,
 	LPS			= (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
-	LOWEST_LEVEL		= 3,
+	LOWEST_LEVEL		= 4,
 	LPS			= (1 << PMD_SHIFT),
 #else
-	LOWEST_LEVEL		= 3, /* lookup_address lies here */
+	LOWEST_LEVEL		= 4, /* lookup_address lies here */
 	LPS			= (1 << 22),
 #endif
 	GPS			= (1<<30)
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index dd49b16b3a0e..1c7bd81a4194 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -29,10 +29,10 @@ pte_t *lookup_address(unsigned long address, int *level)
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
-	*level = 2;
+	*level = 3;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
-	*level = 3;
+	*level = 4;
 
 	return pte_offset_kernel(pmd, address);
 }
@@ -136,7 +136,7 @@ repeat:
 		address < (unsigned long)&_etext &&
 	       (pgprot_val(prot) & _PAGE_NX));
 
-	if (level == 3) {
+	if (level == 4) {
 		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 	} else {
 		err = split_large_page(kpte, address);

From a5f55035f63cf040b4a7161226ec35520cc10daa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:59 +0100
Subject: [PATCH 793/890] x86: prepare for the unification of the cpa code

prepare for the unification of the cpa code, by unifying the
lookup_address() logic between 32-bit and 64-bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 73dbbb4048e6..7b734ee5b7fd 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -19,26 +19,21 @@ pte_t *lookup_address(unsigned long address, int *level)
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
 
 	if (pgd_none(*pgd))
 		return NULL;
 	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
+	if (pud_none(*pud))
 		return NULL;
 	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
+	if (pmd_none(*pmd))
 		return NULL;
 	*level = 3;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
 	*level = 4;
 
-	pte = pte_offset_kernel(pmd, address);
-	if (pte && !pte_present(*pte))
-		pte = NULL;
-
-	return pte;
+	return pte_offset_kernel(pmd, address);
 }
 
 static struct page *

From d6ee09a2a0865f5640592e73ca5da4b06404b200 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:59 +0100
Subject: [PATCH 794/890] x86: simplify pageattr_64.c

simplify pageattr_64.c.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 172 +++++++++++++-------------------------
 1 file changed, 57 insertions(+), 115 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 7b734ee5b7fd..8493c469f877 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -9,6 +9,14 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
+void clflush_cache_range(void *addr, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
+		clflush(addr+i);
+}
+
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -47,12 +55,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	base = alloc_pages(GFP_KERNEL, 0);
 	if (!base)
 		return NULL;
-	/*
-	 * page_private is used to track the number of entries in
-	 * the page table page have non standard attributes.
-	 */
-	SetPagePrivate(base);
-	page_private(base) = 0;
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
@@ -64,80 +66,13 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	return base;
 }
 
-void clflush_cache_range(void *addr, int size)
-{
-	int i;
-
-	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr+i);
-}
-
-static void flush_kernel_map(void *arg)
-{
-	struct list_head *l = (struct list_head *)arg;
-	struct page *pg;
-
-	__flush_tlb_all();
-
-	/* When clflush is available always use it because it is
-	   much cheaper than WBINVD. */
-	/* clflush is still broken. Disable for now. */
-	if (1 || !cpu_has_clflush) {
-		wbinvd();
-	} else {
-		list_for_each_entry(pg, l, lru) {
-			void *addr = page_address(pg);
-
-			clflush_cache_range(addr, PAGE_SIZE);
-		}
-	}
-}
-
-static inline void flush_map(struct list_head *l)
-{
-	on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
-static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
-
-static inline void save_page(struct page *fpage)
-{
-	if (!test_and_set_bit(PG_arch_1, &fpage->flags))
-		list_add(&fpage->lru, &deferred_pages);
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
-{
-	unsigned long pfn;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t large_pte;
-
-	pgd = pgd_offset_k(address);
-	BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd, address);
-	BUG_ON(pud_none(*pud));
-	pmd = pmd_offset(pud, address);
-	BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
-	pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
-	large_pte = pfn_pte(pfn, ref_prot);
-	large_pte = pte_mkhuge(large_pte);
-
-	set_pte((pte_t *)pmd, large_pte);
-}
-
 static int
 __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 		   pgprot_t ref_prot)
 {
 	struct page *kpte_page;
-	pgprot_t ref_prot2;
 	pte_t *kpte;
+	pgprot_t ref_prot2, oldprot;
 	int level;
 
 	kpte = lookup_address(address, &level);
@@ -145,8 +80,12 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 		return 0;
 
 	kpte_page = virt_to_page(kpte);
+	oldprot = pte_pgprot(*kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
+	ref_prot = canon_pgprot(ref_prot);
+	prot = canon_pgprot(prot);
+
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) {
 		if (level == 4) {
 			set_pte(kpte, pfn_pte(pfn, prot));
@@ -165,38 +104,29 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 			set_pte(kpte, mk_pte(split, ref_prot2));
 			kpte_page = split;
 		}
-		page_private(kpte_page)++;
 	} else {
 		if (level == 4) {
 			set_pte(kpte, pfn_pte(pfn, ref_prot));
-			BUG_ON(page_private(kpte_page) == 0);
-			page_private(kpte_page)--;
 		} else
 			BUG();
 	}
 
-	/* on x86-64 the direct mapping set at boot is not using 4k pages */
-	BUG_ON(PageReserved(kpte_page));
-
-	save_page(kpte_page);
-	if (page_private(kpte_page) == 0)
-		revert_page(address, ref_prot);
 	return 0;
 }
 
-/*
- * Change the page attributes of an page in the linear mapping.
+/**
+ * change_page_attr_addr - Change page table attributes in linear mapping
+ * @address: Virtual address in linear mapping.
+ * @numpages: Number of pages to change
+ * @prot:    New page table attribute (PAGE_*)
  *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
+ * Change page attributes of a page in the direct mapping. This is a variant
+ * of change_page_attr() that also works on memory holes that do not have
+ * mem_map entry (pfn_valid() is false).
  *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
+ * See change_page_attr() documentation for more details.
  */
+
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
 	int err = 0, kernel_map = 0, i;
@@ -236,7 +166,26 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 	return err;
 }
 
-/* Don't call this for MMIO areas that may not have a mem_map entry */
+/**
+ * change_page_attr - Change page table attributes in the linear mapping.
+ * @page: First page to change
+ * @numpages: Number of pages to change
+ * @prot: New protection/caching type (PAGE_*)
+ *
+ * Returns 0 on success, otherwise a negated errno.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ *
+ * Caller must call global_flush_tlb() later to make the changes active.
+ *
+ * The caller needs to ensure that there are no conflicting mappings elsewhere
+ * (e.g. in user space) * This function only deals with the kernel linear map.
+ *
+ * For MMIO areas without mem_map use change_page_attr_addr() instead.
+ */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	unsigned long addr = (unsigned long)page_address(page);
@@ -245,29 +194,22 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 }
 EXPORT_SYMBOL(change_page_attr);
 
+static void flush_kernel_map(void *arg)
+{
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
+	if (boot_cpu_data.x86_model >= 4)
+		wbinvd();
+}
+
 void global_flush_tlb(void)
 {
-	struct page *pg, *next;
-	struct list_head l;
+	BUG_ON(irqs_disabled());
 
-	/*
-	 * Write-protect the semaphore, to exclude two contexts
-	 * doing a list_replace_init() call in parallel and to
-	 * exclude new additions to the deferred_pages list:
-	 */
-	down_write(&init_mm.mmap_sem);
-	list_replace_init(&deferred_pages, &l);
-	up_write(&init_mm.mmap_sem);
-
-	flush_map(&l);
-
-	list_for_each_entry_safe(pg, next, &l, lru) {
-		list_del(&pg->lru);
-		clear_bit(PG_arch_1, &pg->flags);
-		if (page_private(pg) != 0)
-			continue;
-		ClearPagePrivate(pg);
-		__free_page(pg);
-	}
+	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 EXPORT_SYMBOL(global_flush_tlb);

From 5e5224a77eb4c68a982c8096945b0af902f9d655 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:00 +0100
Subject: [PATCH 795/890] x86: unify header part of pageattr_64.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 8493c469f877..107b03c7113c 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -19,8 +19,9 @@ void clflush_cache_range(void *addr, int size)
 
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 #include <asm/uaccess.h>
-#include <asm/io.h>
+#include <asm/pgalloc.h>
 
 pte_t *lookup_address(unsigned long address, int *level)
 {

From a5c62514885a778016c03dbba833d4ff6786e30c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:00 +0100
Subject: [PATCH 796/890] x86: clean up and simplify 64-bit split_large_page()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 107b03c7113c..cbded8300e36 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -46,7 +46,7 @@ pte_t *lookup_address(unsigned long address, int *level)
 }
 
 static struct page *
-split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+split_large_page(unsigned long address, pgprot_t ref_prot)
 {
 	unsigned long addr;
 	struct page *base;
@@ -60,10 +60,9 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-		pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
-				   addr == address ? prot : ref_prot);
-	}
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, ref_prot);
+
 	return base;
 }
 
@@ -76,6 +75,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 	pgprot_t ref_prot2, oldprot;
 	int level;
 
+repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return 0;
@@ -98,12 +98,12 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 			struct page *split;
 
 			ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-			split = split_large_page(address, prot, ref_prot2);
+			split = split_large_page(address, ref_prot2);
 			if (!split)
 				return -ENOMEM;
 			pgprot_val(ref_prot2) &= ~_PAGE_NX;
 			set_pte(kpte, mk_pte(split, ref_prot2));
-			kpte_page = split;
+			goto repeat;
 		}
 	} else {
 		if (level == 4) {

From b65e6390fb99ca93877ffc1c84e93c857f32dafc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:01 +0100
Subject: [PATCH 797/890] x86: introduce native_set_pte_atomic() on 64-bit too

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable_64.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index df1fcbd12e43..223b0032bab5 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -76,6 +76,11 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 	*ptep = pte;
 }
 
+static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	native_set_pte(ptep, pte);
+}
+
 static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 {
 #ifdef CONFIG_SMP

From 34eff1d75b26eac5c04f9c5b52ba9e3bf1dd8877 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:01 +0100
Subject: [PATCH 798/890] x86: simplify __change_page_attr()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 47 +++++++++++++++------------------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index cbded8300e36..5eff244c16db 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -67,8 +67,7 @@ split_large_page(unsigned long address, pgprot_t ref_prot)
 }
 
 static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
-		   pgprot_t ref_prot)
+__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
 {
 	struct page *kpte_page;
 	pte_t *kpte;
@@ -84,32 +83,24 @@ repeat:
 	oldprot = pte_pgprot(*kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
-	ref_prot = canon_pgprot(ref_prot);
 	prot = canon_pgprot(prot);
 
-	if (pgprot_val(prot) != pgprot_val(ref_prot)) {
-		if (level == 4) {
-			set_pte(kpte, pfn_pte(pfn, prot));
-		} else {
-			/*
-			 * split_large_page will take the reference for this
-			 * change_page_attr on the split page.
-			 */
-			struct page *split;
-
-			ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-			split = split_large_page(address, ref_prot2);
-			if (!split)
-				return -ENOMEM;
-			pgprot_val(ref_prot2) &= ~_PAGE_NX;
-			set_pte(kpte, mk_pte(split, ref_prot2));
-			goto repeat;
-		}
+	if (level == 4) {
+		set_pte(kpte, pfn_pte(pfn, prot));
 	} else {
-		if (level == 4) {
-			set_pte(kpte, pfn_pte(pfn, ref_prot));
-		} else
-			BUG();
+		/*
+		 * split_large_page will take the reference for this
+		 * change_page_attr on the split page.
+		 */
+		struct page *split;
+
+		ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
+		split = split_large_page(address, ref_prot2);
+		if (!split)
+			return -ENOMEM;
+		pgprot_val(ref_prot2) &= ~_PAGE_NX;
+		set_pte(kpte, mk_pte(split, ref_prot2));
+		goto repeat;
 	}
 
 	return 0;
@@ -144,8 +135,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
 		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn, prot,
-						PAGE_KERNEL);
+			err = __change_page_attr(address, pfn, prot);
 			if (err)
 				break;
 		}
@@ -158,8 +148,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			addr2 = __START_KERNEL_map + __pa(address);
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-			err = __change_page_attr(addr2, pfn, prot2,
-						 PAGE_KERNEL_EXEC);
+			err = __change_page_attr(addr2, pfn, prot2);
 		}
 	}
 	up_write(&init_mm.mmap_sem);

From 0d82494ebd256bfff2336689271367f9284b4865 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:02 +0100
Subject: [PATCH 799/890] x86: change 64-bit __change_page_attr() to struct
 page

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 5eff244c16db..5a03fb5fd530 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -67,7 +67,7 @@ split_large_page(unsigned long address, pgprot_t ref_prot)
 }
 
 static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
+__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 {
 	struct page *kpte_page;
 	pte_t *kpte;
@@ -86,7 +86,7 @@ repeat:
 	prot = canon_pgprot(prot);
 
 	if (level == 4) {
-		set_pte(kpte, pfn_pte(pfn, prot));
+		set_pte(kpte, mk_pte(page, prot));
 	} else {
 		/*
 		 * split_large_page will take the reference for this
@@ -135,7 +135,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
 		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn, prot);
+			err = __change_page_attr(address, pfn_to_page(pfn), prot);
 			if (err)
 				break;
 		}
@@ -148,7 +148,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			addr2 = __START_KERNEL_map + __pa(address);
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-			err = __change_page_attr(addr2, pfn, prot2);
+			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
 		}
 	}
 	up_write(&init_mm.mmap_sem);

From d9db847f29bec9957c53fbdc05d65c4286235005 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:02 +0100
Subject: [PATCH 800/890] x86: change 64-bit pageattr to use set_pte_atomic()

NOP change - same as set_pte().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 5a03fb5fd530..34acbcd62eb8 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -86,7 +86,7 @@ repeat:
 	prot = canon_pgprot(prot);
 
 	if (level == 4) {
-		set_pte(kpte, mk_pte(page, prot));
+		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
 		/*
 		 * split_large_page will take the reference for this
@@ -99,7 +99,7 @@ repeat:
 		if (!split)
 			return -ENOMEM;
 		pgprot_val(ref_prot2) &= ~_PAGE_NX;
-		set_pte(kpte, mk_pte(split, ref_prot2));
+		set_pte_atomic(kpte, mk_pte(split, ref_prot2));
 		goto repeat;
 	}
 

From 44136717e06b888c2d3129a80abf9ac79233f1eb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 801/890] x86: 64-bit pageattr.c, prepare for unification

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 34acbcd62eb8..190d4d37bc2f 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -123,14 +123,15 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
 	int err = 0, kernel_map = 0, i;
 
+#ifdef CONFIG_X86_64
 	if (address >= __START_KERNEL_map &&
 			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
 
 		address = (unsigned long)__va(__pa(address));
 		kernel_map = 1;
 	}
+#endif
 
-	down_write(&init_mm.mmap_sem);
 	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
@@ -139,8 +140,11 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			if (err)
 				break;
 		}
-		/* Handle kernel mapping too which aliases part of the
-		 * lowmem */
+#ifdef CONFIG_X86_64
+		/*
+		 * Handle kernel mapping too which aliases part of
+		 * lowmem:
+		 */
 		if (__pa(address) < KERNEL_TEXT_SIZE) {
 			unsigned long addr2;
 			pgprot_t prot2;
@@ -150,8 +154,8 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
 			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
 		}
+#endif
 	}
-	up_write(&init_mm.mmap_sem);
 
 	return err;
 }

From 6faa4c53b2f06fd271060761ce27f4f53289175c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 802/890] x86: 64-bit, add the new split_large_page() function

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 103 +++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 28 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 190d4d37bc2f..139795029fb6 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -45,34 +45,91 @@ pte_t *lookup_address(unsigned long address, int *level)
 	return pte_offset_kernel(pmd, address);
 }
 
-static struct page *
-split_large_page(unsigned long address, pgprot_t ref_prot)
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
-	unsigned long addr;
-	struct page *base;
-	pte_t *pbase;
-	int i;
+	/* change init_mm */
+	set_pte_atomic(kpte, pte);
+#ifdef CONFIG_X86_32
+	if (SHARED_KERNEL_PMD)
+		return;
+	{
+		struct page *page;
 
-	base = alloc_pages(GFP_KERNEL, 0);
+		for (page = pgd_list; page; page = (struct page *)page->index) {
+			pgd_t *pgd;
+			pud_t *pud;
+			pmd_t *pmd;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			pud = pud_offset(pgd, address);
+			pmd = pmd_offset(pud, address);
+			set_pte_atomic((pte_t *)pmd, pte);
+		}
+	}
+#endif
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	gfp_t gfp_flags = GFP_KERNEL;
+	unsigned long flags;
+	unsigned long addr;
+	pte_t *pbase, *tmp;
+	struct page *base;
+	int i, level;
+
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	gfp_flags = GFP_ATOMIC;
+#endif
+	base = alloc_pages(gfp_flags, 0);
 	if (!base)
-		return NULL;
+		return -ENOMEM;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte) {
+		WARN_ON_ONCE(1);
+		goto out_unlock;
+	}
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, ref_prot);
+#ifdef CONFIG_X86_32
+	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+#endif
 
-	return base;
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
+	/*
+	 * Install the new, split up pagetable:
+	 */
+	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	base = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	if (base)
+		__free_pages(base, 0);
+
+	return 0;
 }
 
 static int
 __change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 {
-	struct page *kpte_page;
-	pte_t *kpte;
 	pgprot_t ref_prot2, oldprot;
-	int level;
+	struct page *kpte_page;
+	int level, err = 0;
+	pte_t *kpte;
 
 repeat:
 	kpte = lookup_address(address, &level);
@@ -88,22 +145,12 @@ repeat:
 	if (level == 4) {
 		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
-		/*
-		 * split_large_page will take the reference for this
-		 * change_page_attr on the split page.
-		 */
-		struct page *split;
-
-		ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-		split = split_large_page(address, ref_prot2);
-		if (!split)
-			return -ENOMEM;
-		pgprot_val(ref_prot2) &= ~_PAGE_NX;
-		set_pte_atomic(kpte, mk_pte(split, ref_prot2));
-		goto repeat;
+		err = split_large_page(kpte, address);
+		if (!err)
+			goto repeat;
 	}
 
-	return 0;
+	return err;
 }
 
 /**

From 674d67269e79f6697c3480363b28c9b9934fa60c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 803/890] x86: clean up differences between 64-bit and 32-bit

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 139795029fb6..60cfb687f97c 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -126,30 +126,36 @@ out_unlock:
 static int
 __change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 {
-	pgprot_t ref_prot2, oldprot;
 	struct page *kpte_page;
 	int level, err = 0;
 	pte_t *kpte;
 
+	BUG_ON(PageHighMem(page));
+
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return 0;
 
 	kpte_page = virt_to_page(kpte);
-	oldprot = pte_pgprot(*kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
-	prot = canon_pgprot(prot);
+
+	/*
+	 * Better fail early if someone sets the kernel text to NX.
+	 * Does not cover __inittext
+	 */
+	BUG_ON(address >= (unsigned long)&_text &&
+		address < (unsigned long)&_etext &&
+	       (pgprot_val(prot) & _PAGE_NX));
 
 	if (level == 4) {
-		set_pte_atomic(kpte, mk_pte(page, prot));
+		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 	} else {
 		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;
 	}
-
 	return err;
 }
 

From f5a50ce1bf53a07cb7d0bab1a87e62cc4f34f0ab Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 804/890] x86: return -EINVAL in __change_page_attr(), instead
 of 0

careful: might change driver behavior - but this is the right
return value.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 60cfb687f97c..4053832d4108 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -135,7 +135,7 @@ __change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
-		return 0;
+		return -EINVAL;
 
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));

From ace63e3743ae59fc0cce48450bd2e410776b4148 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 805/890] x86: add kernel_map_pages() to 64-bit

needed for DEBUG_PAGEALLOC support and for unification.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 4053832d4108..e1c860800ff1 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -260,3 +260,33 @@ void global_flush_tlb(void)
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 EXPORT_SYMBOL(global_flush_tlb);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	if (!enable) {
+		debug_check_no_locks_freed(page_address(page),
+					   numpages * PAGE_SIZE);
+	}
+
+	/*
+	 * If page allocator is not up yet then do not call c_p_a():
+	 */
+	if (!debug_pagealloc_enabled)
+		return;
+
+	/*
+	 * the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+
+	/*
+	 * we should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 */
+	__flush_tlb_all();
+}
+#endif

From 31a0717cbc6191fc56326fdf95548d87055686e3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 806/890] x86: enable DEBUG_PAGEALLOC on 64-bit

enable CONFIG_DEBUG_PAGEALLOC=y on 64-bit kernels too.

preliminary testing shows that it's working fine.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 38211ff0447f..46d34b2db421 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -41,7 +41,6 @@ comment "Page alloc debug is incompatible with Software Suspend on i386"
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
 	depends on DEBUG_KERNEL && !HIBERNATION && !HUGETLBFS
-	depends on X86_32
 	help
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types

From 44af6c41e6a055a0b9bd0d2067cfbc8e9f6a24df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 807/890] x86: backmerge 64-bit details into 32-bit pageattr.c

backmerge 64-bit details into 32-bit pageattr.c.

the pageattr_32.c and pageattr_64.c files are now identical.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_32.c | 130 +++++++++++++++++++++++++-------------
 1 file changed, 85 insertions(+), 45 deletions(-)

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 1c7bd81a4194..251613449dd6 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -39,23 +39,26 @@ pte_t *lookup_address(unsigned long address, int *level)
 
 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
-	struct page *page;
-
 	/* change init_mm */
 	set_pte_atomic(kpte, pte);
+#ifdef CONFIG_X86_32
 	if (SHARED_KERNEL_PMD)
 		return;
+	{
+		struct page *page;
 
-	for (page = pgd_list; page; page = (struct page *)page->index) {
-		pgd_t *pgd;
-		pud_t *pud;
-		pmd_t *pmd;
+		for (page = pgd_list; page; page = (struct page *)page->index) {
+			pgd_t *pgd;
+			pud_t *pud;
+			pmd_t *pmd;
 
-		pgd = (pgd_t *)page_address(page) + pgd_index(address);
-		pud = pud_offset(pgd, address);
-		pmd = pmd_offset(pud, address);
-		set_pte_atomic((pte_t *)pmd, pte);
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			pud = pud_offset(pgd, address);
+			pmd = pmd_offset(pud, address);
+			set_pte_atomic((pte_t *)pmd, pte);
+		}
 	}
+#endif
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
@@ -89,7 +92,9 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
+#ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+#endif
 
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
 		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
@@ -109,15 +114,14 @@ out_unlock:
 	return 0;
 }
 
-static int __change_page_attr(struct page *page, pgprot_t prot)
+static int
+__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 {
 	struct page *kpte_page;
-	unsigned long address;
 	int level, err = 0;
 	pte_t *kpte;
 
 	BUG_ON(PageHighMem(page));
-	address = (unsigned long)page_address(page);
 
 repeat:
 	kpte = lookup_address(address, &level);
@@ -146,52 +150,88 @@ repeat:
 	return err;
 }
 
-/*
- * Change the page attributes of an page in the linear mapping.
+/**
+ * change_page_attr_addr - Change page table attributes in linear mapping
+ * @address: Virtual address in linear mapping.
+ * @numpages: Number of pages to change
+ * @prot:    New page table attribute (PAGE_*)
+ *
+ * Change page attributes of a page in the direct mapping. This is a variant
+ * of change_page_attr() that also works on memory holes that do not have
+ * mem_map entry (pfn_valid() is false).
+ *
+ * See change_page_attr() documentation for more details.
+ */
+
+int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
+{
+	int err = 0, kernel_map = 0, i;
+
+#ifdef CONFIG_X86_64
+	if (address >= __START_KERNEL_map &&
+			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
+
+		address = (unsigned long)__va(__pa(address));
+		kernel_map = 1;
+	}
+#endif
+
+	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
+		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+
+		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
+			err = __change_page_attr(address, pfn_to_page(pfn), prot);
+			if (err)
+				break;
+		}
+#ifdef CONFIG_X86_64
+		/*
+		 * Handle kernel mapping too which aliases part of
+		 * lowmem:
+		 */
+		if (__pa(address) < KERNEL_TEXT_SIZE) {
+			unsigned long addr2;
+			pgprot_t prot2;
+
+			addr2 = __START_KERNEL_map + __pa(address);
+			/* Make sure the kernel mappings stay executable */
+			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
+			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
+		}
+#endif
+	}
+
+	return err;
+}
+
+/**
+ * change_page_attr - Change page table attributes in the linear mapping.
+ * @page: First page to change
+ * @numpages: Number of pages to change
+ * @prot: New protection/caching type (PAGE_*)
+ *
+ * Returns 0 on success, otherwise a negated errno.
  *
  * This should be used when a page is mapped with a different caching policy
  * than write-back somewhere - some CPUs do not like it when mappings with
  * different caching policies exist. This changes the page attributes of the
  * in kernel linear mapping too.
  *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
+ * Caller must call global_flush_tlb() later to make the changes active.
  *
- * Caller must call global_flush_tlb() after this.
+ * The caller needs to ensure that there are no conflicting mappings elsewhere
+ * (e.g. in user space) * This function only deals with the kernel linear map.
+ *
+ * For MMIO areas without mem_map use change_page_attr_addr() instead.
  */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
-	int err = 0, i;
+	unsigned long addr = (unsigned long)page_address(page);
 
-	for (i = 0; i < numpages; i++, page++) {
-		err = __change_page_attr(page, prot);
-		if (err)
-			break;
-	}
-
-	return err;
+	return change_page_attr_addr(addr, numpages, prot);
 }
 EXPORT_SYMBOL(change_page_attr);
 
-int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
-{
-	int i;
-	unsigned long pfn = (__pa(addr) >> PAGE_SHIFT);
-
-	for (i = 0; i < numpages; i++) {
-		if (!pfn_valid(pfn + i)) {
-			WARN_ON_ONCE(1);
-			break;
-		} else {
-			int level;
-			pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
-			BUG_ON(pte && pte_none(*pte));
-		}
-	}
-
-	return change_page_attr(virt_to_page(addr), i, prot);
-}
-
 static void flush_kernel_map(void *arg)
 {
 	/*

From 6050be70d8f7e3952fcc31fcf1fa8a7cbaa18312 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 808/890] x86: prepare for pageattr.c unification

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr_64.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index e1c860800ff1..0246511be99d 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -9,14 +9,6 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
-void clflush_cache_range(void *addr, int size)
-{
-	int i;
-
-	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr+i);
-}
-
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>

From b195bc00ef8c2ccf8cc744e5ff9470cb08b45d76 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 809/890] x86: unify pageattr_32.c and pageattr_64.c

unify the now perfectly identical pageattr_32/64.c files - no code changed.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile_32                   |   2 +-
 arch/x86/mm/Makefile_64                   |   2 +-
 arch/x86/mm/{pageattr_32.c => pageattr.c} |   0
 arch/x86/mm/pageattr_64.c                 | 284 ----------------------
 4 files changed, 2 insertions(+), 286 deletions(-)
 rename arch/x86/mm/{pageattr_32.c => pageattr.c} (100%)
 delete mode 100644 arch/x86/mm/pageattr_64.c

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 1aeba3bf34bd..424e5a862271 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr_32.o mmap.o
+obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr.o mmap.o
 
 obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index cb723167631b..043584478457 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y	 := init_64.o fault_64.o ioremap_64.o extable.o pageattr_64.o mmap.o
+obj-y	 := init_64.o fault_64.o ioremap_64.o extable.o pageattr.o mmap.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr.c
similarity index 100%
rename from arch/x86/mm/pageattr_32.c
rename to arch/x86/mm/pageattr.c
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
deleted file mode 100644
index 0246511be99d..000000000000
--- a/arch/x86/mm/pageattr_64.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-
-pte_t *lookup_address(unsigned long address, int *level)
-{
-	pgd_t *pgd = pgd_offset_k(address);
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (pgd_none(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (pud_none(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		return NULL;
-	*level = 3;
-	if (pmd_large(*pmd))
-		return (pte_t *)pmd;
-	*level = 4;
-
-	return pte_offset_kernel(pmd, address);
-}
-
-static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
-{
-	/* change init_mm */
-	set_pte_atomic(kpte, pte);
-#ifdef CONFIG_X86_32
-	if (SHARED_KERNEL_PMD)
-		return;
-	{
-		struct page *page;
-
-		for (page = pgd_list; page; page = (struct page *)page->index) {
-			pgd_t *pgd;
-			pud_t *pud;
-			pmd_t *pmd;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			pud = pud_offset(pgd, address);
-			pmd = pmd_offset(pud, address);
-			set_pte_atomic((pte_t *)pmd, pte);
-		}
-	}
-#endif
-}
-
-static int split_large_page(pte_t *kpte, unsigned long address)
-{
-	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
-	gfp_t gfp_flags = GFP_KERNEL;
-	unsigned long flags;
-	unsigned long addr;
-	pte_t *pbase, *tmp;
-	struct page *base;
-	int i, level;
-
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = GFP_ATOMIC;
-#endif
-	base = alloc_pages(gfp_flags, 0);
-	if (!base)
-		return -ENOMEM;
-
-	spin_lock_irqsave(&pgd_lock, flags);
-	/*
-	 * Check for races, another CPU might have split this page
-	 * up for us already:
-	 */
-	tmp = lookup_address(address, &level);
-	if (tmp != kpte) {
-		WARN_ON_ONCE(1);
-		goto out_unlock;
-	}
-
-	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
-	pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
-
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
-
-	/*
-	 * Install the new, split up pagetable:
-	 */
-	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
-	base = NULL;
-
-out_unlock:
-	spin_unlock_irqrestore(&pgd_lock, flags);
-
-	if (base)
-		__free_pages(base, 0);
-
-	return 0;
-}
-
-static int
-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
-{
-	struct page *kpte_page;
-	int level, err = 0;
-	pte_t *kpte;
-
-	BUG_ON(PageHighMem(page));
-
-repeat:
-	kpte = lookup_address(address, &level);
-	if (!kpte)
-		return -EINVAL;
-
-	kpte_page = virt_to_page(kpte);
-	BUG_ON(PageLRU(kpte_page));
-	BUG_ON(PageCompound(kpte_page));
-
-	/*
-	 * Better fail early if someone sets the kernel text to NX.
-	 * Does not cover __inittext
-	 */
-	BUG_ON(address >= (unsigned long)&_text &&
-		address < (unsigned long)&_etext &&
-	       (pgprot_val(prot) & _PAGE_NX));
-
-	if (level == 4) {
-		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
-	} else {
-		err = split_large_page(kpte, address);
-		if (!err)
-			goto repeat;
-	}
-	return err;
-}
-
-/**
- * change_page_attr_addr - Change page table attributes in linear mapping
- * @address: Virtual address in linear mapping.
- * @numpages: Number of pages to change
- * @prot:    New page table attribute (PAGE_*)
- *
- * Change page attributes of a page in the direct mapping. This is a variant
- * of change_page_attr() that also works on memory holes that do not have
- * mem_map entry (pfn_valid() is false).
- *
- * See change_page_attr() documentation for more details.
- */
-
-int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
-{
-	int err = 0, kernel_map = 0, i;
-
-#ifdef CONFIG_X86_64
-	if (address >= __START_KERNEL_map &&
-			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
-
-		address = (unsigned long)__va(__pa(address));
-		kernel_map = 1;
-	}
-#endif
-
-	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
-		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
-
-		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn_to_page(pfn), prot);
-			if (err)
-				break;
-		}
-#ifdef CONFIG_X86_64
-		/*
-		 * Handle kernel mapping too which aliases part of
-		 * lowmem:
-		 */
-		if (__pa(address) < KERNEL_TEXT_SIZE) {
-			unsigned long addr2;
-			pgprot_t prot2;
-
-			addr2 = __START_KERNEL_map + __pa(address);
-			/* Make sure the kernel mappings stay executable */
-			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
-		}
-#endif
-	}
-
-	return err;
-}
-
-/**
- * change_page_attr - Change page table attributes in the linear mapping.
- * @page: First page to change
- * @numpages: Number of pages to change
- * @prot: New protection/caching type (PAGE_*)
- *
- * Returns 0 on success, otherwise a negated errno.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * Caller must call global_flush_tlb() later to make the changes active.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere
- * (e.g. in user space) * This function only deals with the kernel linear map.
- *
- * For MMIO areas without mem_map use change_page_attr_addr() instead.
- */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return change_page_attr_addr(addr, numpages, prot);
-}
-EXPORT_SYMBOL(change_page_attr);
-
-static void flush_kernel_map(void *arg)
-{
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
-
-	if (boot_cpu_data.x86_model >= 4)
-		wbinvd();
-}
-
-void global_flush_tlb(void)
-{
-	BUG_ON(irqs_disabled());
-
-	on_each_cpu(flush_kernel_map, NULL, 1, 1);
-}
-EXPORT_SYMBOL(global_flush_tlb);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
-	if (PageHighMem(page))
-		return;
-	if (!enable) {
-		debug_check_no_locks_freed(page_address(page),
-					   numpages * PAGE_SIZE);
-	}
-
-	/*
-	 * If page allocator is not up yet then do not call c_p_a():
-	 */
-	if (!debug_pagealloc_enabled)
-		return;
-
-	/*
-	 * the return value is ignored - the calls cannot fail,
-	 * large pages are disabled at boot time.
-	 */
-	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
-
-	/*
-	 * we should perform an IPI and flush all tlbs,
-	 * but that can deadlock->flush only current cpu.
-	 */
-	__flush_tlb_all();
-}
-#endif

From 4554ab95c2b9d6b0ee9cf2a7ed3df665422acebb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:03 +0100
Subject: [PATCH 810/890] x86: re-add clflush_cache_range()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 251613449dd6..510ff4091667 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -9,6 +9,14 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
+void clflush_cache_range(void *addr, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
+		clflush(addr+i);
+}
+
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>

From e4b71dcf54fa90fc30fb901bbce7e38a46467af7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 811/890] x86: clean up arch/x86/mm/pageattr.c

do some leftover cleanups in the now unified arch/x86/mm/pageattr.c
file.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c       | 13 +++++--------
 include/asm-x86/pgtable_64.h |  2 ++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 510ff4091667..a270f9ccebfb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -2,7 +2,6 @@
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Thanks to Ben LaHaise for precious feedback.
  */
-
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -50,9 +49,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	/* change init_mm */
 	set_pte_atomic(kpte, pte);
 #ifdef CONFIG_X86_32
-	if (SHARED_KERNEL_PMD)
-		return;
-	{
+	if (!SHARED_KERNEL_PMD) {
 		struct page *page;
 
 		for (page = pgd_list; page; page = (struct page *)page->index) {
@@ -277,14 +274,14 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 		return;
 
 	/*
-	 * the return value is ignored - the calls cannot fail,
-	 * large pages are disabled at boot time.
+	 * The return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time:
 	 */
 	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
 
 	/*
-	 * we should perform an IPI and flush all tlbs,
-	 * but that can deadlock->flush only current cpu.
+	 * We should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
 }
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 223b0032bab5..faa6a96c2a5c 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -25,6 +25,8 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 
 #endif /* !__ASSEMBLY__ */
 
+#define SHARED_KERNEL_PMD	1
+
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */

From 0a663088cd6ff9e89f285ae7689e6eee46cfb54c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 812/890] x86: clean up lookup_address() declarations

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h    | 8 ++++++++
 include/asm-x86/pgtable_32.h | 8 --------
 include/asm-x86/pgtable_64.h | 2 --
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 2726ddc457fd..4409dabe31c6 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -234,6 +234,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #ifndef __ASSEMBLY__
 
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address, int *level);
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
 {
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 72eb06c2bac9..ca7b150ca8b7 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -176,14 +176,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 #define pmd_page_vaddr(pmd) \
 		((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
 
-/*
- * Helper function that returns the kernel pagetable entry controlling
- * the virtual address 'address'. NULL means no pagetable entry present.
- * NOTE: the return type is pte_t but if the pmd is PSE then we return it
- * as a pte too.
- */
-extern pte_t *lookup_address(unsigned long address, int *level);
-
 #if defined(CONFIG_HIGHPTE)
 #define pte_offset_map(dir, address) \
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index faa6a96c2a5c..ce1dbab75e39 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -245,8 +245,6 @@ extern struct list_head pgd_list;
 
 extern int kern_addr_valid(unsigned long addr); 
 
-pte_t *lookup_address(unsigned long addr, int *level);
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 

From 30551bb3ce9257a2352b3cb4e45010d415cc0ad5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 813/890] x86: add PG_LEVEL enum

this way PG_LEVEL_1GB will be an easy change.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c | 4 +---
 arch/x86/mm/pageattr.c      | 9 ++++++---
 include/asm-x86/pgtable.h   | 6 ++++++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 6cc106b388a5..d7a93008cc12 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -16,14 +16,12 @@
 
 enum {
 	NTEST			= 400,
+	LOWEST_LEVEL		= PG_LEVEL_4K,
 #ifdef CONFIG_X86_64
-	LOWEST_LEVEL		= 4,
 	LPS			= (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
-	LOWEST_LEVEL		= 4,
 	LPS			= (1 << PMD_SHIFT),
 #else
-	LOWEST_LEVEL		= 4, /* lookup_address lies here */
 	LPS			= (1 << 22),
 #endif
 	GPS			= (1<<30)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a270f9ccebfb..4589a1382fa1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -28,6 +28,8 @@ pte_t *lookup_address(unsigned long address, int *level)
 	pud_t *pud;
 	pmd_t *pmd;
 
+	*level = PG_LEVEL_NONE;
+
 	if (pgd_none(*pgd))
 		return NULL;
 	pud = pud_offset(pgd, address);
@@ -36,11 +38,12 @@ pte_t *lookup_address(unsigned long address, int *level)
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
-	*level = 3;
+
+	*level = PG_LEVEL_2M;
 	if (pmd_large(*pmd))
 		return (pte_t *)pmd;
-	*level = 4;
 
+	*level = PG_LEVEL_4K;
 	return pte_offset_kernel(pmd, address);
 }
 
@@ -145,7 +148,7 @@ repeat:
 		address < (unsigned long)&_etext &&
 	       (pgprot_val(prot) & _PAGE_NX));
 
-	if (level == 4) {
+	if (level == PG_LEVEL_4K) {
 		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 	} else {
 		err = split_large_page(kpte, address);
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 4409dabe31c6..7aa34c8eb220 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -234,6 +234,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #ifndef __ASSEMBLY__
 
+enum {
+	PG_LEVEL_NONE,
+	PG_LEVEL_4K,
+	PG_LEVEL_2M,
+};
+
 /*
  * Helper function that returns the kernel pagetable entry controlling
  * the virtual address 'address'. NULL means no pagetable entry present.

From 687c4825b6ccab69b85f266ae925500b27aab6c2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 814/890] x86: keep the BIOS area executable

keep the BIOS area executable.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4589a1382fa1..effcd78d5f40 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -22,6 +22,27 @@ void clflush_cache_range(void *addr, int size)
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+/*
+ * We allow the BIOS range to be executable:
+ */
+#define BIOS_BEGIN		0x000a0000
+#define BIOS_END		0x00100000
+
+static inline pgprot_t check_exec(pgprot_t prot, unsigned long address)
+{
+	if (__pa(address) >= BIOS_BEGIN && __pa(address) < BIOS_END)
+		pgprot_val(prot) &= ~_PAGE_NX;
+	/*
+	 * Better fail early if someone sets the kernel text to NX.
+	 * Does not cover __inittext
+	 */
+	BUG_ON(address >= (unsigned long)&_text &&
+		address < (unsigned long)&_etext &&
+	       (pgprot_val(prot) & _PAGE_NX));
+
+	return prot;
+}
+
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -140,13 +161,7 @@ repeat:
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	/*
-	 * Better fail early if someone sets the kernel text to NX.
-	 * Does not cover __inittext
-	 */
-	BUG_ON(address >= (unsigned long)&_text &&
-		address < (unsigned long)&_etext &&
-	       (pgprot_val(prot) & _PAGE_NX));
+	prot = check_exec(prot, address);
 
 	if (level == PG_LEVEL_4K) {
 		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));

From 8192206df093e8fc607b5072ce71a930d44f8638 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 815/890] x86: change cpa to pfn based

change CPA to pfn based.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index effcd78d5f40..d18c41d752f3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -3,6 +3,7 @@
  * Thanks to Ben LaHaise for precious feedback.
  */
 #include <linux/highmem.h>
+#include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -144,13 +145,15 @@ out_unlock:
 }
 
 static int
-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
+__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
 {
 	struct page *kpte_page;
 	int level, err = 0;
 	pte_t *kpte;
 
-	BUG_ON(PageHighMem(page));
+#ifdef CONFIG_X86_32
+	BUG_ON(pfn > max_low_pfn);
+#endif
 
 repeat:
 	kpte = lookup_address(address, &level);
@@ -164,7 +167,7 @@ repeat:
 	prot = check_exec(prot, address);
 
 	if (level == PG_LEVEL_4K) {
-		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
+		set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
 	} else {
 		err = split_large_page(kpte, address);
 		if (!err)
@@ -203,7 +206,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
 		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn_to_page(pfn), prot);
+			err = __change_page_attr(address, pfn, prot);
 			if (err)
 				break;
 		}
@@ -219,7 +222,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			addr2 = __START_KERNEL_map + __pa(address);
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
+			err = __change_page_attr(addr2, pfn, prot2);
 		}
 #endif
 	}

From 4c881ca1819da180b047f87eedeb9a5957918705 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 816/890] x86: fix NX bit handling in change_page_attr()

This patch fixes a bug of change_page_attr/change_page_attr_addr on
Intel i386/x86_64 CPUs.  After changing page attribute to be
executable with these functions, the page remains un-executable on
Intel i386/x86_64 CPU.  Because on Intel i386/x86_64 CPU, only if the
"NX" bits of all three level page tables are cleared (PAE is enabled),
the corresponding page is executable (refer to section 4.13.2 of Intel
64 and IA-32 Architectures Software Developer's Manual).  So, the bug
is fixed through clearing the "NX" bit of PMD when splitting the huge
PMD.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d18c41d752f3..cbe8e9223bee 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -130,8 +130,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
 
 	/*
-	 * Install the new, split up pagetable:
+	 * Install the new, split up pagetable. Important detail here:
+	 *
+	 * On Intel the NX bit of all levels must be cleared to make a
+	 * page executable. See section 4.13.2 of Intel 64 and IA-32
+	 * Architectures Software Developer's Manual).
 	 */
+	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
 	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 

From d2e626f45cc450c00f5f98a89b8b4c4ac3c9bf5f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 817/890] x86: add PAGE_KERNEL_EXEC_NOCACHE

add PAGE_KERNEL_EXEC_NOCACHE.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 7aa34c8eb220..ee40a88882f6 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -79,6 +79,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+#define __PAGE_KERNEL_EXEC_NOCACHE	(__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
 #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
 #define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE	(__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
@@ -96,6 +97,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define PAGE_KERNEL_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX			MAKE_GLOBAL(__PAGE_KERNEL_RX)
 #define PAGE_KERNEL_NOCACHE		MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_EXEC_NOCACHE	MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
 #define PAGE_KERNEL_LARGE		MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
 #define PAGE_KERNEL_LARGE_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
 #define PAGE_KERNEL_VSYSCALL		MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)

From 1c17f4d615f34a4ecfcf9132d643126b226f5e79 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 818/890] x86: ioremap_nocache fix

This patch fixes a bug of ioremap_nocache. ioremap_nocache() will call
__ioremap() with flags != 0 to do the real work, which will call
change_page_attr_addr() if phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT).
But some pages between 0 ~ end_pfn_map << PAGE_SHIFT are not mapped by
identity map, this will make change_page_attr_addr failed.

This patch is based on latest x86 git and has been tested on x86_64 platform.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_64.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 2815ab60009b..0a05f024dc22 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -41,7 +41,14 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
 		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		unsigned long vaddr = (unsigned long) __va(phys_addr);
+		int level;
 
+		/*
+		 * If there is no identity map for this address,
+		 * change_page_attr_addr is unnecessary
+		 */
+		if (!lookup_address(vaddr, &level))
+			return err;
 		/*
  		 * Must use a address here and not struct page because the phys addr
 		 * can be a in hole between nodes and not have an memmap entry.

From f316fe687521fad5ad2fd8389397c38aa97439d2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 819/890] x86: cpa: make self-test depend on DEBUG_KERNEL

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 46d34b2db421..2a859a7e2d74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -205,6 +205,7 @@ config DEBUG_BOOT_PARAMS
 
 config CPA_DEBUG
 	bool "CPA self test code"
+	depends on DEBUG_KERNEL
 	help
 	  Do change_page_attr self tests at boot.
 

From ed724be65fa18833244d81b484e425fc838837fa Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 820/890] x86: turn the check_exec function into function that

What the check_exec() function really is trying to do is enforce certain
bits in the pgprot that are required by the x86 architecture, but that
callers might not be aware of (such as NX bit exclusion of the BIOS
area for BIOS based PCI access; it's not uncommon to ioremap the BIOS
region for various purposes and normally ioremap() memory has the NX bit
set).

This patch turns the check_exec() function into static_protections()
which also is now used to make sure the kernel text area remains non-NX
and that the .rodata section remains read-only. If the architecture
ends up requiring more such mandatory prot settings for specific areas,
this is now a reasonable place to add these.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 47 +++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index cbe8e9223bee..00f6f341e291 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -24,22 +24,49 @@ void clflush_cache_range(void *addr, int size)
 #include <asm/pgalloc.h>
 
 /*
- * We allow the BIOS range to be executable:
+ * We must allow the BIOS range to be executable:
  */
 #define BIOS_BEGIN		0x000a0000
 #define BIOS_END		0x00100000
 
-static inline pgprot_t check_exec(pgprot_t prot, unsigned long address)
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
 {
-	if (__pa(address) >= BIOS_BEGIN && __pa(address) < BIOS_END)
-		pgprot_val(prot) &= ~_PAGE_NX;
+	return addr >= start && addr < end;
+}
+
+/*
+ * Certain areas of memory on x86 require very specific protection flags,
+ * for example the BIOS area or kernel text. Callers don't always get this
+ * right (again, ioremap() on BIOS memory is not uncommon) so this function
+ * checks and fixes these known static required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
+{
+	pgprot_t forbidden = __pgprot(0);
+
 	/*
-	 * Better fail early if someone sets the kernel text to NX.
-	 * Does not cover __inittext
+	 * The BIOS area between 640k and 1Mb needs to be executable for
+	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 	 */
-	BUG_ON(address >= (unsigned long)&_text &&
-		address < (unsigned long)&_etext &&
-	       (pgprot_val(prot) & _PAGE_NX));
+	if (within(__pa(address), BIOS_BEGIN, BIOS_END))
+		pgprot_val(forbidden) |= _PAGE_NX;
+
+	/*
+	 * The kernel text needs to be executable for obvious reasons
+	 * Does not cover __inittext since that is gone later on
+	 */
+	if (within(address, (unsigned long)_text, (unsigned long)_etext))
+		pgprot_val(forbidden) |= _PAGE_NX;
+
+#ifdef CONFIG_DEBUG_RODATA
+	/* The .rodata section needs to be read-only */
+	if (within(address, (unsigned long)__start_rodata,
+				(unsigned long)__end_rodata))
+		pgprot_val(forbidden) |= _PAGE_RW;
+#endif
+
+	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
 
 	return prot;
 }
@@ -169,7 +196,7 @@ repeat:
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	prot = check_exec(prot, address);
+	prot = static_protections(prot, address);
 
 	if (level == PG_LEVEL_4K) {
 		set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));

From a40343497e8983cdf07fde99c9081ee62b96f745 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:04 +0100
Subject: [PATCH 821/890] x86: fix ioremap pgprot inconsistency

The pgprot flags which are handed into ioremap_page_range() are
different to those which are set in change_page_attr(). The
ioremap_page_range flags are executable, while the c_p_a flags are
not. Also make the mappings global (which is a NOP currently on 32bit,
although CPUs from PPRO+ onwards support it, but that's a separate
fix.)

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index f8e6c4709cc2..18757f058bda 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -67,8 +67,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 				return NULL;
 	}
 
-	prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
-			| _PAGE_ACCESSED | flags);
+	prot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
 
 	/*
 	 * Mappings have to be page-aligned

From 1aaf74e919be54be0023c3124923fb537c7fb772 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 822/890] x86: fix ioremap pgprot inconsistency

The pgprot flags which are handed into ioremap_page_range() are
different to those which are set in change_page_attr(). The
ioremap_page_range flags are executable, while the c_p_a flags are
not.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 0a05f024dc22..a37556124c86 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -53,7 +53,7 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
  		 * Must use a address here and not struct page because the phys addr
 		 * can be a in hole between nodes and not have an memmap entry.
 		 */
-		err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
+		err = change_page_attr_addr(vaddr,npages,MAKE_GLOBAL(__PAGE_KERNEL|flags));
 		if (!err)
 			global_flush_tlb();
 	}
@@ -91,7 +91,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
-	pgprot = __pgprot(__PAGE_KERNEL_EXEC | _PAGE_GLOBAL | flags);
+	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
 	/*
 	 * Mappings have to be page-aligned
 	 */

From 91eebf40b3cb5abd76e813e17dbc320ff2ea3295 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 823/890] x86: style cleanup of ioremap code

Fix the coding style before going further.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 95 ++++++++++++++++++++--------------------
 arch/x86/mm/ioremap_64.c | 48 ++++++++++----------
 2 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 18757f058bda..30ff2586db1e 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -1,6 +1,4 @@
 /*
- * arch/i386/mm/ioremap.c
- *
  * Re-map IO memory to kernel address space so that we can access it.
  * This is needed for high PCI addresses that aren't mapped in the
  * 640k-1MB IO memory area on PC's
@@ -21,10 +19,6 @@
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
 
-/*
- * Generic mapping function (not visible outside):
- */
-
 /*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
@@ -34,10 +28,11 @@
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+			unsigned long flags)
 {
-	void __iomem * addr;
-	struct vm_struct * area;
+	void __iomem *addr;
+	struct vm_struct *area;
 	unsigned long offset, last_addr;
 	pgprot_t prot;
 
@@ -61,9 +56,10 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 
 		t_addr = __va(phys_addr);
 		t_end = t_addr + (size - 1);
-	   
-		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
-			if(!PageReserved(page))
+
+		for (page = virt_to_page(t_addr);
+		     page <= virt_to_page(t_end); page++)
+			if (!PageReserved(page))
 				return NULL;
 	}
 
@@ -85,7 +81,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	area->phys_addr = phys_addr;
 	addr = (void __iomem *) area->addr;
 	if (ioremap_page_range((unsigned long) addr,
-			(unsigned long) addr + size, phys_addr, prot)) {
+			       (unsigned long) addr + size, phys_addr, prot)) {
 		vunmap((void __force *) addr);
 		return NULL;
 	}
@@ -102,31 +98,31 @@ EXPORT_SYMBOL(__ioremap);
  * make bus memory CPU accessible via the readb/readw/readl/writeb/
  * writew/writel functions and the other mmio helpers. The returned
  * address is not guaranteed to be usable directly as a virtual
- * address. 
+ * address.
  *
  * This version of ioremap ensures that the memory is marked uncachable
  * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many 
+ * the PCI bus. Note that there are other caches and buffers on many
  * busses. In particular driver authors should read up on PCI writes
  *
  * It's useful if some control registers are in such an area and
  * write combining or read caching is not desirable:
- * 
+ *
  * Must be freed with iounmap.
  */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long last_addr;
 	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
-	if (!p) 
-		return p; 
+
+	if (!p)
+		return p;
 
 	/* Guaranteed to be > phys_addr, as per __ioremap() */
 	last_addr = phys_addr + size - 1;
 
 	if (last_addr < virt_to_phys(high_memory) - 1) {
-		struct page *ppage = virt_to_page(__va(phys_addr));		
+		struct page *ppage = virt_to_page(__va(phys_addr));
 		unsigned long npages;
 
 		phys_addr &= PAGE_MASK;
@@ -135,18 +131,18 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 		last_addr = PAGE_ALIGN(last_addr);
 
 		/* .. but that's ok, because modulo-2**n arithmetic will make
-	 	* the page-aligned "last - first" come out right.
-	 	*/
+		 * the page-aligned "last - first" come out right.
+		 */
 		npages = (last_addr - phys_addr) >> PAGE_SHIFT;
 
-		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
-			iounmap(p); 
+		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
+			iounmap(p);
 			p = NULL;
 		}
 		global_flush_tlb();
 	}
 
-	return p;					
+	return p;
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -169,10 +165,11 @@ void iounmap(volatile void __iomem *addr)
 	 * of ISA space.   So handle that here.
 	 */
 	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
-			addr < phys_to_virt(ISA_END_ADDRESS))
+	    addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 
-	addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+	addr = (volatile void __iomem *)
+		(PAGE_MASK & (unsigned long __force)addr);
 
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
@@ -187,7 +184,7 @@ void iounmap(volatile void __iomem *addr)
 	read_unlock(&vmlist_lock);
 
 	if (!p) {
-		printk("iounmap: bad address %p\n", addr);
+		printk(KERN_ERR "iounmap: bad address %p\n", addr);
 		dump_stack();
 		return;
 	}
@@ -198,12 +195,12 @@ void iounmap(volatile void __iomem *addr)
 				 get_vm_area_size(p) >> PAGE_SHIFT,
 				 PAGE_KERNEL);
 		global_flush_tlb();
-	} 
+	}
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
 	BUG_ON(p != o || o == NULL);
-	kfree(p); 
+	kfree(p);
 }
 EXPORT_SYMBOL(iounmap);
 
@@ -237,7 +234,7 @@ void __init early_ioremap_init(void)
 	unsigned long *pgd;
 
 	if (early_ioremap_debug)
-		printk("early_ioremap_init()\n");
+		printk(KERN_DEBUG "early_ioremap_init()\n");
 
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
@@ -248,15 +245,16 @@ void __init early_ioremap_init(void)
 	 */
 	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk("pgd %p != %p\n",
-			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
-		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
-			fix_to_virt(FIX_BTMAP_BEGIN));
-		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
-			fix_to_virt(FIX_BTMAP_END));
+		printk(KERN_WARNING "pgd %p != %p\n",
+		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+		       fix_to_virt(FIX_BTMAP_BEGIN));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+		       fix_to_virt(FIX_BTMAP_END));
 
-		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
-		printk("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
+		       FIX_BTMAP_BEGIN);
 	}
 }
 
@@ -265,7 +263,7 @@ void __init early_ioremap_clear(void)
 	unsigned long *pgd;
 
 	if (early_ioremap_debug)
-		printk("early_ioremap_clear()\n");
+		printk(KERN_DEBUG "early_ioremap_clear()\n");
 
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = 0;
@@ -331,10 +329,10 @@ static int __init check_early_ioremap_leak(void)
 		return 0;
 
 	printk(KERN_WARNING
-		"Debug warning: early ioremap leak of %d areas detected.\n",
-			early_ioremap_nested);
+	       "Debug warning: early ioremap leak of %d areas detected.\n",
+	       early_ioremap_nested);
 	printk(KERN_WARNING
-		"please boot with early_ioremap_debug and report the dmesg.\n");
+	       "please boot with early_ioremap_debug and report the dmesg.\n");
 	WARN_ON(1);
 
 	return 1;
@@ -351,8 +349,8 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 
 	nesting = early_ioremap_nested;
 	if (early_ioremap_debug) {
-		printk("early_ioremap(%08lx, %08lx) [%d] => ",
-				phys_addr, size, nesting);
+		printk(KERN_DEBUG "early_ioremap(%08lx, %08lx) [%d] => ",
+		       phys_addr, size, nesting);
 		dump_stack();
 	}
 
@@ -398,7 +396,7 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 	if (early_ioremap_debug)
 		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
 
-	return (void*) (offset + fix_to_virt(idx0));
+	return (void *) (offset + fix_to_virt(idx0));
 }
 
 void __init early_iounmap(void *addr, unsigned long size)
@@ -413,7 +411,8 @@ void __init early_iounmap(void *addr, unsigned long size)
 	WARN_ON(nesting < 0);
 
 	if (early_ioremap_debug) {
-		printk("early_iounmap(%p, %08lx) [%d]\n", addr, size, nesting);
+		printk(KERN_DEBUG "early_iounmap(%p, %08lx) [%d]\n", addr,
+		       size, nesting);
 		dump_stack();
 	}
 
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index a37556124c86..bd962cc636c5 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -1,6 +1,4 @@
 /*
- * arch/x86_64/mm/ioremap.c
- *
  * Re-map IO memory to kernel address space so that we can access it.
  * This is needed for high PCI addresses that aren't mapped in the
  * 640k-1MB IO memory area on PC's
@@ -33,9 +31,8 @@ EXPORT_SYMBOL(__phys_addr);
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
-static int
-ioremap_change_attr(unsigned long phys_addr, unsigned long size,
-					unsigned long flags)
+static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+			       unsigned long flags)
 {
 	int err = 0;
 	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
@@ -50,20 +47,18 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 		if (!lookup_address(vaddr, &level))
 			return err;
 		/*
- 		 * Must use a address here and not struct page because the phys addr
-		 * can be a in hole between nodes and not have an memmap entry.
+		 * Must use a address here and not struct page because
+		 * the phys addr can be a in hole between nodes and
+		 * not have an memmap entry.
 		 */
-		err = change_page_attr_addr(vaddr,npages,MAKE_GLOBAL(__PAGE_KERNEL|flags));
+		err = change_page_attr_addr(vaddr,npages,
+					    MAKE_GLOBAL(__PAGE_KERNEL|flags));
 		if (!err)
 			global_flush_tlb();
 	}
 	return err;
 }
 
-/*
- * Generic mapping function
- */
-
 /*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
@@ -73,10 +68,11 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+			unsigned long flags)
 {
-	void * addr;
-	struct vm_struct * area;
+	void *addr;
+	struct vm_struct *area;
 	unsigned long offset, last_addr;
 	pgprot_t pgprot;
 
@@ -130,20 +126,19 @@ EXPORT_SYMBOL(__ioremap);
  * make bus memory CPU accessible via the readb/readw/readl/writeb/
  * writew/writel functions and the other mmio helpers. The returned
  * address is not guaranteed to be usable directly as a virtual
- * address. 
+ * address.
  *
  * This version of ioremap ensures that the memory is marked uncachable
  * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many 
+ * the PCI bus. Note that there are other caches and buffers on many
  * busses. In particular driver authors should read up on PCI writes
  *
  * It's useful if some control registers are in such an area and
  * write combining or read caching is not desirable:
- * 
+ *
  * Must be freed with iounmap.
  */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
 	return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
 }
@@ -159,13 +154,14 @@ void iounmap(volatile void __iomem *addr)
 {
 	struct vm_struct *p, *o;
 
-	if (addr <= high_memory) 
-		return; 
+	if (addr <= high_memory)
+		return;
 	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
-		addr < phys_to_virt(ISA_END_ADDRESS))
+	    addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 
-	addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+	addr = (volatile void __iomem *)
+		(PAGE_MASK & (unsigned long __force)addr);
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
@@ -179,7 +175,7 @@ void iounmap(volatile void __iomem *addr)
 	read_unlock(&vmlist_lock);
 
 	if (!p) {
-		printk("iounmap: bad address %p\n", addr);
+		printk(KERN_ERR "iounmap: bad address %p\n", addr);
 		dump_stack();
 		return;
 	}
@@ -191,7 +187,7 @@ void iounmap(volatile void __iomem *addr)
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
 	BUG_ON(p != o || o == NULL);
-	kfree(p); 
+	kfree(p);
 }
 EXPORT_SYMBOL(iounmap);
 

From 3cbd09e482ade50d212511d4693bd754d4d9dd55 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 824/890] x86: cleanup ioremap includes

Get rid of the douplicate define of ISA_START/END_ADDRESS and use the
same headers in 32 and 64 bit code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 17 ++++++++---------
 arch/x86/mm/ioremap_64.c | 13 ++++++-------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 30ff2586db1e..d43251c9c1c6 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -6,18 +6,17 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  */
 
-#include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
 #include <linux/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
-#define ISA_START_ADDRESS	0xa0000
-#define ISA_END_ADDRESS		0x100000
+#include <asm/cacheflush.h>
+#include <asm/e820.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index bd962cc636c5..8862a19f39c8 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -6,18 +6,17 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  */
 
-#include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
 #include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
-#include <asm/proto.h>
 #include <asm/e820.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 unsigned long __phys_addr(unsigned long x)
 {

From f87519e8f4f1de9b39a40e56479a7ad2443169dd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 825/890] x86: introduce max_pfn_mapped

64bit uses end_pfn_map and 32bit uses max_low_pfn. There are several
files which have #ifdef'ed defines which map either to end_pfn_map or
max_low_pfn. Replace this by a universal define and clean up all the
other instances.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/efi.c       |  4 ++--
 arch/x86/mm/pageattr-test.c | 21 +++++++--------------
 include/asm-x86/efi.h       |  2 --
 include/asm-x86/page.h      |  2 ++
 4 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 834ecfb41e97..57b57778bf60 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -396,7 +396,7 @@ static void __init runtime_code_page_mkexec(void)
 		md = p;
 		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 		if (md->type == EFI_RUNTIME_SERVICES_CODE &&
-		    (end >> PAGE_SHIFT) <= end_pfn_map)
+		    (end >> PAGE_SHIFT) <= max_pfn_mapped)
 			change_page_attr_addr(md->virt_addr,
 					      md->num_pages,
 					      PAGE_KERNEL_EXEC_NOCACHE);
@@ -429,7 +429,7 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 		if ((md->attribute & EFI_MEMORY_WB) &&
-		    ((end >> PAGE_SHIFT) <= end_pfn_map))
+		    ((end >> PAGE_SHIFT) <= max_pfn_mapped))
 			md->virt_addr = (unsigned long)__va(md->phys_addr);
 		else
 			md->virt_addr = (unsigned long)
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index d7a93008cc12..6a41a0f0c149 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -27,13 +27,6 @@ enum {
 	GPS			= (1<<30)
 };
 
-#ifdef CONFIG_X86_64
-# include <asm/proto.h>
-# define max_mapped		end_pfn_map
-#else
-# define max_mapped		max_low_pfn
-#endif
-
 struct split_state {
 	long lpg, gpg, spg, exec;
 	long min_exec, max_exec;
@@ -48,7 +41,7 @@ static __init int print_split(struct split_state *s)
 	s->lpg = s->gpg = s->spg = s->exec = 0;
 	s->min_exec = ~0UL;
 	s->max_exec = 0;
-	for (i = 0; i < max_mapped; ) {
+	for (i = 0; i < max_pfn_mapped; ) {
 		unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT);
 		int level;
 		pte_t *pte;
@@ -97,8 +90,8 @@ static __init int print_split(struct split_state *s)
 
 	expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
 	if (expected != i) {
-		printk(KERN_ERR "CPA max_mapped %lu but expected %lu\n",
-			max_mapped, expected);
+		printk(KERN_ERR "CPA max_pfn_mapped %lu but expected %lu\n",
+			max_pfn_mapped, expected);
 		return 1;
 	}
 	return err;
@@ -120,22 +113,22 @@ static __init int exercise_pageattr(void)
 
 	printk(KERN_INFO "CPA exercising pageattr\n");
 
-	bm = vmalloc((max_mapped + 7) / 8);
+	bm = vmalloc((max_pfn_mapped + 7) / 8);
 	if (!bm) {
 		printk(KERN_ERR "CPA Cannot vmalloc bitmap\n");
 		return -ENOMEM;
 	}
-	memset(bm, 0, (max_mapped + 7) / 8);
+	memset(bm, 0, (max_pfn_mapped + 7) / 8);
 
 	failed += print_split(&sa);
 	srandom32(100);
 
 	for (i = 0; i < NTEST; i++) {
-		unsigned long pfn = random32() % max_mapped;
+		unsigned long pfn = random32() % max_pfn_mapped;
 
 		addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
 		len[i] = random32() % 100;
-		len[i] = min_t(unsigned long, len[i], max_mapped - pfn - 1);
+		len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
 
 		if (len[i] == 0)
 			len[i] = 1;
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 9e08ae84795d..9c68a1f098d8 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -35,8 +35,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #define efi_ioremap(addr, size)			ioremap(addr, size)
 
-#define end_pfn_map				max_low_pfn
-
 #else /* !CONFIG_X86_32 */
 
 #define MAX_EFI_IO_PAGES	100
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index eba88d940092..e2c79d8dcdcf 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -33,8 +33,10 @@
 
 #ifdef CONFIG_X86_64
 #include <asm/page_64.h>
+#define max_pfn_mapped		end_pfn_map
 #else
 #include <asm/page_32.h>
+#define max_pfn_mapped		max_low_pfn
 #endif	/* CONFIG_X86_64 */
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)

From 74ff2857f099951020a47420872f5c1460f32c27 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 826/890] x86: make c_p_a unconditional in ioremap

Make c_p_a unconditional for ioremap and iounmap. This ensures
complete consistency of the flags which are handed to
ioremap_page_range and the real flags in the mappings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c |  4 ++--
 arch/x86/mm/ioremap_64.c | 14 ++++++--------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index d43251c9c1c6..213ace58a188 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -74,7 +74,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP | (flags << 20));
+	area = get_vm_area(size, VM_IOREMAP);
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -189,7 +189,7 @@ void iounmap(volatile void __iomem *addr)
 	}
 
 	/* Reset the direct mapping. Can block */
-	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
+	if (p->phys_addr < virt_to_phys(high_memory) - 1) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 get_vm_area_size(p) >> PAGE_SHIFT,
 				 PAGE_KERNEL);
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 8862a19f39c8..bb9246c4a07b 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -31,7 +31,7 @@ EXPORT_SYMBOL(__phys_addr);
  * conflicts.
  */
 static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
-			       unsigned long flags)
+			       pgprot_t prot)
 {
 	int err = 0;
 	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
@@ -50,8 +50,8 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 		 * the phys addr can be a in hole between nodes and
 		 * not have an memmap entry.
 		 */
-		err = change_page_attr_addr(vaddr,npages,
-					    MAKE_GLOBAL(__PAGE_KERNEL|flags));
+		err = change_page_attr_addr(vaddr, npages, prot);
+
 		if (!err)
 			global_flush_tlb();
 	}
@@ -97,7 +97,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP | (flags << 20));
+	area = get_vm_area(size, VM_IOREMAP);
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -107,8 +107,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
-	if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
-		area->flags &= 0xffffff;
+	if (ioremap_change_attr(phys_addr, size, pgprot) < 0) {
 		vunmap(addr);
 		return NULL;
 	}
@@ -180,8 +179,7 @@ void iounmap(volatile void __iomem *addr)
 	}
 
 	/* Reset the direct mapping. Can block */
-	if (p->flags >> 20)
-		ioremap_change_attr(p->phys_addr, p->size, 0);
+	ioremap_change_attr(p->phys_addr, p->size, PAGE_KERNEL);
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);

From e9332cacd74b25548a517b8dd4fff0216f7907db Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 827/890] x86: switch to change_page_attr_addr in ioremap_32.c

Use change_page_attr_addr() instead of change_page_attr(), which
simplifies the code significantly and matches the 64bit
implementation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 90 ++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 213ace58a188..ae7e55c8c647 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -6,6 +6,7 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  */
 
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -18,6 +19,42 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+/*
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+			       pgprot_t prot)
+{
+	unsigned long npages, vaddr, last_addr = phys_addr + size - 1;
+	int err, level;
+
+	/* No change for pages after the last mapping */
+	if (last_addr >= (max_pfn_mapped << PAGE_SHIFT))
+		return 0;
+
+	npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	vaddr = (unsigned long) __va(phys_addr);
+
+	/*
+	 * If there is no identity map for this address,
+	 * change_page_attr_addr is unnecessary
+	 */
+	if (!lookup_address(vaddr, &level))
+		return 0;
+
+	/*
+	 * Must use an address here and not struct page because the
+	 * phys addr can be a in hole between nodes and not have a
+	 * memmap entry.
+	 */
+	err = change_page_attr_addr(vaddr, npages, prot);
+	if (!err)
+		global_flush_tlb();
+
+	return err;
+}
+
 /*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
@@ -33,7 +70,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	void __iomem *addr;
 	struct vm_struct *area;
 	unsigned long offset, last_addr;
-	pgprot_t prot;
+	pgprot_t pgprot;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -62,7 +99,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 				return NULL;
 	}
 
-	prot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
+	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
 
 	/*
 	 * Mappings have to be page-aligned
@@ -79,11 +116,17 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		return NULL;
 	area->phys_addr = phys_addr;
 	addr = (void __iomem *) area->addr;
-	if (ioremap_page_range((unsigned long) addr,
-			       (unsigned long) addr + size, phys_addr, prot)) {
+	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+			       phys_addr, pgprot)) {
 		vunmap((void __force *) addr);
 		return NULL;
 	}
+
+	if (ioremap_change_attr(phys_addr, size, pgprot) < 0) {
+		vunmap(addr);
+		return NULL;
+	}
+
 	return (void __iomem *) (offset + (char __iomem *)addr);
 }
 EXPORT_SYMBOL(__ioremap);
@@ -111,37 +154,7 @@ EXPORT_SYMBOL(__ioremap);
  */
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
-	unsigned long last_addr;
-	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
-
-	if (!p)
-		return p;
-
-	/* Guaranteed to be > phys_addr, as per __ioremap() */
-	last_addr = phys_addr + size - 1;
-
-	if (last_addr < virt_to_phys(high_memory) - 1) {
-		struct page *ppage = virt_to_page(__va(phys_addr));
-		unsigned long npages;
-
-		phys_addr &= PAGE_MASK;
-
-		/* This might overflow and become zero.. */
-		last_addr = PAGE_ALIGN(last_addr);
-
-		/* .. but that's ok, because modulo-2**n arithmetic will make
-		 * the page-aligned "last - first" come out right.
-		 */
-		npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
-		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
-			iounmap(p);
-			p = NULL;
-		}
-		global_flush_tlb();
-	}
-
-	return p;
+	return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -189,12 +202,7 @@ void iounmap(volatile void __iomem *addr)
 	}
 
 	/* Reset the direct mapping. Can block */
-	if (p->phys_addr < virt_to_phys(high_memory) - 1) {
-		change_page_attr(virt_to_page(__va(p->phys_addr)),
-				 get_vm_area_size(p) >> PAGE_SHIFT,
-				 PAGE_KERNEL);
-		global_flush_tlb();
-	}
+	ioremap_change_attr(p->phys_addr, p->size, PAGE_KERNEL);
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);

From 4b40fcee131847069543cc60a3a79887743c53db Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 828/890] x86: __iomem annotations

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 2 +-
 arch/x86/mm/ioremap_64.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index ae7e55c8c647..5273ac4aa96a 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -81,7 +81,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	 * Don't remap the low PCI/ISA area, it's always mapped..
 	 */
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
-		return (void __iomem *) phys_to_virt(phys_addr);
+		return (__force void __iomem *)phys_to_virt(phys_addr);
 
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index bb9246c4a07b..e79d2b353de0 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -70,7 +70,7 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			unsigned long flags)
 {
-	void *addr;
+	void __iomem *addr;
 	struct vm_struct *area;
 	unsigned long offset, last_addr;
 	pgprot_t pgprot;
@@ -101,7 +101,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
-	addr = area->addr;
+	addr = (void __iomem *) area->addr;
 	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
 			       phys_addr, pgprot)) {
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
@@ -111,7 +111,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		vunmap(addr);
 		return NULL;
 	}
-	return (__force void __iomem *) (offset + (char *)addr);
+	return (void __iomem *) (offset + (char __iomem *)addr);
 }
 EXPORT_SYMBOL(__ioremap);
 
@@ -152,7 +152,7 @@ void iounmap(volatile void __iomem *addr)
 {
 	struct vm_struct *p, *o;
 
-	if (addr <= high_memory)
+	if ((void __force *)addr <= high_memory)
 		return;
 	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
 	    addr < phys_to_virt(ISA_END_ADDRESS))

From e4c1b977f0036c00ebabb60375cb63d0de9d43fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 829/890] x86: use remove_vm_are in ioremap_32 error path

When ioremap_page_range fails, then we can use remove_vm_area instead
of vunmap safely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 5273ac4aa96a..4d919c37d1d6 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -118,7 +118,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	addr = (void __iomem *) area->addr;
 	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
 			       phys_addr, pgprot)) {
-		vunmap((void __force *) addr);
+		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
 

From 240d3a7c47e3fb9c2533f63e9e323a25d91d0643 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 830/890] x86: unify ioremap

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap_32.c |  18 +++
 arch/x86/mm/ioremap_64.c | 313 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 311 insertions(+), 20 deletions(-)

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index 4d919c37d1d6..f4a2082568c8 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -19,6 +19,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_X86_64
+
+unsigned long __phys_addr(unsigned long x)
+{
+	if (x >= __START_KERNEL_map)
+		return x - __START_KERNEL_map + phys_base;
+	return x - PAGE_OFFSET;
+}
+EXPORT_SYMBOL(__phys_addr);
+
+#endif
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
@@ -49,6 +61,7 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 	 * memmap entry.
 	 */
 	err = change_page_attr_addr(vaddr, npages, prot);
+
 	if (!err)
 		global_flush_tlb();
 
@@ -83,6 +96,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
@@ -98,6 +112,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			if (!PageReserved(page))
 				return NULL;
 	}
+#endif
 
 	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
 
@@ -211,6 +226,7 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+#ifdef CONFIG_X86_32
 
 int __initdata early_ioremap_debug;
 
@@ -443,3 +459,5 @@ void __this_fixmap_does_not_exist(void)
 {
 	WARN_ON(1);
 }
+
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index e79d2b353de0..f4a2082568c8 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -6,6 +6,7 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  */
 
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -18,6 +19,8 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_X86_64
+
 unsigned long __phys_addr(unsigned long x)
 {
 	if (x >= __START_KERNEL_map)
@@ -26,6 +29,8 @@ unsigned long __phys_addr(unsigned long x)
 }
 EXPORT_SYMBOL(__phys_addr);
 
+#endif
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
@@ -33,28 +38,33 @@ EXPORT_SYMBOL(__phys_addr);
 static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 			       pgprot_t prot)
 {
-	int err = 0;
-	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
-		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long vaddr = (unsigned long) __va(phys_addr);
-		int level;
+	unsigned long npages, vaddr, last_addr = phys_addr + size - 1;
+	int err, level;
 
-		/*
-		 * If there is no identity map for this address,
-		 * change_page_attr_addr is unnecessary
-		 */
-		if (!lookup_address(vaddr, &level))
-			return err;
-		/*
-		 * Must use a address here and not struct page because
-		 * the phys addr can be a in hole between nodes and
-		 * not have an memmap entry.
-		 */
-		err = change_page_attr_addr(vaddr, npages, prot);
+	/* No change for pages after the last mapping */
+	if (last_addr >= (max_pfn_mapped << PAGE_SHIFT))
+		return 0;
+
+	npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	vaddr = (unsigned long) __va(phys_addr);
+
+	/*
+	 * If there is no identity map for this address,
+	 * change_page_attr_addr is unnecessary
+	 */
+	if (!lookup_address(vaddr, &level))
+		return 0;
+
+	/*
+	 * Must use an address here and not struct page because the
+	 * phys addr can be a in hole between nodes and not have a
+	 * memmap entry.
+	 */
+	err = change_page_attr_addr(vaddr, npages, prot);
+
+	if (!err)
+		global_flush_tlb();
 
-		if (!err)
-			global_flush_tlb();
-	}
 	return err;
 }
 
@@ -86,7 +96,26 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
+#ifdef CONFIG_X86_32
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using..
+	 */
+	if (phys_addr <= virt_to_phys(high_memory - 1)) {
+		char *t_addr, *t_end;
+		struct page *page;
+
+		t_addr = __va(phys_addr);
+		t_end = t_addr + (size - 1);
+
+		for (page = virt_to_page(t_addr);
+		     page <= virt_to_page(t_end); page++)
+			if (!PageReserved(page))
+				return NULL;
+	}
+#endif
+
 	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
+
 	/*
 	 * Mappings have to be page-aligned
 	 */
@@ -107,10 +136,12 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
+
 	if (ioremap_change_attr(phys_addr, size, pgprot) < 0) {
 		vunmap(addr);
 		return NULL;
 	}
+
 	return (void __iomem *) (offset + (char __iomem *)addr);
 }
 EXPORT_SYMBOL(__ioremap);
@@ -154,12 +185,19 @@ void iounmap(volatile void __iomem *addr)
 
 	if ((void __force *)addr <= high_memory)
 		return;
+
+	/*
+	 * __ioremap special-cases the PCI/ISA range by not instantiating a
+	 * vm_area and by simply returning an address into the kernel mapping
+	 * of ISA space.   So handle that here.
+	 */
 	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
 	    addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
+
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
@@ -188,3 +226,238 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+#ifdef CONFIG_X86_32
+
+int __initdata early_ioremap_debug;
+
+static int __init early_ioremap_debug_setup(char *str)
+{
+	early_ioremap_debug = 1;
+
+	return 0;
+}
+early_param("early_ioremap_debug", early_ioremap_debug_setup);
+
+static __initdata int after_paging_init;
+static __initdata unsigned long bm_pte[1024]
+				__attribute__((aligned(PAGE_SIZE)));
+
+static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+{
+	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+}
+
+static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+{
+	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+}
+
+void __init early_ioremap_init(void)
+{
+	unsigned long *pgd;
+
+	if (early_ioremap_debug)
+		printk(KERN_DEBUG "early_ioremap_init()\n");
+
+	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	memset(bm_pte, 0, sizeof(bm_pte));
+	/*
+	 * The boot-ioremap range spans multiple pgds, for which
+	 * we are not prepared:
+	 */
+	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		printk(KERN_WARNING "pgd %p != %p\n",
+		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+		       fix_to_virt(FIX_BTMAP_BEGIN));
+		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+		       fix_to_virt(FIX_BTMAP_END));
+
+		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
+		       FIX_BTMAP_BEGIN);
+	}
+}
+
+void __init early_ioremap_clear(void)
+{
+	unsigned long *pgd;
+
+	if (early_ioremap_debug)
+		printk(KERN_DEBUG "early_ioremap_clear()\n");
+
+	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+	*pgd = 0;
+	__flush_tlb_all();
+}
+
+void __init early_ioremap_reset(void)
+{
+	enum fixed_addresses idx;
+	unsigned long *pte, phys, addr;
+
+	after_paging_init = 1;
+	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
+		addr = fix_to_virt(idx);
+		pte = early_ioremap_pte(addr);
+		if (!*pte & _PAGE_PRESENT) {
+			phys = *pte & PAGE_MASK;
+			set_fixmap(idx, phys);
+		}
+	}
+}
+
+static void __init __early_set_fixmap(enum fixed_addresses idx,
+				   unsigned long phys, pgprot_t flags)
+{
+	unsigned long *pte, addr = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	pte = early_ioremap_pte(addr);
+	if (pgprot_val(flags))
+		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+	else
+		*pte = 0;
+	__flush_tlb_one(addr);
+}
+
+static inline void __init early_set_fixmap(enum fixed_addresses idx,
+					unsigned long phys)
+{
+	if (after_paging_init)
+		set_fixmap(idx, phys);
+	else
+		__early_set_fixmap(idx, phys, PAGE_KERNEL);
+}
+
+static inline void __init early_clear_fixmap(enum fixed_addresses idx)
+{
+	if (after_paging_init)
+		clear_fixmap(idx);
+	else
+		__early_set_fixmap(idx, 0, __pgprot(0));
+}
+
+
+int __initdata early_ioremap_nested;
+
+static int __init check_early_ioremap_leak(void)
+{
+	if (!early_ioremap_nested)
+		return 0;
+
+	printk(KERN_WARNING
+	       "Debug warning: early ioremap leak of %d areas detected.\n",
+	       early_ioremap_nested);
+	printk(KERN_WARNING
+	       "please boot with early_ioremap_debug and report the dmesg.\n");
+	WARN_ON(1);
+
+	return 1;
+}
+late_initcall(check_early_ioremap_leak);
+
+void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
+{
+	unsigned long offset, last_addr;
+	unsigned int nrpages, nesting;
+	enum fixed_addresses idx0, idx;
+
+	WARN_ON(system_state != SYSTEM_BOOTING);
+
+	nesting = early_ioremap_nested;
+	if (early_ioremap_debug) {
+		printk(KERN_DEBUG "early_ioremap(%08lx, %08lx) [%d] => ",
+		       phys_addr, size, nesting);
+		dump_stack();
+	}
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	if (nesting >= FIX_BTMAPS_NESTING) {
+		WARN_ON(1);
+		return NULL;
+	}
+	early_ioremap_nested++;
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr) - phys_addr;
+
+	/*
+	 * Mappings have to fit in the FIX_BTMAP area.
+	 */
+	nrpages = size >> PAGE_SHIFT;
+	if (nrpages > NR_FIX_BTMAPS) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	/*
+	 * Ok, go for it..
+	 */
+	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
+	idx = idx0;
+	while (nrpages > 0) {
+		early_set_fixmap(idx, phys_addr);
+		phys_addr += PAGE_SIZE;
+		--idx;
+		--nrpages;
+	}
+	if (early_ioremap_debug)
+		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
+
+	return (void *) (offset + fix_to_virt(idx0));
+}
+
+void __init early_iounmap(void *addr, unsigned long size)
+{
+	unsigned long virt_addr;
+	unsigned long offset;
+	unsigned int nrpages;
+	enum fixed_addresses idx;
+	unsigned int nesting;
+
+	nesting = --early_ioremap_nested;
+	WARN_ON(nesting < 0);
+
+	if (early_ioremap_debug) {
+		printk(KERN_DEBUG "early_iounmap(%p, %08lx) [%d]\n", addr,
+		       size, nesting);
+		dump_stack();
+	}
+
+	virt_addr = (unsigned long)addr;
+	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
+		WARN_ON(1);
+		return;
+	}
+	offset = virt_addr & ~PAGE_MASK;
+	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
+	while (nrpages > 0) {
+		early_clear_fixmap(idx);
+		--idx;
+		--nrpages;
+	}
+}
+
+void __this_fixmap_does_not_exist(void)
+{
+	WARN_ON(1);
+}
+
+#endif /* CONFIG_X86_32 */

From e64c8aa0c5e5d23730b2d702297e01cd7fe53144 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:05 +0100
Subject: [PATCH 831/890] x86: unify ioremap_32 and _64

Unify the now identical ioremap_32.c and ioremap_64.c into the
same ioremap.c file. No code changed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/Makefile_32                 |   2 +-
 arch/x86/mm/Makefile_64                 |   2 +-
 arch/x86/mm/{ioremap_32.c => ioremap.c} |   0
 arch/x86/mm/ioremap_64.c                | 463 ------------------------
 4 files changed, 2 insertions(+), 465 deletions(-)
 rename arch/x86/mm/{ioremap_32.c => ioremap.c} (100%)
 delete mode 100644 arch/x86/mm/ioremap_64.c

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 424e5a862271..af0d39bea6c5 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap_32.o extable.o pageattr.o mmap.o
+obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap.o extable.o pageattr.o mmap.o
 
 obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index 043584478457..b564b5a760da 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y	 := init_64.o fault_64.o ioremap_64.o extable.o pageattr.o mmap.o
+obj-y	 := init_64.o fault_64.o ioremap.o extable.o pageattr.o mmap.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap.c
similarity index 100%
rename from arch/x86/mm/ioremap_32.c
rename to arch/x86/mm/ioremap.c
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
deleted file mode 100644
index f4a2082568c8..000000000000
--- a/arch/x86/mm/ioremap_64.c
+++ /dev/null
@@ -1,463 +0,0 @@
-/*
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/bootmem.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <asm/cacheflush.h>
-#include <asm/e820.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-
-#ifdef CONFIG_X86_64
-
-unsigned long __phys_addr(unsigned long x)
-{
-	if (x >= __START_KERNEL_map)
-		return x - __START_KERNEL_map + phys_base;
-	return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-#endif
-
-/*
- * Fix up the linear direct mapping of the kernel to avoid cache attribute
- * conflicts.
- */
-static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
-			       pgprot_t prot)
-{
-	unsigned long npages, vaddr, last_addr = phys_addr + size - 1;
-	int err, level;
-
-	/* No change for pages after the last mapping */
-	if (last_addr >= (max_pfn_mapped << PAGE_SHIFT))
-		return 0;
-
-	npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	vaddr = (unsigned long) __va(phys_addr);
-
-	/*
-	 * If there is no identity map for this address,
-	 * change_page_attr_addr is unnecessary
-	 */
-	if (!lookup_address(vaddr, &level))
-		return 0;
-
-	/*
-	 * Must use an address here and not struct page because the
-	 * phys addr can be a in hole between nodes and not have a
-	 * memmap entry.
-	 */
-	err = change_page_attr_addr(vaddr, npages, prot);
-
-	if (!err)
-		global_flush_tlb();
-
-	return err;
-}
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
-			unsigned long flags)
-{
-	void __iomem *addr;
-	struct vm_struct *area;
-	unsigned long offset, last_addr;
-	pgprot_t pgprot;
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
-		return NULL;
-
-	/*
-	 * Don't remap the low PCI/ISA area, it's always mapped..
-	 */
-	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
-		return (__force void __iomem *)phys_to_virt(phys_addr);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Don't allow anybody to remap normal RAM that we're using..
-	 */
-	if (phys_addr <= virt_to_phys(high_memory - 1)) {
-		char *t_addr, *t_end;
-		struct page *page;
-
-		t_addr = __va(phys_addr);
-		t_end = t_addr + (size - 1);
-
-		for (page = virt_to_page(t_addr);
-		     page <= virt_to_page(t_end); page++)
-			if (!PageReserved(page))
-				return NULL;
-	}
-#endif
-
-	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
-
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_vm_area(size, VM_IOREMAP);
-	if (!area)
-		return NULL;
-	area->phys_addr = phys_addr;
-	addr = (void __iomem *) area->addr;
-	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, pgprot)) {
-		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
-		return NULL;
-	}
-
-	if (ioremap_change_attr(phys_addr, size, pgprot) < 0) {
-		vunmap(addr);
-		return NULL;
-	}
-
-	return (void __iomem *) (offset + (char __iomem *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-/**
- * ioremap_nocache     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
-{
-	return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
-}
-EXPORT_SYMBOL(ioremap_nocache);
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
-	struct vm_struct *p, *o;
-
-	if ((void __force *)addr <= high_memory)
-		return;
-
-	/*
-	 * __ioremap special-cases the PCI/ISA range by not instantiating a
-	 * vm_area and by simply returning an address into the kernel mapping
-	 * of ISA space.   So handle that here.
-	 */
-	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
-	    addr < phys_to_virt(ISA_END_ADDRESS))
-		return;
-
-	addr = (volatile void __iomem *)
-		(PAGE_MASK & (unsigned long __force)addr);
-
-	/* Use the vm area unlocked, assuming the caller
-	   ensures there isn't another iounmap for the same address
-	   in parallel. Reuse of the virtual address is prevented by
-	   leaving it in the global lists until we're done with it.
-	   cpa takes care of the direct mappings. */
-	read_lock(&vmlist_lock);
-	for (p = vmlist; p; p = p->next) {
-		if (p->addr == addr)
-			break;
-	}
-	read_unlock(&vmlist_lock);
-
-	if (!p) {
-		printk(KERN_ERR "iounmap: bad address %p\n", addr);
-		dump_stack();
-		return;
-	}
-
-	/* Reset the direct mapping. Can block */
-	ioremap_change_attr(p->phys_addr, p->size, PAGE_KERNEL);
-
-	/* Finally remove it */
-	o = remove_vm_area((void *)addr);
-	BUG_ON(p != o || o == NULL);
-	kfree(p);
-}
-EXPORT_SYMBOL(iounmap);
-
-#ifdef CONFIG_X86_32
-
-int __initdata early_ioremap_debug;
-
-static int __init early_ioremap_debug_setup(char *str)
-{
-	early_ioremap_debug = 1;
-
-	return 0;
-}
-early_param("early_ioremap_debug", early_ioremap_debug_setup);
-
-static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
-				__attribute__((aligned(PAGE_SIZE)));
-
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
-{
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
-}
-
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
-{
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
-}
-
-void __init early_ioremap_init(void)
-{
-	unsigned long *pgd;
-
-	if (early_ioremap_debug)
-		printk(KERN_DEBUG "early_ioremap_init()\n");
-
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
-	memset(bm_pte, 0, sizeof(bm_pte));
-	/*
-	 * The boot-ioremap range spans multiple pgds, for which
-	 * we are not prepared:
-	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
-		WARN_ON(1);
-		printk(KERN_WARNING "pgd %p != %p\n",
-		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
-		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
-		       fix_to_virt(FIX_BTMAP_BEGIN));
-		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
-		       fix_to_virt(FIX_BTMAP_END));
-
-		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
-		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
-		       FIX_BTMAP_BEGIN);
-	}
-}
-
-void __init early_ioremap_clear(void)
-{
-	unsigned long *pgd;
-
-	if (early_ioremap_debug)
-		printk(KERN_DEBUG "early_ioremap_clear()\n");
-
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
-	__flush_tlb_all();
-}
-
-void __init early_ioremap_reset(void)
-{
-	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
-
-	after_paging_init = 1;
-	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
-		addr = fix_to_virt(idx);
-		pte = early_ioremap_pte(addr);
-		if (!*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
-			set_fixmap(idx, phys);
-		}
-	}
-}
-
-static void __init __early_set_fixmap(enum fixed_addresses idx,
-				   unsigned long phys, pgprot_t flags)
-{
-	unsigned long *pte, addr = __fix_to_virt(idx);
-
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
-	pte = early_ioremap_pte(addr);
-	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
-	else
-		*pte = 0;
-	__flush_tlb_one(addr);
-}
-
-static inline void __init early_set_fixmap(enum fixed_addresses idx,
-					unsigned long phys)
-{
-	if (after_paging_init)
-		set_fixmap(idx, phys);
-	else
-		__early_set_fixmap(idx, phys, PAGE_KERNEL);
-}
-
-static inline void __init early_clear_fixmap(enum fixed_addresses idx)
-{
-	if (after_paging_init)
-		clear_fixmap(idx);
-	else
-		__early_set_fixmap(idx, 0, __pgprot(0));
-}
-
-
-int __initdata early_ioremap_nested;
-
-static int __init check_early_ioremap_leak(void)
-{
-	if (!early_ioremap_nested)
-		return 0;
-
-	printk(KERN_WARNING
-	       "Debug warning: early ioremap leak of %d areas detected.\n",
-	       early_ioremap_nested);
-	printk(KERN_WARNING
-	       "please boot with early_ioremap_debug and report the dmesg.\n");
-	WARN_ON(1);
-
-	return 1;
-}
-late_initcall(check_early_ioremap_leak);
-
-void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
-{
-	unsigned long offset, last_addr;
-	unsigned int nrpages, nesting;
-	enum fixed_addresses idx0, idx;
-
-	WARN_ON(system_state != SYSTEM_BOOTING);
-
-	nesting = early_ioremap_nested;
-	if (early_ioremap_debug) {
-		printk(KERN_DEBUG "early_ioremap(%08lx, %08lx) [%d] => ",
-		       phys_addr, size, nesting);
-		dump_stack();
-	}
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr) {
-		WARN_ON(1);
-		return NULL;
-	}
-
-	if (nesting >= FIX_BTMAPS_NESTING) {
-		WARN_ON(1);
-		return NULL;
-	}
-	early_ioremap_nested++;
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr) - phys_addr;
-
-	/*
-	 * Mappings have to fit in the FIX_BTMAP area.
-	 */
-	nrpages = size >> PAGE_SHIFT;
-	if (nrpages > NR_FIX_BTMAPS) {
-		WARN_ON(1);
-		return NULL;
-	}
-
-	/*
-	 * Ok, go for it..
-	 */
-	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
-	idx = idx0;
-	while (nrpages > 0) {
-		early_set_fixmap(idx, phys_addr);
-		phys_addr += PAGE_SIZE;
-		--idx;
-		--nrpages;
-	}
-	if (early_ioremap_debug)
-		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
-
-	return (void *) (offset + fix_to_virt(idx0));
-}
-
-void __init early_iounmap(void *addr, unsigned long size)
-{
-	unsigned long virt_addr;
-	unsigned long offset;
-	unsigned int nrpages;
-	enum fixed_addresses idx;
-	unsigned int nesting;
-
-	nesting = --early_ioremap_nested;
-	WARN_ON(nesting < 0);
-
-	if (early_ioremap_debug) {
-		printk(KERN_DEBUG "early_iounmap(%p, %08lx) [%d]\n", addr,
-		       size, nesting);
-		dump_stack();
-	}
-
-	virt_addr = (unsigned long)addr;
-	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
-		WARN_ON(1);
-		return;
-	}
-	offset = virt_addr & ~PAGE_MASK;
-	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
-	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
-	while (nrpages > 0) {
-		early_clear_fixmap(idx);
-		--idx;
-		--nrpages;
-	}
-}
-
-void __this_fixmap_does_not_exist(void)
-{
-	WARN_ON(1);
-}
-
-#endif /* CONFIG_X86_32 */

From e81d5dc41b67349c06e80658227c9156738f0df1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 832/890] x86: cpa: move clflush_cache_range()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 00f6f341e291..4208571334db 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -9,14 +9,6 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
-void clflush_cache_range(void *addr, int size)
-{
-	int i;
-
-	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr+i);
-}
-
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
@@ -290,6 +282,14 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 }
 EXPORT_SYMBOL(change_page_attr);
 
+void clflush_cache_range(void *addr, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
+		clflush(addr+i);
+}
+
 static void flush_kernel_map(void *arg)
 {
 	/*

From 75cbade8ea3127a84e3da7c2c15808e54f0df7e8 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 833/890] x86: a new API for drivers/etc to control cache and
 other page attributes

Right now, if drivers or other code want to change, say, a cache attribute of a
page, the only API they have is change_page_attr(). c-p-a is a really bad API
for this, because it forces the caller to know *ALL* the attributes he wants
for the page, not just the 1 thing he wants to change. So code that wants to
set a page uncachable, needs to be aware of the NX status as well etc etc etc.

This patch introduces a set of new APIs for this, set_pages_<attr> and
set_memory_<attr>, that offer a logical change to the user, and leave all
attributes not implied by the requested logical change alone.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c       | 197 +++++++++++++++++++++++++++++++++++
 include/asm-x86/cacheflush.h |  15 +++
 2 files changed, 212 insertions(+)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4208571334db..f3c9510b8d92 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -211,6 +211,8 @@ repeat:
  * mem_map entry (pfn_valid() is false).
  *
  * See change_page_attr() documentation for more details.
+ *
+ * Modules and drivers should use the set_memory_* APIs instead.
  */
 
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
@@ -273,6 +275,8 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
  * (e.g. in user space) * This function only deals with the kernel linear map.
  *
  * For MMIO areas without mem_map use change_page_attr_addr() instead.
+ *
+ * Modules and drivers should use the set_pages_* APIs instead.
  */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
@@ -282,6 +286,199 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 }
 EXPORT_SYMBOL(change_page_attr);
 
+/**
+ * change_page_attr_set - Change page table attributes in the linear mapping.
+ * @addr: Virtual address in linear mapping.
+ * @numpages: Number of pages to change
+ * @prot: Protection/caching type bits to set (PAGE_*)
+ *
+ * Returns 0 on success, otherwise a negated errno.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ *
+ * Caller must call global_flush_tlb() later to make the changes active.
+ *
+ * The caller needs to ensure that there are no conflicting mappings elsewhere
+ * (e.g. in user space) * This function only deals with the kernel linear map.
+ *
+ * This function is different from change_page_attr() in that only selected bits
+ * are impacted, all other bits remain as is.
+ */
+int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
+{
+	pgprot_t current_prot;
+	int level;
+	pte_t *pte;
+
+	pte = lookup_address(addr, &level);
+	if (pte)
+		current_prot = pte_pgprot(*pte);
+	else
+		pgprot_val(current_prot) = 0;
+
+	pgprot_val(prot) = pgprot_val(current_prot) | pgprot_val(prot);
+
+	return change_page_attr_addr(addr, numpages, prot);
+}
+
+/**
+ * change_page_attr_clear - Change page table attributes in the linear mapping.
+ * @addr: Virtual address in linear mapping.
+ * @numpages: Number of pages to change
+ * @prot: Protection/caching type bits to clear (PAGE_*)
+ *
+ * Returns 0 on success, otherwise a negated errno.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ *
+ * Caller must call global_flush_tlb() later to make the changes active.
+ *
+ * The caller needs to ensure that there are no conflicting mappings elsewhere
+ * (e.g. in user space) * This function only deals with the kernel linear map.
+ *
+ * This function is different from change_page_attr() in that only selected bits
+ * are impacted, all other bits remain as is.
+ */
+int change_page_attr_clear(unsigned long addr, int numpages, pgprot_t prot)
+{
+	pgprot_t current_prot;
+	int level;
+	pte_t *pte;
+
+	pte = lookup_address(addr, &level);
+	if (pte)
+		current_prot = pte_pgprot(*pte);
+	else
+		pgprot_val(current_prot) = 0;
+
+	pgprot_val(prot) = pgprot_val(current_prot) & ~pgprot_val(prot);
+
+	return change_page_attr_addr(addr, numpages, prot);
+}
+
+
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+	pgprot_t uncached;
+
+	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
+	return change_page_attr_set(addr, numpages, uncached);
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+	pgprot_t uncached;
+
+	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
+	return change_page_attr_clear(addr, numpages, uncached);
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	pgprot_t nx;
+
+	pgprot_val(nx) = _PAGE_NX;
+	return change_page_attr_clear(addr, numpages, nx);
+}
+EXPORT_SYMBOL(set_memory_x);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	pgprot_t nx;
+
+	pgprot_val(nx) = _PAGE_NX;
+	return change_page_attr_set(addr, numpages, nx);
+}
+EXPORT_SYMBOL(set_memory_nx);
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	pgprot_t rw;
+
+	pgprot_val(rw) = _PAGE_RW;
+	return change_page_attr_clear(addr, numpages, rw);
+}
+EXPORT_SYMBOL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	pgprot_t rw;
+
+	pgprot_val(rw) = _PAGE_RW;
+	return change_page_attr_set(addr, numpages, rw);
+}
+EXPORT_SYMBOL(set_memory_rw);
+
+int set_pages_uc(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t uncached;
+
+	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
+	return change_page_attr_set(addr, numpages, uncached);
+}
+EXPORT_SYMBOL(set_pages_uc);
+
+int set_pages_wb(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t uncached;
+
+	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
+	return change_page_attr_clear(addr, numpages, uncached);
+}
+EXPORT_SYMBOL(set_pages_wb);
+
+int set_pages_x(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t nx;
+
+	pgprot_val(nx) = _PAGE_NX;
+	return change_page_attr_clear(addr, numpages, nx);
+}
+EXPORT_SYMBOL(set_pages_x);
+
+int set_pages_nx(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t nx;
+
+	pgprot_val(nx) = _PAGE_NX;
+	return change_page_attr_set(addr, numpages, nx);
+}
+EXPORT_SYMBOL(set_pages_nx);
+
+int set_pages_ro(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t rw;
+
+	pgprot_val(rw) = _PAGE_RW;
+	return change_page_attr_clear(addr, numpages, rw);
+}
+EXPORT_SYMBOL(set_pages_ro);
+
+int set_pages_rw(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	pgprot_t rw;
+
+	pgprot_val(rw) = _PAGE_RW;
+	return change_page_attr_set(addr, numpages, rw);
+}
+EXPORT_SYMBOL(set_pages_rw);
+
+
 void clflush_cache_range(void *addr, int size)
 {
 	int i;
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index fccb563e2305..7b928d55c190 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -27,6 +27,21 @@
 void global_flush_tlb(void);
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
+
+int set_pages_uc(struct page *page, int numpages);
+int set_pages_wb(struct page *page, int numpages);
+int set_pages_x(struct page *page, int numpages);
+int set_pages_nx(struct page *page, int numpages);
+int set_pages_ro(struct page *page, int numpages);
+int set_pages_rw(struct page *page, int numpages);
+
+int set_memory_uc(unsigned long addr, int numpages);
+int set_memory_wb(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+
 void clflush_cache_range(void *addr, int size);
 
 #ifdef CONFIG_DEBUG_RODATA

From 6d238cc4dc8a36a3915c26202fe49f58a0683fb9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 834/890] x86: convert CPA users to the new set_page_ API

This patch converts various users of change_page_attr() to the new,
more intent driven set_page_*/set_memory_* API set.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/efi.c               |  8 ++++----
 arch/x86/kernel/pci-gart_64.c       |  3 +--
 arch/x86/mm/init_32.c               | 20 +++++++-------------
 arch/x86/mm/init_64.c               | 10 ++++------
 drivers/char/agp/intel-agp.c        |  6 +++---
 drivers/video/vermilion/vermilion.c |  9 +++------
 include/asm-x86/agp.h               |  4 ++--
 sound/pci/intel8x0.c                |  7 +++++--
 8 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 57b57778bf60..a70fe77354b8 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -396,10 +396,10 @@ static void __init runtime_code_page_mkexec(void)
 		md = p;
 		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 		if (md->type == EFI_RUNTIME_SERVICES_CODE &&
-		    (end >> PAGE_SHIFT) <= max_pfn_mapped)
-			change_page_attr_addr(md->virt_addr,
-					      md->num_pages,
-					      PAGE_KERNEL_EXEC_NOCACHE);
+		    (end >> PAGE_SHIFT) <= max_pfn_mapped) {
+			set_memory_x(md->virt_addr, md->num_pages);
+			set_memory_uc(md->virt_addr, md->num_pages);
+		}
 	}
 	__flush_tlb_all();
 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 04ca5c5221d7..8860c6eba8ab 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -570,8 +570,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
 	if (!gatt)
 		panic("Cannot allocate GATT table");
-	if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT,
-				  PAGE_KERNEL_NOCACHE))
+	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
 		panic("Could not set GART PTEs to uncacheable pages");
 	global_flush_tlb();
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 016c8ccd1d8d..7c9bb3076b8a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -770,34 +770,30 @@ void mark_rodata_ro(void)
 	if (num_possible_cpus() <= 1)
 #endif
 	{
-		change_page_attr(virt_to_page(start),
-		                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+		set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 		printk("Write protecting the kernel text: %luk\n", size >> 10);
 
 #ifdef CONFIG_CPA_DEBUG
 		global_flush_tlb();
 
 		printk("Testing CPA: Reverting %lx-%lx\n", start, start+size);
-		change_page_attr(virt_to_page(start), size>>PAGE_SHIFT,
-				 PAGE_KERNEL_EXEC);
+		set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
 		global_flush_tlb();
 
 		printk("Testing CPA: write protecting again\n");
-		change_page_attr(virt_to_page(start), size>>PAGE_SHIFT,
-				PAGE_KERNEL_RX);
+		set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
 		global_flush_tlb();
 #endif
 	}
 #endif
 	start += size;
 	size = (unsigned long)__end_rodata - start;
-	change_page_attr(virt_to_page(start),
-	                 size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk("Write protecting the kernel read-only data: %luk\n",
 	       size >> 10);
 
 	/*
-	 * change_page_attr() requires a global_flush_tlb() call after it.
+	 * set_pages_*() requires a global_flush_tlb() call after it.
 	 * We do this after the printk so that if something went wrong in the
 	 * change, the printk gets out at least to give a better debug hint
 	 * of who is the culprit.
@@ -806,13 +802,11 @@ void mark_rodata_ro(void)
 
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, start + size);
-	change_page_attr(virt_to_page(start), size >> PAGE_SHIFT,
-				PAGE_KERNEL);
+	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
 	global_flush_tlb();
 
 	printk("Testing CPA: write protecting again\n");
-	change_page_attr(virt_to_page(start), size >> PAGE_SHIFT,
-				PAGE_KERNEL_RO);
+	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	global_flush_tlb();
 #endif
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c250580a9432..05bb12db0b09 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -556,8 +556,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		init_page_count(virt_to_page(addr));
 		memset((void *)(addr & ~(PAGE_SIZE-1)),
 			POISON_FREE_INITMEM, PAGE_SIZE);
-		if (addr >= __START_KERNEL_map)
-			change_page_attr_addr(addr, 1, __pgprot(0));
 		free_page(addr);
 		totalram_pages++;
 	}
@@ -594,13 +592,13 @@ void mark_rodata_ro(void)
 	if (end <= start)
 		return;
 
-	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
+	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
 
 	/*
-	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
+	 * set_memory_*() requires a global_flush_tlb() call after it.
 	 * We do this after the printk so that if something went wrong in the
 	 * change, the printk gets out at least to give a better debug hint
 	 * of who is the culprit.
@@ -609,11 +607,11 @@ void mark_rodata_ro(void)
 
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, end);
-	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL);
+	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
 	global_flush_tlb();
 
 	printk("Testing CPA: again\n");
-	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
+	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 	global_flush_tlb();
 #endif
 }
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 03eac1eb8e0f..c03a7143928f 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -210,8 +210,8 @@ static void *i8xx_alloc_pages(void)
 	if (page == NULL)
 		return NULL;
 
-	if (change_page_attr(page, 4, PAGE_KERNEL_NOCACHE) < 0) {
-		change_page_attr(page, 4, PAGE_KERNEL);
+	if (set_pages_uc(page, 4) < 0) {
+		set_pages_wb(page, 4);
 		global_flush_tlb();
 		__free_pages(page, 2);
 		return NULL;
@@ -230,7 +230,7 @@ static void i8xx_destroy_pages(void *addr)
 		return;
 
 	page = virt_to_page(addr);
-	change_page_attr(page, 4, PAGE_KERNEL);
+	set_pages_wb(page, 4);
 	global_flush_tlb();
 	put_page(page);
 	__free_pages(page, 2);
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index c31f549ebea0..fb72778dee48 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -88,9 +88,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
 {
 	gfp_t flags;
 	unsigned long i;
-	pgprot_t wc_pageprot;
 
-	wc_pageprot = PAGE_KERNEL_NOCACHE;
 	max_order++;
 	do {
 		/*
@@ -131,8 +129,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
 	 */
 
 	global_flush_tlb();
-	change_page_attr(virt_to_page(va->logical), va->size >> PAGE_SHIFT,
-			 wc_pageprot);
+	set_pages_uc(virt_to_page(va->logical), va->size >> PAGE_SHIFT);
 	global_flush_tlb();
 
 	printk(KERN_DEBUG MODULE_NAME
@@ -157,8 +154,8 @@ static void vmlfb_free_vram_area(struct vram_area *va)
 		 * Reset the linear kernel map caching policy.
 		 */
 
-		change_page_attr(virt_to_page(va->logical),
-				 va->size >> PAGE_SHIFT, PAGE_KERNEL);
+		set_pages_wb(virt_to_page(va->logical),
+				 va->size >> PAGE_SHIFT);
 		global_flush_tlb();
 
 		/*
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index 62df2a9e7130..f6df72561832 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -16,8 +16,8 @@
  * Caller's responsibility to call global_flush_tlb() for performance
  * reasons
  */
-#define map_page_into_agp(page) change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)
-#define unmap_page_from_agp(page) change_page_attr(page, 1, PAGE_KERNEL)
+#define map_page_into_agp(page) set_pages_uc(page, 1)
+#define unmap_page_from_agp(page) set_pages_wb(page, 1)
 #define flush_agp_mappings() global_flush_tlb()
 
 /*
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index b4a38a3d855b..e5650905296e 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -711,11 +711,14 @@ static void snd_intel8x0_setup_periods(struct intel8x0 *chip, struct ichdev *ich
 static void fill_nocache(void *buf, int size, int nocache)
 {
 	size = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	change_page_attr(virt_to_page(buf), size, nocache ? PAGE_KERNEL_NOCACHE : PAGE_KERNEL);
+	if (nocache)
+		set_pages_uc(virt_to_page(buf), size);
+	else
+		set_pages_wb(virt_to_page(buf), size);
 	global_flush_tlb();
 }
 #else
-#define fill_nocache(buf,size,nocache)
+#define fill_nocache(buf, size, nocache) do { ; } while (0)
 #endif
 
 /*

From e1271f686a0e376aa6ee97984c16f91a787e4480 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 835/890] x86: deprecate change_page_attr() for drivers

With the introduction of the new API, no driver or non-archcore code needs
to use c-p-a anymore, so this patch also deprecates the EXPORT_SYMBOL of CPA
(it's a horrible API after all).

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c       | 2 +-
 include/asm-x86/cacheflush.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f3c9510b8d92..19f7f7a0b36f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -284,7 +284,7 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 
 	return change_page_attr_addr(addr, numpages, prot);
 }
-EXPORT_SYMBOL(change_page_attr);
+EXPORT_UNUSED_SYMBOL(change_page_attr); /* to be removed in 2.6.27 */
 
 /**
  * change_page_attr_set - Change page table attributes in the linear mapping.
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 7b928d55c190..e79159bc0987 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -25,7 +25,8 @@
 	memcpy(dst, src, len)
 
 void global_flush_tlb(void);
-int change_page_attr(struct page *page, int numpages, pgprot_t prot);
+int __deprecated_for_modules change_page_attr(struct page *page, int numpages,
+								pgprot_t prot);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 
 int set_pages_uc(struct page *page, int numpages);

From 5f5192b9feeff6a96c97c143c3ca558fdbe2dc8e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 836/890] x86: move page_is_ram() function

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c     | 22 ----------------------
 arch/x86/mm/ioremap.c     | 24 ++++++++++++++++++++++++
 include/asm-x86/page.h    |  3 +++
 include/asm-x86/page_32.h |  1 -
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7c9bb3076b8a..f7b941c3b2c3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -211,28 +211,6 @@ static inline int page_kills_ppro(unsigned long pagenr)
 	return 0;
 }
 
-int page_is_ram(unsigned long pagenr)
-{
-	int i;
-	unsigned long addr, end;
-
-	for (i = 0; i < e820.nr_map; i++) {
-
-		if (e820.map[i].type != E820_RAM)	/* not usable memory */
-			continue;
-		/*
-		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
-		 *	are not. Notably the 640->1Mb area. We need a sanity
-		 *	check here.
-		 */
-		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
-		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
-		if  ((pagenr >= addr) && (pagenr < end))
-			return 1;
-	}
-	return 0;
-}
-
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index f4a2082568c8..d3026e1906f9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -31,6 +31,30 @@ EXPORT_SYMBOL(__phys_addr);
 
 #endif
 
+int page_is_ram(unsigned long pagenr)
+{
+	unsigned long addr, end;
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		/*
+		 * Not usable memory:
+		 */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		/*
+		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
+		 *	are not. Notably the 640->1Mb area. We need a sanity
+		 *	check here.
+		 */
+		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
+		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+		if ((pagenr >= addr) && (pagenr < end))
+			return 1;
+	}
+	return 0;
+}
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index e2c79d8dcdcf..c8b30efeed85 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -47,6 +47,9 @@
 
 
 #ifndef __ASSEMBLY__
+
+extern int page_is_ram(unsigned long pagenr);
+
 struct page;
 
 static void inline clear_user_page(void *page, unsigned long vaddr,
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 11c4b39cada1..a6fd10f230d2 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -73,7 +73,6 @@ extern int nx_enabled;
  */
 extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
-extern int page_is_ram(unsigned long pagenr);
 
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)

From 950f9d95bed1a366434d3597ea75f5b9d772d74f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 837/890] x86: fix the missing BIOS area check in page_is_ram

page_is_ram has a FIXME since ages, which reminds to sanity check the
BIOS area between 640k and 1M, which is sometimes falsely reported as
RAM in the e820 tables.

Implement the sanity check. Move the BIOS range defines from
pageattr.c into e820.h to avoid duplicate defines.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c  | 15 ++++++++++-----
 arch/x86/mm/pageattr.c |  7 +------
 include/asm-x86/e820.h |  3 +++
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d3026e1906f9..24e42cab8d58 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -42,13 +42,18 @@ int page_is_ram(unsigned long pagenr)
 		 */
 		if (e820.map[i].type != E820_RAM)
 			continue;
-		/*
-		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
-		 *	are not. Notably the 640->1Mb area. We need a sanity
-		 *	check here.
-		 */
 		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
 		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+
+		/*
+		 * Sanity check: Some BIOSen report areas as RAM that
+		 * are not. Notably the 640->1Mb area, which is the
+		 * PCI BIOS area.
+		 */
+		if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+		    end < (BIOS_END >> PAGE_SHIFT))
+			continue;
+
 		if ((pagenr >= addr) && (pagenr < end))
 			return 1;
 	}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 19f7f7a0b36f..fcd96125c5ae 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -9,18 +9,13 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
+#include <asm/e820.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
-/*
- * We must allow the BIOS range to be executable:
- */
-#define BIOS_BEGIN		0x000a0000
-#define BIOS_END		0x00100000
-
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f96f1853bc41..7004251fc66b 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -25,6 +25,9 @@ struct e820map {
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
 
+#define BIOS_BEGIN		0x000a0000
+#define BIOS_END		0x00100000
+
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
 # include "e820_32.h"

From 266b9f8727976769e2ed2dad77ac9295f37e321e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 838/890] x86: fix ioremap RAM check

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24e42cab8d58..e84c09e7d2c1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -125,23 +125,14 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
-#ifdef CONFIG_X86_32
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	if (phys_addr <= virt_to_phys(high_memory - 1)) {
-		char *t_addr, *t_end;
-		struct page *page;
-
-		t_addr = __va(phys_addr);
-		t_end = t_addr + (size - 1);
-
-		for (page = virt_to_page(t_addr);
-		     page <= virt_to_page(t_end); page++)
-			if (!PageReserved(page))
-				return NULL;
+	for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
+	     (offset << PAGE_SHIFT) < last_addr; offset++) {
+		if (page_is_ram(offset))
+			return NULL;
 	}
-#endif
 
 	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
 

From 5f8681529cb243b3a492e55f2da9d632ad0d5e32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 839/890] x86: fix ioremap API

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap.c   | 11 ++++++++---
 include/asm-x86/io_32.h | 13 +++----------
 include/asm-x86/io_64.h | 13 +++----------
 3 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e84c09e7d2c1..8777bb7688f4 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,8 +106,8 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
-			unsigned long flags)
+static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+			       unsigned long flags)
 {
 	void __iomem *addr;
 	struct vm_struct *area;
@@ -164,7 +164,6 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 
 	return (void __iomem *) (offset + (char __iomem *)addr);
 }
-EXPORT_SYMBOL(__ioremap);
 
 /**
  * ioremap_nocache     -   map bus memory into CPU space
@@ -193,6 +192,12 @@ void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
+void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size)
+{
+	return __ioremap(phys_addr, size, 0);
+}
+EXPORT_SYMBOL(ioremap_cache);
+
 /**
  * iounmap - Free a IO remapping
  * @addr: virtual address from ioremap_*
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 059a1fee4de3..586d7aa54ceb 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -100,8 +100,6 @@ static inline void * phys_to_virt(unsigned long address)
  */
 #define page_to_phys(page)    ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
 
-extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
 /**
  * ioremap     -   map bus memory into CPU space
  * @offset:    bus address of the memory
@@ -116,18 +114,13 @@ extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsign
  * If the area you are trying to map is a PCI BAR you should have a
  * look at pci_iomap().
  */
-extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
-
-static inline void __iomem *
-ioremap_cache(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, 0);
-}
+extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size);
 
 /*
  * The default ioremap() behavior is non-cached:
  */
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 {
 	return ioremap_nocache(offset, size);
 }
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 7dee3c6e9c39..ee7a5c955962 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -150,8 +150,6 @@ static inline void * phys_to_virt(unsigned long address)
 
 #include <asm-generic/iomap.h>
 
-extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
 extern void *early_ioremap(unsigned long addr, unsigned long size);
 extern void early_iounmap(void *addr, unsigned long size);
 
@@ -160,18 +158,13 @@ extern void early_iounmap(void *addr, unsigned long size);
  * it's useful if some control registers are in such an area and write combining
  * or read caching is not desirable:
  */
-extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
-
-static inline void __iomem *
-ioremap_cache(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, 0);
-}
+extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size);
 
 /*
  * The default ioremap() behavior is non-cached:
  */
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 {
 	return ioremap_cache(offset, size);
 }

From d806e5ee20f62a892b09aa59559f143d465285db Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:06 +0100
Subject: [PATCH 840/890] x86: cpa: convert ioremap to new API

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap.c | 57 ++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8777bb7688f4..b86f66fa5185 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,6 +19,11 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+enum ioremap_mode {
+	IOR_MODE_UNCACHED,
+	IOR_MODE_CACHED,
+};
+
 #ifdef CONFIG_X86_64
 
 unsigned long __phys_addr(unsigned long x)
@@ -64,19 +69,17 @@ int page_is_ram(unsigned long pagenr)
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
-static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
-			       pgprot_t prot)
+static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+			       enum ioremap_mode mode)
 {
-	unsigned long npages, vaddr, last_addr = phys_addr + size - 1;
+	unsigned long vaddr = (unsigned long)__va(paddr);
+	unsigned long nrpages = size >> PAGE_SHIFT;
 	int err, level;
 
 	/* No change for pages after the last mapping */
-	if (last_addr >= (max_pfn_mapped << PAGE_SHIFT))
+	if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
 		return 0;
 
-	npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	vaddr = (unsigned long) __va(phys_addr);
-
 	/*
 	 * If there is no identity map for this address,
 	 * change_page_attr_addr is unnecessary
@@ -84,13 +87,15 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 	if (!lookup_address(vaddr, &level))
 		return 0;
 
-	/*
-	 * Must use an address here and not struct page because the
-	 * phys addr can be a in hole between nodes and not have a
-	 * memmap entry.
-	 */
-	err = change_page_attr_addr(vaddr, npages, prot);
-
+	switch (mode) {
+	case IOR_MODE_UNCACHED:
+	default:
+		err = set_memory_uc(vaddr, nrpages);
+		break;
+	case IOR_MODE_CACHED:
+		err = set_memory_wb(vaddr, nrpages);
+		break;
+	}
 	if (!err)
 		global_flush_tlb();
 
@@ -107,12 +112,12 @@ static int ioremap_change_attr(unsigned long phys_addr, unsigned long size,
  * caller shouldn't need to know that small detail.
  */
 static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
-			       unsigned long flags)
+			       enum ioremap_mode mode)
 {
 	void __iomem *addr;
 	struct vm_struct *area;
 	unsigned long offset, last_addr;
-	pgprot_t pgprot;
+	pgprot_t prot;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -134,7 +139,15 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			return NULL;
 	}
 
-	pgprot = MAKE_GLOBAL(__PAGE_KERNEL | flags);
+	switch (mode) {
+	case IOR_MODE_UNCACHED:
+	default:
+		prot = PAGE_KERNEL_NOCACHE;
+		break;
+	case IOR_MODE_CACHED:
+		prot = PAGE_KERNEL;
+		break;
+	}
 
 	/*
 	 * Mappings have to be page-aligned
@@ -152,12 +165,12 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	area->phys_addr = phys_addr;
 	addr = (void __iomem *) area->addr;
 	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, pgprot)) {
+			       phys_addr, prot)) {
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
 
-	if (ioremap_change_attr(phys_addr, size, pgprot) < 0) {
+	if (ioremap_change_attr(phys_addr, size, mode) < 0) {
 		vunmap(addr);
 		return NULL;
 	}
@@ -188,13 +201,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
  */
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_PCD | _PAGE_PWT);
+	return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
 void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, 0);
+	return __ioremap(phys_addr, size, IOR_MODE_CACHED);
 }
 EXPORT_SYMBOL(ioremap_cache);
 
@@ -242,7 +255,7 @@ void iounmap(volatile void __iomem *addr)
 	}
 
 	/* Reset the direct mapping. Can block */
-	ioremap_change_attr(p->phys_addr, p->size, PAGE_KERNEL);
+	ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);

From f62d0f008e889915c93631c04d4c7d871f05bea7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 841/890] x86: cpa: set_memory_notpresent()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c        | 17 +++++++++++++++--
 arch/x86/mm/pageattr.c       | 34 ++++++++++++++++++++++++++--------
 include/asm-x86/cacheflush.h |  1 +
 3 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 05bb12db0b09..4757be7b5e55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -559,8 +559,21 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		free_page(addr);
 		totalram_pages++;
 	}
-	if (addr > __START_KERNEL_map)
-		global_flush_tlb();
+#ifdef CONFIG_DEBUG_RODATA
+	/*
+	 * This will make the __init pages not present and
+	 * not executable, so that any attempt to use a
+	 * __init function from now on will fault immediately
+	 * rather than supriously later when memory gets reused.
+	 *
+	 * We only do this for DEBUG_RODATA to not break up the
+	 * 2Mb kernel mapping just for this debug feature.
+	 */
+	if (begin >= __START_KERNEL_map) {
+		set_memory_np(begin, (end - begin)/PAGE_SIZE);
+		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
+	}
+#endif
 }
 
 void free_initmem(void)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fcd96125c5ae..e5910ac37e59 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -357,8 +357,6 @@ int change_page_attr_clear(unsigned long addr, int numpages, pgprot_t prot)
 	return change_page_attr_addr(addr, numpages, prot);
 }
 
-
-
 int set_memory_uc(unsigned long addr, int numpages)
 {
 	pgprot_t uncached;
@@ -402,7 +400,6 @@ int set_memory_ro(unsigned long addr, int numpages)
 	pgprot_val(rw) = _PAGE_RW;
 	return change_page_attr_clear(addr, numpages, rw);
 }
-EXPORT_SYMBOL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
@@ -411,7 +408,14 @@ int set_memory_rw(unsigned long addr, int numpages)
 	pgprot_val(rw) = _PAGE_RW;
 	return change_page_attr_set(addr, numpages, rw);
 }
-EXPORT_SYMBOL(set_memory_rw);
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+	pgprot_t present;
+
+	pgprot_val(present) = _PAGE_PRESENT;
+	return change_page_attr_clear(addr, numpages, present);
+}
 
 int set_pages_uc(struct page *page, int numpages)
 {
@@ -461,7 +465,6 @@ int set_pages_ro(struct page *page, int numpages)
 	pgprot_val(rw) = _PAGE_RW;
 	return change_page_attr_clear(addr, numpages, rw);
 }
-EXPORT_SYMBOL(set_pages_ro);
 
 int set_pages_rw(struct page *page, int numpages)
 {
@@ -471,8 +474,6 @@ int set_pages_rw(struct page *page, int numpages)
 	pgprot_val(rw) = _PAGE_RW;
 	return change_page_attr_set(addr, numpages, rw);
 }
-EXPORT_SYMBOL(set_pages_rw);
-
 
 void clflush_cache_range(void *addr, int size)
 {
@@ -503,6 +504,20 @@ void global_flush_tlb(void)
 EXPORT_SYMBOL(global_flush_tlb);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
+
+static int __set_pages_p(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	return change_page_attr_set(addr, numpages,
+				__pgprot(_PAGE_PRESENT | _PAGE_RW));
+}
+
+static int __set_pages_np(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+}
+
 void kernel_map_pages(struct page *page, int numpages, int enable)
 {
 	if (PageHighMem(page))
@@ -522,7 +537,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * The return value is ignored - the calls cannot fail,
 	 * large pages are disabled at boot time:
 	 */
-	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	if (enable)
+		__set_pages_p(page, numpages);
+	else
+		__set_pages_np(page, numpages);
 
 	/*
 	 * We should perform an IPI and flush all tlbs,
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index e79159bc0987..a95afaf1240d 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -42,6 +42,7 @@ int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_np(unsigned long addr, int numpages);
 
 void clflush_cache_range(void *addr, int size);
 

From d1028a154c65d7fadd1b2d0276c077014d401ec7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 842/890] x86: make various pageattr.c functions static

change_page_attr_add is only used in pageattr.c now, so we can
make this function static.
change_page_attr() isn't used anywere at all anymore; this function
is a really bad API anyway so just remove the bloat entirely.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile_32      |  1 -
 arch/x86/mm/Makefile_64      |  1 -
 arch/x86/mm/pageattr.c       | 51 ++++++++++--------------------------
 include/asm-x86/cacheflush.h |  1 -
 4 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index af0d39bea6c5..ffa6d46a1e73 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -4,7 +4,6 @@
 
 obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap.o extable.o pageattr.o mmap.o
 
-obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index b564b5a760da..27a090c86e9b 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -7,4 +7,3 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA) += srat_64.o
-obj-$(CONFIG_CPA_DEBUG) += pageattr-test.o
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e5910ac37e59..e4d2b6930e61 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -210,7 +210,8 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
-int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
+static int change_page_attr_addr(unsigned long address, int numpages,
+								pgprot_t prot)
 {
 	int err = 0, kernel_map = 0, i;
 
@@ -251,36 +252,6 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 	return err;
 }
 
-/**
- * change_page_attr - Change page table attributes in the linear mapping.
- * @page: First page to change
- * @numpages: Number of pages to change
- * @prot: New protection/caching type (PAGE_*)
- *
- * Returns 0 on success, otherwise a negated errno.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * Caller must call global_flush_tlb() later to make the changes active.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere
- * (e.g. in user space) * This function only deals with the kernel linear map.
- *
- * For MMIO areas without mem_map use change_page_attr_addr() instead.
- *
- * Modules and drivers should use the set_pages_* APIs instead.
- */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
-	unsigned long addr = (unsigned long)page_address(page);
-
-	return change_page_attr_addr(addr, numpages, prot);
-}
-EXPORT_UNUSED_SYMBOL(change_page_attr); /* to be removed in 2.6.27 */
-
 /**
  * change_page_attr_set - Change page table attributes in the linear mapping.
  * @addr: Virtual address in linear mapping.
@@ -294,15 +265,14 @@ EXPORT_UNUSED_SYMBOL(change_page_attr); /* to be removed in 2.6.27 */
  * different caching policies exist. This changes the page attributes of the
  * in kernel linear mapping too.
  *
- * Caller must call global_flush_tlb() later to make the changes active.
- *
  * The caller needs to ensure that there are no conflicting mappings elsewhere
  * (e.g. in user space) * This function only deals with the kernel linear map.
  *
  * This function is different from change_page_attr() in that only selected bits
  * are impacted, all other bits remain as is.
  */
-int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
+static int change_page_attr_set(unsigned long addr, int numpages,
+								pgprot_t prot)
 {
 	pgprot_t current_prot;
 	int level;
@@ -332,15 +302,14 @@ int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
  * different caching policies exist. This changes the page attributes of the
  * in kernel linear mapping too.
  *
- * Caller must call global_flush_tlb() later to make the changes active.
- *
  * The caller needs to ensure that there are no conflicting mappings elsewhere
  * (e.g. in user space) * This function only deals with the kernel linear map.
  *
  * This function is different from change_page_attr() in that only selected bits
  * are impacted, all other bits remain as is.
  */
-int change_page_attr_clear(unsigned long addr, int numpages, pgprot_t prot)
+static int change_page_attr_clear(unsigned long addr, int numpages,
+								pgprot_t prot)
 {
 	pgprot_t current_prot;
 	int level;
@@ -549,3 +518,11 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	__flush_tlb_all();
 }
 #endif
+
+/*
+ * The testcases use internal knowledge of the implementation that shouldn't
+ * be exposed to the rest of the kernel. Include these directly here.
+ */
+#ifdef CONFIG_CPA_DEBUG
+#include "pageattr-test.c"
+#endif
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index a95afaf1240d..d15ff359d3e3 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -27,7 +27,6 @@
 void global_flush_tlb(void);
 int __deprecated_for_modules change_page_attr(struct page *page, int numpages,
 								pgprot_t prot);
-int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 
 int set_pages_uc(struct page *page, int numpages);
 int set_pages_wb(struct page *page, int numpages);

From d7c8f21a8cad0228c7c5ce2bb6dbd95d1ee49d13 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 843/890] x86: cpa: move flush to cpa

The set_memory_* and set_pages_* family of API's currently requires the
callers to do a global tlb flush after the function call; forgetting this is
a very nasty deathtrap. This patch moves the global tlb flush into
each of the callers

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-gart_64.c       |   1 -
 arch/x86/mm/init_32.c               |  14 ---
 arch/x86/mm/init_64.c               |  10 --
 arch/x86/mm/ioremap.c               |   2 -
 arch/x86/mm/pageattr.c              | 137 ++++++++++++++--------------
 drivers/char/agp/ali-agp.c          |   2 -
 drivers/char/agp/i460-agp.c         |   2 -
 drivers/char/agp/intel-agp.c        |   5 -
 drivers/video/vermilion/vermilion.c |   6 --
 include/asm-x86/agp.h               |   6 +-
 include/asm-x86/cacheflush.h        |   1 -
 sound/pci/intel8x0.c                |   1 -
 12 files changed, 72 insertions(+), 115 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 8860c6eba8ab..4d5cc7181982 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -572,7 +572,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 		panic("Cannot allocate GATT table");
 	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
 		panic("Could not set GART PTEs to uncacheable pages");
-	global_flush_tlb();
 
 	memset(gatt, 0, gatt_size);
 	agp_gatt_table = gatt;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f7b941c3b2c3..0d3369b900e9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -752,15 +752,11 @@ void mark_rodata_ro(void)
 		printk("Write protecting the kernel text: %luk\n", size >> 10);
 
 #ifdef CONFIG_CPA_DEBUG
-		global_flush_tlb();
-
 		printk("Testing CPA: Reverting %lx-%lx\n", start, start+size);
 		set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
-		global_flush_tlb();
 
 		printk("Testing CPA: write protecting again\n");
 		set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
-		global_flush_tlb();
 #endif
 	}
 #endif
@@ -770,22 +766,12 @@ void mark_rodata_ro(void)
 	printk("Write protecting the kernel read-only data: %luk\n",
 	       size >> 10);
 
-	/*
-	 * set_pages_*() requires a global_flush_tlb() call after it.
-	 * We do this after the printk so that if something went wrong in the
-	 * change, the printk gets out at least to give a better debug hint
-	 * of who is the culprit.
-	 */
-	global_flush_tlb();
-
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, start + size);
 	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
-	global_flush_tlb();
 
 	printk("Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-	global_flush_tlb();
 #endif
 }
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4757be7b5e55..9b69fa54a831 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -610,22 +610,12 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
 
-	/*
-	 * set_memory_*() requires a global_flush_tlb() call after it.
-	 * We do this after the printk so that if something went wrong in the
-	 * change, the printk gets out at least to give a better debug hint
-	 * of who is the culprit.
-	 */
-	global_flush_tlb();
-
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
-	global_flush_tlb();
 
 	printk("Testing CPA: again\n");
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
-	global_flush_tlb();
 #endif
 }
 #endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b86f66fa5185..6a9a1418bc98 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -96,8 +96,6 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
 		err = set_memory_wb(vaddr, nrpages);
 		break;
 	}
-	if (!err)
-		global_flush_tlb();
 
 	return err;
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e4d2b6930e61..a2d747c06147 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -22,6 +22,36 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 	return addr >= start && addr < end;
 }
 
+/*
+ * Flushing functions
+ */
+void clflush_cache_range(void *addr, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
+		clflush(addr+i);
+}
+
+static void flush_kernel_map(void *arg)
+{
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
+	if (boot_cpu_data.x86_model >= 4)
+		wbinvd();
+}
+
+static void global_flush_tlb(void)
+{
+	BUG_ON(irqs_disabled());
+
+	on_each_cpu(flush_kernel_map, NULL, 1, 1);
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -328,149 +358,124 @@ static int change_page_attr_clear(unsigned long addr, int numpages,
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
-	pgprot_t uncached;
+	int err;
 
-	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
-	return change_page_attr_set(addr, numpages, uncached);
+	err = change_page_attr_set(addr, numpages,
+				__pgprot(_PAGE_PCD | _PAGE_PWT));
+	global_flush_tlb();
+	return err;
 }
 EXPORT_SYMBOL(set_memory_uc);
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-	pgprot_t uncached;
+	int err;
 
-	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
-	return change_page_attr_clear(addr, numpages, uncached);
+	err = change_page_attr_clear(addr, numpages,
+				__pgprot(_PAGE_PCD | _PAGE_PWT));
+	global_flush_tlb();
+	return err;
 }
 EXPORT_SYMBOL(set_memory_wb);
 
 int set_memory_x(unsigned long addr, int numpages)
 {
-	pgprot_t nx;
+	int err;
 
-	pgprot_val(nx) = _PAGE_NX;
-	return change_page_attr_clear(addr, numpages, nx);
+	err = change_page_attr_clear(addr, numpages,
+				__pgprot(_PAGE_NX));
+	global_flush_tlb();
+	return err;
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-	pgprot_t nx;
+	int err;
 
-	pgprot_val(nx) = _PAGE_NX;
-	return change_page_attr_set(addr, numpages, nx);
+	err = change_page_attr_set(addr, numpages,
+				__pgprot(_PAGE_NX));
+	global_flush_tlb();
+	return err;
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	pgprot_t rw;
+	int err;
 
-	pgprot_val(rw) = _PAGE_RW;
-	return change_page_attr_clear(addr, numpages, rw);
+	err = change_page_attr_clear(addr, numpages,
+				__pgprot(_PAGE_RW));
+	global_flush_tlb();
+	return err;
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	pgprot_t rw;
+	int err;
 
-	pgprot_val(rw) = _PAGE_RW;
-	return change_page_attr_set(addr, numpages, rw);
+	err = change_page_attr_set(addr, numpages,
+				__pgprot(_PAGE_RW));
+	global_flush_tlb();
+	return err;
 }
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-	pgprot_t present;
+	int err;
 
-	pgprot_val(present) = _PAGE_PRESENT;
-	return change_page_attr_clear(addr, numpages, present);
+	err = change_page_attr_clear(addr, numpages,
+				__pgprot(_PAGE_PRESENT));
+	global_flush_tlb();
+	return err;
 }
 
 int set_pages_uc(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t uncached;
 
-	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
-	return change_page_attr_set(addr, numpages, uncached);
+	return set_memory_uc(addr, numpages);
 }
 EXPORT_SYMBOL(set_pages_uc);
 
 int set_pages_wb(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t uncached;
 
-	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
-	return change_page_attr_clear(addr, numpages, uncached);
+	return set_memory_wb(addr, numpages);
 }
 EXPORT_SYMBOL(set_pages_wb);
 
 int set_pages_x(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t nx;
 
-	pgprot_val(nx) = _PAGE_NX;
-	return change_page_attr_clear(addr, numpages, nx);
+	return set_memory_x(addr, numpages);
 }
 EXPORT_SYMBOL(set_pages_x);
 
 int set_pages_nx(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t nx;
 
-	pgprot_val(nx) = _PAGE_NX;
-	return change_page_attr_set(addr, numpages, nx);
+	return set_memory_nx(addr, numpages);
 }
 EXPORT_SYMBOL(set_pages_nx);
 
 int set_pages_ro(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t rw;
 
-	pgprot_val(rw) = _PAGE_RW;
-	return change_page_attr_clear(addr, numpages, rw);
+	return set_memory_ro(addr, numpages);
 }
 
 int set_pages_rw(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	pgprot_t rw;
 
-	pgprot_val(rw) = _PAGE_RW;
-	return change_page_attr_set(addr, numpages, rw);
+	return set_memory_rw(addr, numpages);
 }
 
-void clflush_cache_range(void *addr, int size)
-{
-	int i;
-
-	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr+i);
-}
-
-static void flush_kernel_map(void *arg)
-{
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
-
-	if (boot_cpu_data.x86_model >= 4)
-		wbinvd();
-}
-
-void global_flush_tlb(void)
-{
-	BUG_ON(irqs_disabled());
-
-	on_each_cpu(flush_kernel_map, NULL, 1, 1);
-}
-EXPORT_SYMBOL(global_flush_tlb);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index aa5ddb716ffb..1ffb381130c3 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -145,7 +145,6 @@ static void *m1541_alloc_page(struct agp_bridge_data *bridge)
 	void *addr = agp_generic_alloc_page(agp_bridge);
 	u32 temp;
 
-	global_flush_tlb();
 	if (!addr)
 		return NULL;
 
@@ -162,7 +161,6 @@ static void ali_destroy_page(void * addr, int flags)
 		if (flags & AGP_PAGE_DESTROY_UNMAP) {
 			global_cache_flush();	/* is this really needed?  --hch */
 			agp_generic_destroy_page(addr, flags);
-			global_flush_tlb();
 		} else
 			agp_generic_destroy_page(addr, flags);
 	}
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c
index e72a83e2bad5..76f581c85a7d 100644
--- a/drivers/char/agp/i460-agp.c
+++ b/drivers/char/agp/i460-agp.c
@@ -527,7 +527,6 @@ static void *i460_alloc_page (struct agp_bridge_data *bridge)
 
 	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
 		page = agp_generic_alloc_page(agp_bridge);
-		global_flush_tlb();
 	} else
 		/* Returning NULL would cause problems */
 		/* AK: really dubious code. */
@@ -539,7 +538,6 @@ static void i460_destroy_page (void *page, int flags)
 {
 	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
 		agp_generic_destroy_page(page, flags);
-		global_flush_tlb();
 	}
 }
 
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index c03a7143928f..189efb6ef970 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -212,11 +212,9 @@ static void *i8xx_alloc_pages(void)
 
 	if (set_pages_uc(page, 4) < 0) {
 		set_pages_wb(page, 4);
-		global_flush_tlb();
 		__free_pages(page, 2);
 		return NULL;
 	}
-	global_flush_tlb();
 	get_page(page);
 	atomic_inc(&agp_bridge->current_memory_agp);
 	return page_address(page);
@@ -231,7 +229,6 @@ static void i8xx_destroy_pages(void *addr)
 
 	page = virt_to_page(addr);
 	set_pages_wb(page, 4);
-	global_flush_tlb();
 	put_page(page);
 	__free_pages(page, 2);
 	atomic_dec(&agp_bridge->current_memory_agp);
@@ -341,7 +338,6 @@ static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type)
 
 	switch (pg_count) {
 	case 1: addr = agp_bridge->driver->agp_alloc_page(agp_bridge);
-		global_flush_tlb();
 		break;
 	case 4:
 		/* kludge to get 4 physical pages for ARGB cursor */
@@ -404,7 +400,6 @@ static void intel_i810_free_by_type(struct agp_memory *curr)
 		else {
 			agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[0]),
 							     AGP_PAGE_DESTROY_UNMAP);
-			global_flush_tlb();
 			agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[0]),
 							     AGP_PAGE_DESTROY_FREE);
 		}
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index fb72778dee48..1c656667b937 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -124,13 +124,8 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
 	/*
 	 * Change caching policy of the linear kernel map to avoid
 	 * mapping type conflicts with user-space mappings.
-	 * The first global_flush_tlb() is really only there to do a global
-	 * wbinvd().
 	 */
-
-	global_flush_tlb();
 	set_pages_uc(virt_to_page(va->logical), va->size >> PAGE_SHIFT);
-	global_flush_tlb();
 
 	printk(KERN_DEBUG MODULE_NAME
 	       ": Allocated %ld bytes vram area at 0x%08lx\n",
@@ -156,7 +151,6 @@ static void vmlfb_free_vram_area(struct vram_area *va)
 
 		set_pages_wb(virt_to_page(va->logical),
 				 va->size >> PAGE_SHIFT);
-		global_flush_tlb();
 
 		/*
 		 * Decrease the usage count on the pages we've used
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index f6df72561832..0c309b9a5217 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -12,13 +12,9 @@
  * page. This avoids data corruption on some CPUs.
  */
 
-/*
- * Caller's responsibility to call global_flush_tlb() for performance
- * reasons
- */
 #define map_page_into_agp(page) set_pages_uc(page, 1)
 #define unmap_page_from_agp(page) set_pages_wb(page, 1)
-#define flush_agp_mappings() global_flush_tlb()
+#define flush_agp_mappings() do { } while (0)
 
 /*
  * Could use CLFLUSH here if the cpu supports it. But then it would
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index d15ff359d3e3..157da0206ccc 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -24,7 +24,6 @@
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 
-void global_flush_tlb(void);
 int __deprecated_for_modules change_page_attr(struct page *page, int numpages,
 								pgprot_t prot);
 
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index e5650905296e..4bb97646a67a 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -715,7 +715,6 @@ static void fill_nocache(void *buf, int size, int nocache)
 		set_pages_uc(virt_to_page(buf), size);
 	else
 		set_pages_wb(virt_to_page(buf), size);
-	global_flush_tlb();
 }
 #else
 #define fill_nocache(buf, size, nocache) do { ; } while (0)

From 5398f9854f60d670e8ef1ea08c0e0310f253eeb1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 844/890] x86: remove flush_agp_mappings()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/char/agp/backend.c | 3 ---
 drivers/char/agp/generic.c | 3 ---
 include/asm-alpha/agp.h    | 1 -
 include/asm-ia64/agp.h     | 1 -
 include/asm-parisc/agp.h   | 1 -
 include/asm-powerpc/agp.h  | 1 -
 include/asm-sparc64/agp.h  | 1 -
 include/asm-x86/agp.h      | 1 -
 8 files changed, 12 deletions(-)

diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 832ded20fe70..2720882e66fe 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -147,7 +147,6 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge)
 			printk(KERN_ERR PFX "unable to get memory for scratch page.\n");
 			return -ENOMEM;
 		}
-		flush_agp_mappings();
 
 		bridge->scratch_page_real = virt_to_gart(addr);
 		bridge->scratch_page =
@@ -191,7 +190,6 @@ err_out:
 	if (bridge->driver->needs_scratch_page) {
 		bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
 						 AGP_PAGE_DESTROY_UNMAP);
-		flush_agp_mappings();
 		bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
 						 AGP_PAGE_DESTROY_FREE);
 	}
@@ -219,7 +217,6 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge)
 	    bridge->driver->needs_scratch_page) {
 		bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
 						 AGP_PAGE_DESTROY_UNMAP);
-		flush_agp_mappings();
 		bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
 						 AGP_PAGE_DESTROY_FREE);
 	}
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 64b2f6d7059d..1a4674ce0c71 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -197,7 +197,6 @@ void agp_free_memory(struct agp_memory *curr)
 		for (i = 0; i < curr->page_count; i++) {
 			curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]), AGP_PAGE_DESTROY_UNMAP);
 		}
-		flush_agp_mappings();
 		for (i = 0; i < curr->page_count; i++) {
 			curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]), AGP_PAGE_DESTROY_FREE);
 		}
@@ -267,8 +266,6 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge,
 	}
 	new->bridge = bridge;
 
-	flush_agp_mappings();
-
 	return new;
 }
 EXPORT_SYMBOL(agp_allocate_memory);
diff --git a/include/asm-alpha/agp.h b/include/asm-alpha/agp.h
index ef855a3bc0f5..26c179135293 100644
--- a/include/asm-alpha/agp.h
+++ b/include/asm-alpha/agp.h
@@ -7,7 +7,6 @@
 
 #define map_page_into_agp(page) 
 #define unmap_page_from_agp(page) 
-#define flush_agp_mappings() 
 #define flush_agp_cache() mb()
 
 /* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-ia64/agp.h b/include/asm-ia64/agp.h
index 4e517f0e6afa..c11fdd8ab4d7 100644
--- a/include/asm-ia64/agp.h
+++ b/include/asm-ia64/agp.h
@@ -15,7 +15,6 @@
  */
 #define map_page_into_agp(page)		/* nothing */
 #define unmap_page_from_agp(page)	/* nothing */
-#define flush_agp_mappings()		/* nothing */
 #define flush_agp_cache()		mb()
 
 /* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-parisc/agp.h b/include/asm-parisc/agp.h
index 9f61d4eb6c01..9651660da639 100644
--- a/include/asm-parisc/agp.h
+++ b/include/asm-parisc/agp.h
@@ -9,7 +9,6 @@
 
 #define map_page_into_agp(page)		/* nothing */
 #define unmap_page_from_agp(page)	/* nothing */
-#define flush_agp_mappings()		/* nothing */
 #define flush_agp_cache()		mb()
 
 /* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-powerpc/agp.h b/include/asm-powerpc/agp.h
index e5ccaca2f5a4..86455c4c31ee 100644
--- a/include/asm-powerpc/agp.h
+++ b/include/asm-powerpc/agp.h
@@ -6,7 +6,6 @@
 
 #define map_page_into_agp(page)
 #define unmap_page_from_agp(page)
-#define flush_agp_mappings()
 #define flush_agp_cache() mb()
 
 /* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-sparc64/agp.h b/include/asm-sparc64/agp.h
index 58f8cb6ae767..e9fcf0e781ea 100644
--- a/include/asm-sparc64/agp.h
+++ b/include/asm-sparc64/agp.h
@@ -5,7 +5,6 @@
 
 #define map_page_into_agp(page) 
 #define unmap_page_from_agp(page) 
-#define flush_agp_mappings() 
 #define flush_agp_cache() mb()
 
 /* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index 0c309b9a5217..e4004a9f6a9a 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -14,7 +14,6 @@
 
 #define map_page_into_agp(page) set_pages_uc(page, 1)
 #define unmap_page_from_agp(page) set_pages_wb(page, 1)
-#define flush_agp_mappings() do { } while (0)
 
 /*
  * Could use CLFLUSH here if the cpu supports it. But then it would

From 488fd99588bf23da951b524a806e44feaa1aa366 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 845/890] x86: fix pageattr-selftest

In Ingo's testing, he found a bug in the CPA selftest code. What would
happen is that the test would call change_page_attr_addr on a range of
memory, part of which was read only, part of which was writable. The
only thing the test wanted to change was the global bit...

What actually happened was that the selftest would take the permissions
of the first page, and then the change_page_attr_addr call would then
set the permissions of the entire range to this first page. In the
rodata section case, this resulted in pages after the .rodata becoming
read only... which made the kernel rather unhappy in many interesting
ways.

This is just another example of how dangerous the cpa API is (was); this
patch changes the test to use the incremental clear/set APIs
instead, and it changes the clear/set implementation to work on a 1 page
at a time basis.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr-test.c |  8 ++--
 arch/x86/mm/pageattr.c      | 94 +++++++++++++++++++++----------------
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 6a41a0f0c149..fe73905d075e 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -162,8 +162,8 @@ static __init int exercise_pageattr(void)
 			continue;
 		}
 
-		err = change_page_attr_addr(addr[i], len[i],
-			    pte_pgprot(pte_clrhuge(pte_clrglobal(pte0))));
+		err = change_page_attr_clear(addr[i], len[i],
+						__pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA %d failed %d\n", i, err);
 			failed++;
@@ -197,8 +197,8 @@ static __init int exercise_pageattr(void)
 			failed++;
 			continue;
 		}
-		err = change_page_attr_addr(addr[i], len[i],
-					  pte_pgprot(pte_mkglobal(*pte)));
+		err = change_page_attr_set(addr[i], len[i],
+							__pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA reverting failed: %d\n", err);
 			failed++;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a2d747c06147..23f0aa3d01c1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -228,7 +228,6 @@ repeat:
 /**
  * change_page_attr_addr - Change page table attributes in linear mapping
  * @address: Virtual address in linear mapping.
- * @numpages: Number of pages to change
  * @prot:    New page table attribute (PAGE_*)
  *
  * Change page attributes of a page in the direct mapping. This is a variant
@@ -240,10 +239,10 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
-static int change_page_attr_addr(unsigned long address, int numpages,
-								pgprot_t prot)
+static int change_page_attr_addr(unsigned long address, pgprot_t prot)
 {
-	int err = 0, kernel_map = 0, i;
+	int err = 0, kernel_map = 0;
+	unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
 #ifdef CONFIG_X86_64
 	if (address >= __START_KERNEL_map &&
@@ -254,31 +253,28 @@ static int change_page_attr_addr(unsigned long address, int numpages,
 	}
 #endif
 
-	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
-		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
-
-		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-			err = __change_page_attr(address, pfn, prot);
-			if (err)
-				break;
-		}
-#ifdef CONFIG_X86_64
-		/*
-		 * Handle kernel mapping too which aliases part of
-		 * lowmem:
-		 */
-		if (__pa(address) < KERNEL_TEXT_SIZE) {
-			unsigned long addr2;
-			pgprot_t prot2;
-
-			addr2 = __START_KERNEL_map + __pa(address);
-			/* Make sure the kernel mappings stay executable */
-			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-			err = __change_page_attr(addr2, pfn, prot2);
-		}
-#endif
+	if (!kernel_map || pte_present(pfn_pte(0, prot))) {
+		err = __change_page_attr(address, pfn, prot);
+		if (err)
+			return err;
 	}
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Handle kernel mapping too which aliases part of
+	 * lowmem:
+	 */
+	if (__pa(address) < KERNEL_TEXT_SIZE) {
+		unsigned long addr2;
+		pgprot_t prot2;
+
+		addr2 = __START_KERNEL_map + __pa(address);
+		/* Make sure the kernel mappings stay executable */
+		prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
+		err = __change_page_attr(addr2, pfn, prot2);
+	}
+#endif
+
 	return err;
 }
 
@@ -307,16 +303,24 @@ static int change_page_attr_set(unsigned long addr, int numpages,
 	pgprot_t current_prot;
 	int level;
 	pte_t *pte;
+	int i, ret;
 
-	pte = lookup_address(addr, &level);
-	if (pte)
-		current_prot = pte_pgprot(*pte);
-	else
-		pgprot_val(current_prot) = 0;
+	for (i = 0; i < numpages ; i++) {
 
-	pgprot_val(prot) = pgprot_val(current_prot) | pgprot_val(prot);
+		pte = lookup_address(addr, &level);
+		if (pte)
+			current_prot = pte_pgprot(*pte);
+		else
+			pgprot_val(current_prot) = 0;
 
-	return change_page_attr_addr(addr, numpages, prot);
+		pgprot_val(prot) = pgprot_val(current_prot) | pgprot_val(prot);
+
+		ret = change_page_attr_addr(addr, prot);
+		if (ret)
+			return ret;
+		addr += PAGE_SIZE;
+	}
+	return 0;
 }
 
 /**
@@ -344,16 +348,24 @@ static int change_page_attr_clear(unsigned long addr, int numpages,
 	pgprot_t current_prot;
 	int level;
 	pte_t *pte;
+	int i, ret;
 
-	pte = lookup_address(addr, &level);
-	if (pte)
-		current_prot = pte_pgprot(*pte);
-	else
-		pgprot_val(current_prot) = 0;
+	for (i = 0; i < numpages; i++) {
+		pte = lookup_address(addr, &level);
+		if (pte)
+			current_prot = pte_pgprot(*pte);
+		else
+			pgprot_val(current_prot) = 0;
 
-	pgprot_val(prot) = pgprot_val(current_prot) & ~pgprot_val(prot);
+		pgprot_val(prot) =
+				pgprot_val(current_prot) & ~pgprot_val(prot);
 
-	return change_page_attr_addr(addr, numpages, prot);
+		ret = change_page_attr_addr(addr, prot);
+		if (ret)
+			return ret;
+		addr += PAGE_SIZE;
+	}
+	return 0;
 }
 
 int set_memory_uc(unsigned long addr, int numpages)

From 3c1df68b848b39270752ff8d4b956cc4a4dce0f6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 846/890] x86: make sure initmem is writable

When we free initmem, various rodata and CPA checks may have left
memory read only.. this patch ensures that the memory is writable
before we free it.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c | 7 +++++++
 arch/x86/mm/init_64.c | 1 +
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0d3369b900e9..4d1156545194 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -780,6 +780,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
 	unsigned long addr;
 
+	/*
+	 * We just marked the kernel text read only above, now that
+	 * we are going to free part of that, we need to make that
+	 * writeable first.
+	 */
+	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9b69fa54a831..f97ace7a55e5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -570,6 +570,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	 * 2Mb kernel mapping just for this debug feature.
 	 */
 	if (begin >= __START_KERNEL_map) {
+		set_memory_rw(begin, (end - begin)/PAGE_SIZE);
 		set_memory_np(begin, (end - begin)/PAGE_SIZE);
 		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
 	}

From a72a08a4b61cb8868606a69b744848552e92062b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 847/890] x86: cpa: fix split thinko

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 23f0aa3d01c1..a2ee317548f2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -216,8 +216,12 @@ repeat:
 	prot = static_protections(prot, address);
 
 	if (level == PG_LEVEL_4K) {
+		WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PSE);
 		set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
 	} else {
+		/* Clear the PSE bit for the 4k level pages ! */
+		pgprot_val(prot) = pgprot_val(prot) & ~_PAGE_PSE;
+
 		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;

From 4692a1450b4d1000a942022b088c8791749dd65e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:07 +0100
Subject: [PATCH 848/890] x86: cpa: fix loop

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a2ee317548f2..5cfc0d4ade56 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -304,7 +304,7 @@ static int change_page_attr_addr(unsigned long address, pgprot_t prot)
 static int change_page_attr_set(unsigned long addr, int numpages,
 								pgprot_t prot)
 {
-	pgprot_t current_prot;
+	pgprot_t current_prot, new_prot;
 	int level;
 	pte_t *pte;
 	int i, ret;
@@ -317,9 +317,10 @@ static int change_page_attr_set(unsigned long addr, int numpages,
 		else
 			pgprot_val(current_prot) = 0;
 
-		pgprot_val(prot) = pgprot_val(current_prot) | pgprot_val(prot);
+		pgprot_val(new_prot) =
+			pgprot_val(current_prot) | pgprot_val(prot);
 
-		ret = change_page_attr_addr(addr, prot);
+		ret = change_page_attr_addr(addr, new_prot);
 		if (ret)
 			return ret;
 		addr += PAGE_SIZE;
@@ -349,7 +350,7 @@ static int change_page_attr_set(unsigned long addr, int numpages,
 static int change_page_attr_clear(unsigned long addr, int numpages,
 								pgprot_t prot)
 {
-	pgprot_t current_prot;
+	pgprot_t current_prot, new_prot;
 	int level;
 	pte_t *pte;
 	int i, ret;
@@ -361,10 +362,10 @@ static int change_page_attr_clear(unsigned long addr, int numpages,
 		else
 			pgprot_val(current_prot) = 0;
 
-		pgprot_val(prot) =
+		pgprot_val(new_prot) =
 				pgprot_val(current_prot) & ~pgprot_val(prot);
 
-		ret = change_page_attr_addr(addr, prot);
+		ret = change_page_attr_addr(addr, new_prot);
 		if (ret)
 			return ret;
 		addr += PAGE_SIZE;

From 6eade8ff461fdf13ec6780602eb5618e0bdaa972 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 849/890] x86: cpa: clean up change_page_attr_set/clear()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 5cfc0d4ade56..145f5edf488a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -312,10 +312,10 @@ static int change_page_attr_set(unsigned long addr, int numpages,
 	for (i = 0; i < numpages ; i++) {
 
 		pte = lookup_address(addr, &level);
-		if (pte)
-			current_prot = pte_pgprot(*pte);
-		else
-			pgprot_val(current_prot) = 0;
+		if (!pte)
+			return -EINVAL;
+
+		current_prot = pte_pgprot(*pte);
 
 		pgprot_val(new_prot) =
 			pgprot_val(current_prot) | pgprot_val(prot);
@@ -356,11 +356,12 @@ static int change_page_attr_clear(unsigned long addr, int numpages,
 	int i, ret;
 
 	for (i = 0; i < numpages; i++) {
+
 		pte = lookup_address(addr, &level);
-		if (pte)
-			current_prot = pte_pgprot(*pte);
-		else
-			pgprot_val(current_prot) = 0;
+		if (!pte)
+			return -EINVAL;
+
+		current_prot = pte_pgprot(*pte);
 
 		pgprot_val(new_prot) =
 				pgprot_val(current_prot) & ~pgprot_val(prot);

From adafdf6a4e45f2d1051e10aebe13025e89dbdf6d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 850/890] x86: ioremap KERN_INFO

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6a9a1418bc98..ac9ab20d8092 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -293,7 +293,7 @@ void __init early_ioremap_init(void)
 	unsigned long *pgd;
 
 	if (early_ioremap_debug)
-		printk(KERN_DEBUG "early_ioremap_init()\n");
+		printk(KERN_INFO "early_ioremap_init()\n");
 
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
@@ -322,7 +322,7 @@ void __init early_ioremap_clear(void)
 	unsigned long *pgd;
 
 	if (early_ioremap_debug)
-		printk(KERN_DEBUG "early_ioremap_clear()\n");
+		printk(KERN_INFO "early_ioremap_clear()\n");
 
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = 0;
@@ -408,7 +408,7 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
 
 	nesting = early_ioremap_nested;
 	if (early_ioremap_debug) {
-		printk(KERN_DEBUG "early_ioremap(%08lx, %08lx) [%d] => ",
+		printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
 		       phys_addr, size, nesting);
 		dump_stack();
 	}
@@ -470,7 +470,7 @@ void __init early_iounmap(void *addr, unsigned long size)
 	WARN_ON(nesting < 0);
 
 	if (early_ioremap_debug) {
-		printk(KERN_DEBUG "early_iounmap(%p, %08lx) [%d]\n", addr,
+		printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
 		       size, nesting);
 		dump_stack();
 	}

From edeed30589f5defe63ce6aaae56f2b7c855e4520 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 851/890] x86: add testcases for RODATA and NX
 protections/attributes

Latest update; I now have 4 NX tests, but 2 fail so they're #if 0'd.
I also cleaned up the NX test code quite a bit, and got rid of the ugly
exception table sorting stuff.

From: Arjan van de Ven <arjan@linux.intel.com>

This patch adds testcases for the CONFIG_DEBUG_RODATA configuration option
as well as the NX CPU feature/mappings. Both testcases can move to tests/
once that patch gets merged into mainline.
(I'm half considering moving the rodata test into mm/init.c but I'll
wait with that until init.c is unified)

As part of this I had to fix a not-quite-right alignment in the vmlinux.lds.h
for the RODATA sections, which lead to 1 page less being marked read only.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/test_nx.c         | 176 ++++++++++++++++++++++++++++++
 arch/x86/kernel/test_rodata.c     |  86 +++++++++++++++
 arch/x86/mm/init_32.c             |   3 +
 arch/x86/mm/init_64.c             |   3 +
 include/asm-generic/vmlinux.lds.h |   1 +
 include/asm-x86/cacheflush.h      |   7 ++
 6 files changed, 276 insertions(+)
 create mode 100644 arch/x86/kernel/test_nx.c
 create mode 100644 arch/x86/kernel/test_rodata.c

diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
new file mode 100644
index 000000000000..6d7ef11e7975
--- /dev/null
+++ b/arch/x86/kernel/test_nx.c
@@ -0,0 +1,176 @@
+/*
+ * test_nx.c: functional test for NX functionality
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+extern int rodata_test_data;
+
+/*
+ * This file checks 4 things:
+ * 1) Check if the stack is not executable
+ * 2) Check if kmalloc memory is not executable
+ * 3) Check if the .rodata section is not executable
+ * 4) Check if the .data section of a module is not executable
+ *
+ * To do this, the test code tries to execute memory in stack/kmalloc/etc,
+ * and then checks if the expected trap happens.
+ *
+ * Sadly, this implies having a dynamic exception handling table entry.
+ * ... which can be done (and will make Rusty cry)... but it can only
+ * be done in a stand-alone module with only 1 entry total.
+ * (otherwise we'd have to sort and that's just too messy)
+ */
+
+
+
+/*
+ * We want to set up an exception handling point on our stack,
+ * which means a variable value. This function is rather dirty
+ * and walks the exception table of the module, looking for a magic
+ * marker and replaces it with a specific function.
+ */
+static void fudze_exception_table(void *marker, void *new)
+{
+	struct module *mod = THIS_MODULE;
+	struct exception_table_entry *extable;
+
+	/*
+	 * Note: This module has only 1 exception table entry,
+	 * so searching and sorting is not needed. If that changes,
+	 * this would be the place to search and re-sort the exception
+	 * table.
+	 */
+	if (mod->num_exentries > 1) {
+		printk(KERN_ERR "test_nx: too many exception table entries!\n");
+		printk(KERN_ERR "test_nx: test results are not reliable.\n");
+		return;
+	}
+	extable = (struct exception_table_entry *)mod->extable;
+	extable[0].insn = (unsigned long)new;
+}
+
+
+/*
+ * exception tables get their symbols translated so we need
+ * to use a fake function to put in there, which we can then
+ * replace at runtime.
+ */
+void foo_label(void);
+
+/*
+ * returns 0 for not-executable, negative for executable
+ *
+ * Note: we cannot allow this function to be inlined, because
+ * that would give us more than 1 exception table entry.
+ * This in turn would break the assumptions above.
+ */
+static noinline int test_address(void *address)
+{
+	unsigned long result;
+
+	/* Set up an exception table entry for our address */
+	fudze_exception_table(&foo_label, address);
+	result = 1;
+	asm volatile(
+		"foo_label:\n"
+		"0:	call *%[fake_code]\n"
+		"1:\n"
+		".section .fixup,\"ax\"\n"
+		"2:	mov %[zero], %[rslt]\n"
+		"	ret\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"       .align 8\n"
+		"	.quad 0b\n"
+		"	.quad 2b\n"
+		".previous\n"
+		: [rslt] "=r" (result)
+		: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
+	);
+	/* change the exception table back for the next round */
+	fudze_exception_table(address, &foo_label);
+
+	if (result)
+		return -ENODEV;
+	return 0;
+}
+
+static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
+
+static int test_NX(void)
+{
+	int ret = 0;
+	/* 0xC3 is the opcode for "ret" */
+	char stackcode[] = {0xC3, 0x90, 0 };
+	char *heap;
+
+	test_data = 0xC3;
+
+	printk(KERN_INFO "Testing NX protection\n");
+
+	/* Test 1: check if the stack is not executable */
+	if (test_address(&stackcode)) {
+		printk(KERN_ERR "test_nx: stack was executable\n");
+		ret = -ENODEV;
+	}
+
+
+	/* Test 2: Check if the heap is executable */
+	heap = kmalloc(64, GFP_KERNEL);
+	if (!heap)
+		return -ENOMEM;
+	heap[0] = 0xC3; /* opcode for "ret" */
+
+	if (test_address(heap)) {
+		printk(KERN_ERR "test_nx: heap was executable\n");
+		ret = -ENODEV;
+	}
+	kfree(heap);
+
+	/*
+	 * The following 2 tests currently fail, this needs to get fixed
+	 * Until then, don't run them to avoid too many people getting scared
+	 * by the error message
+	 */
+#if 0
+
+#ifdef CONFIG_DEBUG_RODATA
+	/* Test 3: Check if the .rodata section is executable */
+	if (rodata_test_data != 0xC3) {
+		printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
+		ret = -ENODEV;
+	} else if (test_address(&rodata_test_data)) {
+		printk(KERN_ERR "test_nx: .rodata section is executable\n");
+		ret = -ENODEV;
+	}
+#endif
+
+	/* Test 4: Check if the .data section of a module is executable */
+	if (test_address(&test_data)) {
+		printk(KERN_ERR "test_nx: .data section is executable\n");
+		ret = -ENODEV;
+	}
+
+#endif
+	return 0;
+}
+
+static void test_exit(void)
+{
+}
+
+module_init(test_NX);
+module_exit(test_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Testcase for the NX infrastructure");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
new file mode 100644
index 000000000000..4c163772000e
--- /dev/null
+++ b/arch/x86/kernel/test_rodata.c
@@ -0,0 +1,86 @@
+/*
+ * test_rodata.c: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/module.h>
+#include <asm/sections.h>
+extern int rodata_test_data;
+
+int rodata_test(void)
+{
+	unsigned long result;
+	unsigned long start, end;
+
+	/* test 1: read the value */
+	/* If this test fails, some previous testrun has clobbered the state */
+	if (!rodata_test_data) {
+		printk(KERN_ERR "rodata_test: test 1 fails (start data)\n");
+		return -ENODEV;
+	}
+
+	/* test 2: write to the variable; this should fault */
+	/*
+	 * If this test fails, we managed to overwrite the data
+	 *
+	 * This is written in assembly to be able to catch the
+	 * exception that is supposed to happen in the correct
+	 * case
+	 */
+
+	result = 1;
+	asm volatile(
+		"0:	mov %[zero],(%[rodata_test])\n"
+		"	mov %[zero], %[rslt]\n"
+		"1:\n"
+		".section .fixup,\"ax\"\n"
+		"2:	jmp 1b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"       .align 16\n"
+#ifdef CONFIG_X86_32
+		"	.long 0b,2b\n"
+#else
+		"	.quad 0b,2b\n"
+#endif
+		".previous"
+		: [rslt] "=r" (result)
+		: [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
+	);
+
+
+	if (!result) {
+		printk(KERN_ERR "rodata_test: test data was not read only\n");
+		return -ENODEV;
+	}
+
+	/* test 3: check the value hasn't changed */
+	/* If this test fails, we managed to overwrite the data */
+	if (!rodata_test_data) {
+		printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n");
+		return -ENODEV;
+	}
+	/* test 4: check if the rodata section is 4Kb aligned */
+	start = (unsigned long)__start_rodata;
+	end = (unsigned long)__end_rodata;
+	if (start & (PAGE_SIZE - 1)) {
+		printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n");
+		return -ENODEV;
+	}
+	if (end & (PAGE_SIZE - 1)) {
+		printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4d1156545194..a72737c05747 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -736,6 +736,8 @@ static int noinline do_test_wp_bit(void)
 }
 
 #ifdef CONFIG_DEBUG_RODATA
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
 
 void mark_rodata_ro(void)
 {
@@ -765,6 +767,7 @@ void mark_rodata_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk("Write protecting the kernel read-only data: %luk\n",
 	       size >> 10);
+	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, start + size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f97ace7a55e5..50d29f5da02b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -573,6 +573,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		set_memory_rw(begin, (end - begin)/PAGE_SIZE);
 		set_memory_np(begin, (end - begin)/PAGE_SIZE);
 		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
+		rodata_test();
 	}
 #endif
 }
@@ -585,6 +586,8 @@ void free_initmem(void)
 }
 
 #ifdef CONFIG_DEBUG_RODATA
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
 
 void mark_rodata_ro(void)
 {
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 76df771be585..f784d2f34149 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -184,6 +184,7 @@
 		VMLINUX_SYMBOL(__start___param) = .;			\
 		*(__param)						\
 		VMLINUX_SYMBOL(__stop___param) = .;			\
+		. = ALIGN((align));					\
 		VMLINUX_SYMBOL(__end_rodata) = .;			\
 	}								\
 	. = ALIGN((align));
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 157da0206ccc..3e74aff90809 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -47,5 +47,12 @@ void clflush_cache_range(void *addr, int size);
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif
+#ifdef CONFIG_DEBUG_RODATA_TEST
+void rodata_test(void);
+#else
+static inline void rodata_test(void)
+{
+}
+#endif
 
 #endif

From 72932c7ad2cc309b0ba6149aa31faa26aaee54d5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 852/890] x86: cpa move the flush into set and clear functions

To avoid the modification of the flush code for the clflush
implementation, move the flush into the set and clear functions and
provide helper functions for the debugging code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr-test.c |  8 ++--
 arch/x86/mm/pageattr.c      | 89 +++++++++++++++++--------------------
 2 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index fe73905d075e..4e8b8c6baccd 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -162,8 +162,8 @@ static __init int exercise_pageattr(void)
 			continue;
 		}
 
-		err = change_page_attr_clear(addr[i], len[i],
-						__pgprot(_PAGE_GLOBAL));
+		err = __change_page_attr_clear(addr[i], len[i],
+					       __pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA %d failed %d\n", i, err);
 			failed++;
@@ -197,8 +197,8 @@ static __init int exercise_pageattr(void)
 			failed++;
 			continue;
 		}
-		err = change_page_attr_set(addr[i], len[i],
-							__pgprot(_PAGE_GLOBAL));
+		err = __change_page_attr_set(addr[i], len[i],
+					     __pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA reverting failed: %d\n", err);
 			failed++;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 145f5edf488a..55f5b5cdb12e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -301,8 +301,8 @@ static int change_page_attr_addr(unsigned long address, pgprot_t prot)
  * This function is different from change_page_attr() in that only selected bits
  * are impacted, all other bits remain as is.
  */
-static int change_page_attr_set(unsigned long addr, int numpages,
-								pgprot_t prot)
+static int __change_page_attr_set(unsigned long addr, int numpages,
+				  pgprot_t prot)
 {
 	pgprot_t current_prot, new_prot;
 	int level;
@@ -325,9 +325,19 @@ static int change_page_attr_set(unsigned long addr, int numpages,
 			return ret;
 		addr += PAGE_SIZE;
 	}
+
 	return 0;
 }
 
+static int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
+{
+	int ret = __change_page_attr_set(addr, numpages, prot);
+
+	global_flush_tlb();
+	return ret;
+
+}
+
 /**
  * change_page_attr_clear - Change page table attributes in the linear mapping.
  * @addr: Virtual address in linear mapping.
@@ -347,8 +357,8 @@ static int change_page_attr_set(unsigned long addr, int numpages,
  * This function is different from change_page_attr() in that only selected bits
  * are impacted, all other bits remain as is.
  */
-static int change_page_attr_clear(unsigned long addr, int numpages,
-								pgprot_t prot)
+static int __change_page_attr_clear(unsigned long addr, int numpages,
+				    pgprot_t prot)
 {
 	pgprot_t current_prot, new_prot;
 	int level;
@@ -371,81 +381,59 @@ static int change_page_attr_clear(unsigned long addr, int numpages,
 			return ret;
 		addr += PAGE_SIZE;
 	}
+
 	return 0;
 }
 
+static int change_page_attr_clear(unsigned long addr, int numpages,
+				  pgprot_t prot)
+{
+	int ret = __change_page_attr_clear(addr, numpages, prot);
+
+	global_flush_tlb();
+	return ret;
+
+}
+
 int set_memory_uc(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_set(addr, numpages,
-				__pgprot(_PAGE_PCD | _PAGE_PWT));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_set(addr, numpages,
+				    __pgprot(_PAGE_PCD | _PAGE_PWT));
 }
 EXPORT_SYMBOL(set_memory_uc);
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_clear(addr, numpages,
-				__pgprot(_PAGE_PCD | _PAGE_PWT));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_clear(addr, numpages,
+				      __pgprot(_PAGE_PCD | _PAGE_PWT));
 }
 EXPORT_SYMBOL(set_memory_wb);
 
 int set_memory_x(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_clear(addr, numpages,
-				__pgprot(_PAGE_NX));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_set(addr, numpages,
-				__pgprot(_PAGE_NX));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_clear(addr, numpages,
-				__pgprot(_PAGE_RW));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_set(addr, numpages,
-				__pgprot(_PAGE_RW));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
 }
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-	int err;
-
-	err = change_page_attr_clear(addr, numpages,
-				__pgprot(_PAGE_PRESENT));
-	global_flush_tlb();
-	return err;
+	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
 }
 
 int set_pages_uc(struct page *page, int numpages)
@@ -500,14 +488,17 @@ int set_pages_rw(struct page *page, int numpages)
 static int __set_pages_p(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	return change_page_attr_set(addr, numpages,
-				__pgprot(_PAGE_PRESENT | _PAGE_RW));
+
+	return __change_page_attr_set(addr, numpages,
+				      __pgprot(_PAGE_PRESENT | _PAGE_RW));
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+
+	return __change_page_attr_clear(addr, numpages,
+					__pgprot(_PAGE_PRESENT));
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)

From ff31452b6ea5032f26f16140d45dc6596260cd9c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 853/890] x86: cpa create set_and_clr function

Create a set_and_clr function to avoid the duplicate loops. Allows
also to do combined operations for optimization.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 55f5b5cdb12e..c54832b75069 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -282,6 +282,45 @@ static int change_page_attr_addr(unsigned long address, pgprot_t prot)
 	return err;
 }
 
+static int __change_page_attr_set_clr(unsigned long addr, int numpages,
+				      pgprot_t mask_set, pgprot_t mask_clr)
+{
+	pgprot_t new_prot;
+	int level;
+	pte_t *pte;
+	int i, ret;
+
+	for (i = 0; i < numpages ; i++) {
+
+		pte = lookup_address(addr, &level);
+		if (!pte)
+			return -EINVAL;
+
+		new_prot = pte_pgprot(*pte);
+
+		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(mask_set);
+
+		ret = change_page_attr_addr(addr, new_prot);
+		if (ret)
+			return ret;
+		addr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static int change_page_attr_set_clr(unsigned long addr, int numpages,
+				    pgprot_t mask_set, pgprot_t mask_clr)
+{
+	int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
+					     mask_clr);
+
+	global_flush_tlb();
+
+	return ret;
+}
+
 /**
  * change_page_attr_set - Change page table attributes in the linear mapping.
  * @addr: Virtual address in linear mapping.

From 56744546b3e5379177a70e7306c6283f727e4732 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 854/890] x86: cpa use the new set_clr function

Convert cpa_set and cpa_clear to call the new set_clr function.
Seperate out the debug helpers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 126 +++++++----------------------------------
 1 file changed, 20 insertions(+), 106 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c54832b75069..7823adab96e4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -321,116 +321,16 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	return ret;
 }
 
-/**
- * change_page_attr_set - Change page table attributes in the linear mapping.
- * @addr: Virtual address in linear mapping.
- * @numpages: Number of pages to change
- * @prot: Protection/caching type bits to set (PAGE_*)
- *
- * Returns 0 on success, otherwise a negated errno.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere
- * (e.g. in user space) * This function only deals with the kernel linear map.
- *
- * This function is different from change_page_attr() in that only selected bits
- * are impacted, all other bits remain as is.
- */
-static int __change_page_attr_set(unsigned long addr, int numpages,
-				  pgprot_t prot)
+static inline int change_page_attr_set(unsigned long addr, int numpages,
+				       pgprot_t mask)
 {
-	pgprot_t current_prot, new_prot;
-	int level;
-	pte_t *pte;
-	int i, ret;
-
-	for (i = 0; i < numpages ; i++) {
-
-		pte = lookup_address(addr, &level);
-		if (!pte)
-			return -EINVAL;
-
-		current_prot = pte_pgprot(*pte);
-
-		pgprot_val(new_prot) =
-			pgprot_val(current_prot) | pgprot_val(prot);
-
-		ret = change_page_attr_addr(addr, new_prot);
-		if (ret)
-			return ret;
-		addr += PAGE_SIZE;
-	}
-
-	return 0;
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
 }
 
-static int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
+static inline int change_page_attr_clear(unsigned long addr, int numpages,
+					 pgprot_t mask)
 {
-	int ret = __change_page_attr_set(addr, numpages, prot);
-
-	global_flush_tlb();
-	return ret;
-
-}
-
-/**
- * change_page_attr_clear - Change page table attributes in the linear mapping.
- * @addr: Virtual address in linear mapping.
- * @numpages: Number of pages to change
- * @prot: Protection/caching type bits to clear (PAGE_*)
- *
- * Returns 0 on success, otherwise a negated errno.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere
- * (e.g. in user space) * This function only deals with the kernel linear map.
- *
- * This function is different from change_page_attr() in that only selected bits
- * are impacted, all other bits remain as is.
- */
-static int __change_page_attr_clear(unsigned long addr, int numpages,
-				    pgprot_t prot)
-{
-	pgprot_t current_prot, new_prot;
-	int level;
-	pte_t *pte;
-	int i, ret;
-
-	for (i = 0; i < numpages; i++) {
-
-		pte = lookup_address(addr, &level);
-		if (!pte)
-			return -EINVAL;
-
-		current_prot = pte_pgprot(*pte);
-
-		pgprot_val(new_prot) =
-				pgprot_val(current_prot) & ~pgprot_val(prot);
-
-		ret = change_page_attr_addr(addr, new_prot);
-		if (ret)
-			return ret;
-		addr += PAGE_SIZE;
-	}
-
-	return 0;
-}
-
-static int change_page_attr_clear(unsigned long addr, int numpages,
-				  pgprot_t prot)
-{
-	int ret = __change_page_attr_clear(addr, numpages, prot);
-
-	global_flush_tlb();
-	return ret;
+	return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
 
 }
 
@@ -522,6 +422,20 @@ int set_pages_rw(struct page *page, int numpages)
 }
 
 
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
+static inline int __change_page_attr_set(unsigned long addr, int numpages,
+					 pgprot_t mask)
+{
+	return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
+}
+
+static inline int __change_page_attr_clear(unsigned long addr, int numpages,
+					   pgprot_t mask)
+{
+	return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
+}
+#endif
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 
 static int __set_pages_p(struct page *page, int numpages)

From 57a6a46aa26d6e39c62daf8b3b96e94f76e6846f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 855/890] x86: cpa: implement clflush optimization

Use clflush on CPUs which support this.

clflush is only used when the page attribute operation has been
successful. On CPUs which do not support clflush and in the case of
error the old fashioned global_flush_tlb() is called.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7823adab96e4..bbe691dd272e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -52,6 +52,37 @@ static void global_flush_tlb(void)
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 
+struct clflush_data {
+	unsigned long addr;
+	int numpages;
+};
+
+static void __cpa_flush_range(void *arg)
+{
+	struct clflush_data *cld = arg;
+
+	/*
+	 * We could optimize that further and do individual per page
+	 * tlb invalidates for a low number of pages. Caveat: we must
+	 * flush the high aliases on 64bit as well.
+	 */
+	__flush_tlb_all();
+
+	clflush_cache_range((void *) cld->addr, cld->numpages * PAGE_SIZE);
+}
+
+static void cpa_flush_range(unsigned long addr, int numpages)
+{
+	struct clflush_data cld;
+
+	BUG_ON(irqs_disabled());
+
+	cld.addr = addr;
+	cld.numpages = numpages;
+
+	on_each_cpu(__cpa_flush_range, &cld, 1, 1);
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -316,7 +347,16 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
 					     mask_clr);
 
-	global_flush_tlb();
+	/*
+	 * On success we use clflush, when the CPU supports it to
+	 * avoid the wbindv. If the CPU does not support it and in the
+	 * error case we fall back to global_flush_tlb (which uses
+	 * wbindv):
+	 */
+	if (!ret && cpu_has_clflush)
+		cpa_flush_range(addr, numpages);
+	else
+		global_flush_tlb();
 
 	return ret;
 }

From af1e6844d60057774910a2d08bd75b67d73ba7d5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 856/890] x86: cpa: rename global_flush_tlb() to
 cpa_flush_all()

The function name global_flush_tlb() suggests something different from
what the function really does. Rename it to cpa_flush_all(), which is an
understandable counterpart to cpa_flush_range().

no global visibility of the old API anymore.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr-test.c |  4 ++--
 arch/x86/mm/pageattr.c      | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 4e8b8c6baccd..554820265b95 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -183,7 +183,7 @@ static __init int exercise_pageattr(void)
 
 	}
 	vfree(bm);
-	global_flush_tlb();
+	cpa_flush_all();
 
 	failed += print_split(&sb);
 
@@ -211,7 +211,7 @@ static __init int exercise_pageattr(void)
 		}
 
 	}
-	global_flush_tlb();
+	cpa_flush_all();
 
 	failed += print_split(&sc);
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bbe691dd272e..cdd2ea2a2239 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -33,7 +33,7 @@ void clflush_cache_range(void *addr, int size)
 		clflush(addr+i);
 }
 
-static void flush_kernel_map(void *arg)
+static void __cpa_flush_all(void *arg)
 {
 	/*
 	 * Flush all to work around Errata in early athlons regarding
@@ -45,11 +45,11 @@ static void flush_kernel_map(void *arg)
 		wbinvd();
 }
 
-static void global_flush_tlb(void)
+static void cpa_flush_all(void)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(flush_kernel_map, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_all, NULL, 1, 1);
 }
 
 struct clflush_data {
@@ -350,13 +350,13 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	/*
 	 * On success we use clflush, when the CPU supports it to
 	 * avoid the wbindv. If the CPU does not support it and in the
-	 * error case we fall back to global_flush_tlb (which uses
+	 * error case we fall back to cpa_flush_all (which uses
 	 * wbindv):
 	 */
 	if (!ret && cpu_has_clflush)
 		cpa_flush_range(addr, numpages);
 	else
-		global_flush_tlb();
+		cpa_flush_all();
 
 	return ret;
 }

From cd8ddf1a2800026dd58433333cce7a65cbc6c6d2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 857/890] x86: clflush_page_range needs mfence

clflush is an unordered operation with respect to other memory
traffic, including other CLFLUSH instructions. This needs proper
fencing with mfence.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index cdd2ea2a2239..90b658ac39c2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,12 +25,24 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 /*
  * Flushing functions
  */
+
+
+/**
+ * clflush_cache_range - flush a cache range with clflush
+ * @addr:	virtual start address
+ * @size:	number of bytes to flush
+ *
+ * clflush is an unordered instruction which needs fencing with mfence
+ * to avoid ordering issues.
+ */
 void clflush_cache_range(void *addr, int size)
 {
 	int i;
 
+	mb();
 	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
 		clflush(addr+i);
+	mb();
 }
 
 static void __cpa_flush_all(void *arg)

From 3b233e52f70bf102078b2c0c3f7f86a441689056 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:08 +0100
Subject: [PATCH 858/890] x86: optimize clflush

clflush is sufficient to be issued on one CPU. The invalidation is
broadcast throughout the coherence domain.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 90b658ac39c2..bbfc8e2466ab 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -64,35 +64,29 @@ static void cpa_flush_all(void)
 	on_each_cpu(__cpa_flush_all, NULL, 1, 1);
 }
 
-struct clflush_data {
-	unsigned long addr;
-	int numpages;
-};
-
 static void __cpa_flush_range(void *arg)
 {
-	struct clflush_data *cld = arg;
-
 	/*
 	 * We could optimize that further and do individual per page
 	 * tlb invalidates for a low number of pages. Caveat: we must
 	 * flush the high aliases on 64bit as well.
 	 */
 	__flush_tlb_all();
-
-	clflush_cache_range((void *) cld->addr, cld->numpages * PAGE_SIZE);
 }
 
 static void cpa_flush_range(unsigned long addr, int numpages)
 {
-	struct clflush_data cld;
-
 	BUG_ON(irqs_disabled());
 
-	cld.addr = addr;
-	cld.numpages = numpages;
+	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 
-	on_each_cpu(__cpa_flush_range, &cld, 1, 1);
+	/*
+	 * We only need to flush on one CPU,
+	 * clflush is a MESI-coherent instruction that
+	 * will cause all other CPUs to flush the same
+	 * cachelines:
+	 */
+	clflush_cache_range((void *) addr, numpages * PAGE_SIZE);
 }
 
 /*

From 4c61afcdb2cd4be299c1442b33adf312b695e2d7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 859/890] x86: fix clflush_page_range logic

only present ptes must be flushed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c       | 31 ++++++++++++++++++++++++-------
 include/asm-x86/cacheflush.h |  2 +-
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bbfc8e2466ab..97ec9e7d29d9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -26,7 +26,6 @@ within(unsigned long addr, unsigned long start, unsigned long end)
  * Flushing functions
  */
 
-
 /**
  * clflush_cache_range - flush a cache range with clflush
  * @addr:	virtual start address
@@ -35,13 +34,19 @@ within(unsigned long addr, unsigned long start, unsigned long end)
  * clflush is an unordered instruction which needs fencing with mfence
  * to avoid ordering issues.
  */
-void clflush_cache_range(void *addr, int size)
+void clflush_cache_range(void *vaddr, unsigned int size)
 {
-	int i;
+	void *vend = vaddr + size - 1;
 
 	mb();
-	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr+i);
+
+	for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
+		clflush(vaddr);
+	/*
+	 * Flush any possible final partial cacheline:
+	 */
+	clflush(vend);
+
 	mb();
 }
 
@@ -74,9 +79,13 @@ static void __cpa_flush_range(void *arg)
 	__flush_tlb_all();
 }
 
-static void cpa_flush_range(unsigned long addr, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages)
 {
+	unsigned int i, level;
+	unsigned long addr;
+
 	BUG_ON(irqs_disabled());
+	WARN_ON(PAGE_ALIGN(start) != start);
 
 	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 
@@ -86,7 +95,15 @@ static void cpa_flush_range(unsigned long addr, int numpages)
 	 * will cause all other CPUs to flush the same
 	 * cachelines:
 	 */
-	clflush_cache_range((void *) addr, numpages * PAGE_SIZE);
+	for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+		pte_t *pte = lookup_address(addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && pte_present(*pte))
+			clflush_cache_range((void *) addr, PAGE_SIZE);
+	}
 }
 
 /*
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 3e74aff90809..8dd8c5e3cc7f 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -42,7 +42,7 @@ int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 
-void clflush_cache_range(void *addr, int size);
+void clflush_cache_range(void *addr, unsigned int size);
 
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);

From 1a4872529e13265d05ffae75b8d09697540016d2 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 860/890] x86: move misplaced rodata check call

It looks like a mismerge put the rodata self-check in the wrong spot; move
it to the right place after marking the .rodata section read only.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 50d29f5da02b..f51180c02b8f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -573,7 +573,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		set_memory_rw(begin, (end - begin)/PAGE_SIZE);
 		set_memory_np(begin, (end - begin)/PAGE_SIZE);
 		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
-		rodata_test();
 	}
 #endif
 }
@@ -614,6 +613,8 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
 
+	rodata_test();
+
 #ifdef CONFIG_CPA_DEBUG
 	printk("Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);

From ee01f1122ceb02a2c9b7142c5dd17b49e59ba774 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 861/890] x86: init memory debugging

debug incorrect/late access to init memory, by permanently unmapping
the init memory ranges. Depends on CONFIG_DEBUG_PAGEALLOC=y.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c | 11 +++++++++++
 arch/x86/mm/init_64.c | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index a72737c05747..8d7f723cfc28 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -783,6 +783,16 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
 	unsigned long addr;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * If debugging page accesses then do not free this memory but
+	 * mark them not present - any buggy init-section access will
+	 * create a kernel page fault:
+	 */
+	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+		begin, PAGE_ALIGN(end));
+	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
@@ -798,6 +808,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		totalram_pages++;
 	}
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+#endif
 }
 
 void free_initmem(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f51180c02b8f..e0c1e98ad1bf 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -550,6 +550,16 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	if (begin >= end)
 		return;
 
+	/*
+	 * If debugging page accesses then do not free this memory but
+	 * mark them not present - any buggy init-section access will
+	 * create a kernel page fault:
+	 */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+		begin, PAGE_ALIGN(end));
+	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
@@ -575,6 +585,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
 	}
 #endif
+#endif
 }
 
 void free_initmem(void)

From 8f46924600e30b140445f5b84abe9b80d2fff5fb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 862/890] x86: enable CONFIG_DEBUG_PAGEALLOC more widely

make CONFIG_DEBUG_PAGEALLOC universally available.

CONFIG_HIBERNATION and CONFIG_HUGETLBFS was disabling it, for no
particular reason.

If there are any unfixed bugs here we'll fix it, but do not disable
vital debugging facilities like that ..

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a859a7e2d74..347e33e5f395 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -40,7 +40,7 @@ comment "Page alloc debug is incompatible with Software Suspend on i386"
 
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
-	depends on DEBUG_KERNEL && !HIBERNATION && !HUGETLBFS
+	depends on DEBUG_KERNEL
 	help
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types

From aba8391f7323294e88e3a665513434aba4042a7d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 863/890] x86: rodata config hookup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug   | 16 ++++++++++++++++
 arch/x86/kernel/Makefile |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 347e33e5f395..2d0bd33b73aa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -68,6 +68,22 @@ config DEBUG_RODATA
 	  data. This is recommended so that we can catch kernel bugs sooner.
 	  If in doubt, say "Y".
 
+config DEBUG_RODATA_TEST
+	bool "Testcase for the DEBUG_RODATA feature"
+	depends on DEBUG_RODATA
+	help
+	  This option enables a testcase for the DEBUG_RODATA
+	  feature as well as for the change_page_attr() infrastructure.
+	  If in doubt, say "N"
+
+config DEBUG_NX_TEST
+	tristate "Testcase for the NX non-executable stack feature"
+	depends on DEBUG_KERNEL && m
+	help
+	  This option enables a testcase for the CPU NX capability
+	  and the software setup of this feature.
+	  If in doubt, say "N"
+
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5bdb0f0431e9..6f813009d44b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -64,6 +64,8 @@ obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
 
 obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
+obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
+obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o

From 86f03989d99cfa2e1216cdd7aa996852236909cf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 864/890] x86: cpa: fix the self-test

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug      |  2 +-
 arch/x86/mm/init_32.c       |  4 +-
 arch/x86/mm/init_64.c       | 16 --------
 arch/x86/mm/pageattr-test.c | 15 +++-----
 arch/x86/mm/pageattr.c      | 74 +++++++++++++++++++------------------
 include/asm-x86/pgtable.h   |  1 +
 6 files changed, 48 insertions(+), 64 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2d0bd33b73aa..2e1e3af28c3a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -40,7 +40,7 @@ comment "Page alloc debug is incompatible with Software Suspend on i386"
 
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && X86_32
 	help
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8d7f723cfc28..8ed5c189d7aa 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -781,8 +781,6 @@ void mark_rodata_ro(void)
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-	unsigned long addr;
-
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	/*
 	 * If debugging page accesses then do not free this memory but
@@ -793,6 +791,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		begin, PAGE_ALIGN(end));
 	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
 #else
+	unsigned long addr;
+
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e0c1e98ad1bf..8a7b725ce3c7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -569,22 +569,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 		free_page(addr);
 		totalram_pages++;
 	}
-#ifdef CONFIG_DEBUG_RODATA
-	/*
-	 * This will make the __init pages not present and
-	 * not executable, so that any attempt to use a
-	 * __init function from now on will fault immediately
-	 * rather than supriously later when memory gets reused.
-	 *
-	 * We only do this for DEBUG_RODATA to not break up the
-	 * 2Mb kernel mapping just for this debug feature.
-	 */
-	if (begin >= __START_KERNEL_map) {
-		set_memory_rw(begin, (end - begin)/PAGE_SIZE);
-		set_memory_np(begin, (end - begin)/PAGE_SIZE);
-		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
-	}
-#endif
 #endif
 }
 
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 554820265b95..06353d43f72e 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -15,8 +15,7 @@
 #include <asm/kdebug.h>
 
 enum {
-	NTEST			= 400,
-	LOWEST_LEVEL		= PG_LEVEL_4K,
+	NTEST			= 4000,
 #ifdef CONFIG_X86_64
 	LPS			= (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
@@ -59,10 +58,10 @@ static __init int print_split(struct split_state *s)
 			continue;
 		}
 
-		if (level == 2 && sizeof(long) == 8) {
+		if (level == PG_LEVEL_1G && sizeof(long) == 8) {
 			s->gpg++;
 			i += GPS/PAGE_SIZE;
-		} else if (level != LOWEST_LEVEL) {
+		} else if (level == PG_LEVEL_2M) {
 			if (!(pte_val(*pte) & _PAGE_PSE)) {
 				printk(KERN_ERR
 					"%lx level %d but not PSE %Lx\n",
@@ -162,7 +161,7 @@ static __init int exercise_pageattr(void)
 			continue;
 		}
 
-		err = __change_page_attr_clear(addr[i], len[i],
+		err = change_page_attr_clear(addr[i], len[i],
 					       __pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA %d failed %d\n", i, err);
@@ -175,7 +174,7 @@ static __init int exercise_pageattr(void)
 				pte ? (u64)pte_val(*pte) : 0ULL);
 			failed++;
 		}
-		if (level != LOWEST_LEVEL) {
+		if (level != PG_LEVEL_4K) {
 			printk(KERN_ERR "CPA %lx: unexpected level %d\n",
 				addr[i], level);
 			failed++;
@@ -183,7 +182,6 @@ static __init int exercise_pageattr(void)
 
 	}
 	vfree(bm);
-	cpa_flush_all();
 
 	failed += print_split(&sb);
 
@@ -197,7 +195,7 @@ static __init int exercise_pageattr(void)
 			failed++;
 			continue;
 		}
-		err = __change_page_attr_set(addr[i], len[i],
+		err = change_page_attr_set(addr[i], len[i],
 					     __pgprot(_PAGE_GLOBAL));
 		if (err < 0) {
 			printk(KERN_ERR "CPA reverting failed: %d\n", err);
@@ -211,7 +209,6 @@ static __init int exercise_pageattr(void)
 		}
 
 	}
-	cpa_flush_all();
 
 	failed += print_split(&sc);
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 97ec9e7d29d9..532a40bc0e7e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -197,10 +197,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
-	int i, level;
+	unsigned int i, level;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = GFP_ATOMIC;
+	gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
+	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 #endif
 	base = alloc_pages(gfp_flags, 0);
 	if (!base)
@@ -224,6 +225,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
 
+	pgprot_val(ref_prot) &= ~_PAGE_NX;
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
 		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
 
@@ -248,7 +250,8 @@ out_unlock:
 }
 
 static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
+__change_page_attr(unsigned long address, unsigned long pfn,
+		   pgprot_t mask_set, pgprot_t mask_clr)
 {
 	struct page *kpte_page;
 	int level, err = 0;
@@ -267,15 +270,20 @@ repeat:
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	prot = static_protections(prot, address);
-
 	if (level == PG_LEVEL_4K) {
-		WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PSE);
-		set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
-	} else {
-		/* Clear the PSE bit for the 4k level pages ! */
-		pgprot_val(prot) = pgprot_val(prot) & ~_PAGE_PSE;
+		pgprot_t new_prot = pte_pgprot(*kpte);
+		pte_t new_pte, old_pte = *kpte;
 
+		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(mask_set);
+
+		new_prot = static_protections(new_prot, address);
+
+		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+		BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+
+		set_pte_atomic(kpte, new_pte);
+	} else {
 		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;
@@ -297,22 +305,26 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
-static int change_page_attr_addr(unsigned long address, pgprot_t prot)
+static int
+change_page_attr_addr(unsigned long address, pgprot_t mask_set,
+							pgprot_t mask_clr)
 {
 	int err = 0, kernel_map = 0;
-	unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+	unsigned long pfn;
 
 #ifdef CONFIG_X86_64
 	if (address >= __START_KERNEL_map &&
 			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
 
-		address = (unsigned long)__va(__pa(address));
+		address = (unsigned long)__va(__pa((void *)address));
 		kernel_map = 1;
 	}
 #endif
 
-	if (!kernel_map || pte_present(pfn_pte(0, prot))) {
-		err = __change_page_attr(address, pfn, prot);
+	pfn = __pa(address) >> PAGE_SHIFT;
+
+	if (!kernel_map || 1) {
+		err = __change_page_attr(address, pfn, mask_set, mask_clr);
 		if (err)
 			return err;
 	}
@@ -324,12 +336,15 @@ static int change_page_attr_addr(unsigned long address, pgprot_t prot)
 	 */
 	if (__pa(address) < KERNEL_TEXT_SIZE) {
 		unsigned long addr2;
-		pgprot_t prot2;
 
-		addr2 = __START_KERNEL_map + __pa(address);
+		addr2 = __pa(address) + __START_KERNEL_map - phys_base;
 		/* Make sure the kernel mappings stay executable */
-		prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
-		err = __change_page_attr(addr2, pfn, prot2);
+		pgprot_val(mask_clr) |= _PAGE_NX;
+		/*
+		 * Our high aliases are imprecise, so do not propagate
+		 * failures back to users:
+		 */
+		__change_page_attr(addr2, pfn, mask_set, mask_clr);
 	}
 #endif
 
@@ -339,26 +354,13 @@ static int change_page_attr_addr(unsigned long address, pgprot_t prot)
 static int __change_page_attr_set_clr(unsigned long addr, int numpages,
 				      pgprot_t mask_set, pgprot_t mask_clr)
 {
-	pgprot_t new_prot;
-	int level;
-	pte_t *pte;
-	int i, ret;
+	unsigned int i;
+	int ret;
 
-	for (i = 0; i < numpages ; i++) {
-
-		pte = lookup_address(addr, &level);
-		if (!pte)
-			return -EINVAL;
-
-		new_prot = pte_pgprot(*pte);
-
-		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
-		pgprot_val(new_prot) |= pgprot_val(mask_set);
-
-		ret = change_page_attr_addr(addr, new_prot);
+	for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
+		ret = change_page_attr_addr(addr, mask_set, mask_clr);
 		if (ret)
 			return ret;
-		addr += PAGE_SIZE;
 	}
 
 	return 0;
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index ee40a88882f6..269e7e29ea8e 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -240,6 +240,7 @@ enum {
 	PG_LEVEL_NONE,
 	PG_LEVEL_4K,
 	PG_LEVEL_2M,
+	PG_LEVEL_1G,
 };
 
 /*

From 0879750f5d75dee0546316b7b0e83fb6cd258ad7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 865/890] x86: cpa cleanup the 64-bit alias math

Cleanup the address calculations, which are necessary to identify the
high/low alias mappings of the kernel on 64 bit machines. Instead of
calling __pa/__va back and forth, calculate the physical address once
and base the other calculations on it. Add understandable constants so
we can use the already available within() helper. Also add comments,
which help mere mortals to understand what this code does.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 72 ++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532a40bc0e7e..ec07c1873d65 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -305,49 +305,53 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
+#define HIGH_MAP_START	__START_KERNEL_map
+#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
 static int
 change_page_attr_addr(unsigned long address, pgprot_t mask_set,
-							pgprot_t mask_clr)
+		      pgprot_t mask_clr)
 {
-	int err = 0, kernel_map = 0;
-	unsigned long pfn;
-
-#ifdef CONFIG_X86_64
-	if (address >= __START_KERNEL_map &&
-			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
-
-		address = (unsigned long)__va(__pa((void *)address));
-		kernel_map = 1;
-	}
-#endif
-
-	pfn = __pa(address) >> PAGE_SHIFT;
-
-	if (!kernel_map || 1) {
-		err = __change_page_attr(address, pfn, mask_set, mask_clr);
-		if (err)
-			return err;
-	}
+	unsigned long phys_addr = __pa(address);
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+	int err;
 
 #ifdef CONFIG_X86_64
 	/*
-	 * Handle kernel mapping too which aliases part of
-	 * lowmem:
+	 * If we are inside the high mapped kernel range, then we
+	 * fixup the low mapping first. __va() returns the virtual
+	 * address in the linear mapping:
 	 */
-	if (__pa(address) < KERNEL_TEXT_SIZE) {
-		unsigned long addr2;
-
-		addr2 = __pa(address) + __START_KERNEL_map - phys_base;
-		/* Make sure the kernel mappings stay executable */
-		pgprot_val(mask_clr) |= _PAGE_NX;
-		/*
-		 * Our high aliases are imprecise, so do not propagate
-		 * failures back to users:
-		 */
-		__change_page_attr(addr2, pfn, mask_set, mask_clr);
-	}
+	if (within(address, HIGH_MAP_START, HIGH_MAP_END))
+		address = (unsigned long) __va(phys_addr);
 #endif
 
+	err = __change_page_attr(address, pfn, mask_set, mask_clr);
+	if (err)
+		return err;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * If the physical address is inside the kernel map, we need
+	 * to touch the high mapped kernel as well:
+	 */
+	if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) {
+		/*
+		 * Calc the high mapping address. See __phys_addr()
+		 * for the non obvious details.
+		 */
+		address = phys_addr + HIGH_MAP_START - phys_base;
+		/* Make sure the kernel mappings stay executable */
+		pgprot_val(mask_clr) |= _PAGE_NX;
+
+		/*
+		 * Our high aliases are imprecise, because we check
+		 * everything between 0 and KERNEL_TEXT_SIZE, so do
+		 * not propagate lookup failures back to users:
+		 */
+		__change_page_attr(address, pfn, mask_set, mask_clr);
+	}
+#endif
 	return err;
 }
 

From 9af993a92623e022c176459fa6607a564b9a7eaf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 866/890] x86: make ioremap() UC by default

Yes! A mere 120 c_p_a() fixing and rewriting patches later,
we are now confident that we can enable UC by default for
ioremap(), on x86 too.

Every other architectures was doing this already. Doing so
makes Linux more robust against MTRR mixups (which might go
unnoticed if BIOS writers test other OSs only - where PAT
might override bad MTRRs defaults).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/io_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index ee7a5c955962..f64a59cc396d 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -166,7 +166,7 @@ extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size);
  */
 static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 {
-	return ioremap_cache(offset, size);
+	return ioremap_nocache(offset, size);
 }
 
 extern void iounmap(volatile void __iomem *addr);

From 29caf2f98cdb266dffb50dfd412f951e8d46f719 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 867/890] x86: add is_f00f_bug helper to fault_32|64.c

Further towards unifying these files, add another helper
in same spirit as is_errata93.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 39 ++++++++++++++++++++++-----------------
 arch/x86/mm/fault_64.c | 24 ++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index f4f8c324715f..75ae8c498d67 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -211,8 +211,6 @@ void dump_pagetable(unsigned long address)
 	printk("\n");
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
 	unsigned index = pgd_index(address);
@@ -288,6 +286,26 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+void do_invalid_op(struct pt_regs *, unsigned long);
+
+static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_F00F_BUG
+	unsigned long nr;
+	/*
+	 * Pentium F0 0F C7 C8 bug workaround.
+	 */
+	if (boot_cpu_data.f00f_bug) {
+		nr = (address - idt_descr.address) >> 3;
+
+		if (nr == 6) {
+			do_invalid_op(regs, 0);
+			return 1;
+		}
+	}
+#endif
+	return 0;
+}
 
 /*
  * Handle a fault on the vmalloc or module mapping area
@@ -570,21 +588,8 @@ bad_area_nosemaphore:
 		return;
 	}
 
-#ifdef CONFIG_X86_F00F_BUG
-	/*
-	 * Pentium F0 0F C7 C8 bug workaround.
-	 */
-	if (boot_cpu_data.f00f_bug) {
-		unsigned long nr;
-
-		nr = (address - idt_descr.address) >> 3;
-
-		if (nr == 6) {
-			do_invalid_op(regs, 0);
-			return;
-		}
-	}
-#endif
+	if (is_f00f_bug(regs, address))
+		return;
 
 no_context:
 	/* Are we prepared to handle this kernel fault?  */
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index dd26e680a431..df13487aa83a 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -256,6 +256,27 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+void do_invalid_op(struct pt_regs *, unsigned long);
+
+static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_F00F_BUG
+	unsigned long nr;
+	/*
+	 * Pentium F0 0F C7 C8 bug workaround.
+	 */
+	if (boot_cpu_data.f00f_bug) {
+		nr = (address - idt_descr.address) >> 3;
+
+		if (nr == 6) {
+			do_invalid_op(regs, 0);
+			return 1;
+		}
+	}
+#endif
+	return 0;
+}
+
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
 {
@@ -581,6 +602,9 @@ bad_area_nosemaphore:
 		return;
 	}
 
+	if (is_f00f_bug(regs, address))
+		return;
+
 no_context:
 	/* Are we prepared to handle this kernel fault?  */
 	if (fixup_exception(regs))

From 35f3266ffbee7ff8ca0474dc8dfa3b73d5a1d1ef Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:09 +0100
Subject: [PATCH 868/890] x86: add is_errata100 helper to fault_32|64.c

Further towards unifying these files, add another helper
in same spirit as is_errata93.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 19 +++++++++++++++++++
 arch/x86/mm/fault_64.c | 25 +++++++++++++++++--------
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 75ae8c498d67..0bd241729aa3 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -286,6 +286,22 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+/*
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
+ * addresses >4GB.  We catch this in the page fault handler because these
+ * addresses are not reachable. Just detect this case and return.  Any code
+ * segment in LDT is compatibility mode.
+ */
+static int is_errata100(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_64
+	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+	    (address >> 32))
+		return 1;
+#endif
+	return 0;
+}
+
 void do_invalid_op(struct pt_regs *, unsigned long);
 
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
@@ -566,6 +582,9 @@ bad_area_nosemaphore:
 		if (is_prefetch(regs, address, error_code))
 			return;
 
+		if (is_errata100(regs, address))
+			return;
+
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
 			printk(
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index df13487aa83a..ccbb8e30d9f5 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -256,6 +256,22 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+/*
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
+ * addresses >4GB.  We catch this in the page fault handler because these
+ * addresses are not reachable. Just detect this case and return.  Any code
+ * segment in LDT is compatibility mode.
+ */
+static int is_errata100(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_64
+	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+	    (address >> 32))
+		return 1;
+#endif
+	return 0;
+}
+
 void do_invalid_op(struct pt_regs *, unsigned long);
 
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
@@ -568,14 +584,7 @@ bad_area_nosemaphore:
 		if (is_prefetch(regs, address, error_code))
 			return;
 
-		/* Work around K8 erratum #100 K8 in compat mode
-		   occasionally jumps to illegal addresses >4GB.  We
-		   catch this here in the page fault handler because
-		   these addresses are not reachable. Just detect this
-		   case and return.  Any code segment in LDT is
-		   compatibility mode. */
-		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
-		    (address >> 32))
+		if (is_errata100(regs, address))
 			return;
 
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&

From b3279c7fd7acf807ab8395d16658514e8c222a37 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 869/890] x86: introduce show_fault_oops helper to
 fault_32|64.c

This will help when unifying the oops dumping code on 32/64
bit.  No functional changes.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 54 +++++++++++++++++++++++-------------------
 arch/x86/mm/fault_64.c | 21 ++++++++++------
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 0bd241729aa3..fda399758820 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -323,6 +323,35 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+			    unsigned long address)
+{
+	if (!oops_may_print())
+		return;
+
+#ifdef CONFIG_X86_PAE
+	if (error_code & PF_INSTR) {
+		int level;
+		pte_t *pte = lookup_address(address, &level);
+
+		if (pte && pte_present(*pte) && !pte_exec(*pte))
+			printk(KERN_CRIT "kernel tried to execute "
+				"NX-protected page - exploit attempt? "
+				"(uid: %d)\n", current->uid);
+	}
+#endif
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+				"pointer dereference");
+	else
+		printk(KERN_ALERT "BUG: unable to handle kernel paging"
+				" request");
+	printk(" at virtual address %08lx\n", address);
+	printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
+
+	dump_pagetable(address);
+}
+
 /*
  * Handle a fault on the vmalloc or module mapping area
  *
@@ -633,30 +662,7 @@ no_context:
 
 	bust_spinlocks(1);
 
-	if (oops_may_print()) {
-
-#ifdef CONFIG_X86_PAE
-		if (error_code & PF_INSTR) {
-			int level;
-			pte_t *pte = lookup_address(address, &level);
-
-			if (pte && pte_present(*pte) && !pte_exec(*pte))
-				printk(KERN_CRIT "kernel tried to execute "
-					"NX-protected page - exploit attempt? "
-					"(uid: %d)\n", current->uid);
-		}
-#endif
-		if (address < PAGE_SIZE)
-			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
-					"pointer dereference");
-		else
-			printk(KERN_ALERT "BUG: unable to handle kernel paging"
-					" request");
-		printk(" at virtual address %08lx\n", address);
-		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
-
-		dump_pagetable(address);
-	}
+	show_fault_oops(regs, error_code, address);
 
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index ccbb8e30d9f5..1897704dad46 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -293,6 +293,18 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+			    unsigned long address)
+{
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	printk(" at %016lx RIP: \n" KERN_ALERT, address);
+	printk_address(regs->ip, 1);
+	dump_pagetable(address);
+}
+
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
 {
@@ -636,13 +648,8 @@ no_context:
 
 	flags = oops_begin();
 
-	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at %016lx RIP: \n" KERN_ALERT, address);
-	printk_address(regs->ip, 1);
-	dump_pagetable(address);
+	show_fault_oops(regs, error_code, address);
+
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;

From 19f0dda91e0dcb38c97bbfcff62b108a3bbc7907 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 870/890] x86: unify page fault oops printing

This changes the oops dumping format for page faults to
be similar between X86_32 and 64.

This is the first user of printk_address on X86_32.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 12 ++++++------
 arch/x86/mm/fault_64.c |  9 ++++++---
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index fda399758820..28ea3d3ec8f8 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -340,15 +340,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 				"(uid: %d)\n", current->uid);
 	}
 #endif
+	printk(KERN_ALERT "BUG: unable to handle kernel ");
 	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "BUG: unable to handle kernel NULL "
-				"pointer dereference");
+		printk(KERN_CONT "NULL pointer dereference");
 	else
-		printk(KERN_ALERT "BUG: unable to handle kernel paging"
-				" request");
-	printk(" at virtual address %08lx\n", address);
-	printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
+		printk(KERN_CONT "paging request");
+	printk(KERN_CONT " at %08lx\n", address);
 
+	printk(KERN_ALERT "IP:");
+	printk_address(regs->ip, 1);
 	dump_pagetable(address);
 }
 
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 1897704dad46..e12c34ba4ba1 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -296,11 +296,14 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 			    unsigned long address)
 {
+	printk(KERN_ALERT "BUG: unable to handle kernel ");
 	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+		printk(KERN_CONT "NULL pointer dereference");
 	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at %016lx RIP: \n" KERN_ALERT, address);
+		printk(KERN_CONT "paging request");
+	printk(KERN_CONT " at %016lx\n", address);
+
+	printk(KERN_ALERT "IP:");
 	printk_address(regs->ip, 1);
 	dump_pagetable(address);
 }

From bed23c67fe077b4d66cb3329263d7cfa33b3fd67 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 871/890] x86: use reboot_type on EFI 32

This patch makes reboot_type of BOOT_EFI is used on i386 too. Because
correpsonding reboot code of i386 and x86_64 is merged.

Signed-off-by: Huang Ying <ying.huang@intel.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index a70fe77354b8..174b067b9d03 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -371,11 +371,9 @@ void __init efi_init(void)
 		printk(KERN_WARNING "Kernel-defined memdesc"
 		       "doesn't match the one from EFI!\n");
 
-#ifdef CONFIG_X86_64
 	/* Setup for EFI runtime service */
 	reboot_type = BOOT_EFI;
 
-#endif
 #if EFI_DEBUG
 	print_efi_memmap();
 #endif

From a3828064be4ed8e95907d3943e7af13cb709694d Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 872/890] x86: fixes some bugs about EFI memory map handling

This patch fixes some bugs of EFI memory handing code.

- On x86_64, it is possible that EFI memory map can not be mapped via
  identity map, so efi_map_memmap is removed, just use early_ioremap.

- On i386, the EFI memory map mapping take effect cross paging_init,
  so it is not necessary to use efi_map_memmap.

- EFI memory map is unmapped in efi_enter_virtual_mode to avoid
  early_ioremap leak.

Signed-off-by: Huang Ying <ying.huang@intel.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi.c      |  2 ++
 arch/x86/kernel/efi_32.c   | 15 ---------------
 arch/x86/kernel/efi_64.c   |  9 ---------
 arch/x86/kernel/setup_32.c |  2 --
 arch/x86/kernel/setup_64.c |  4 +---
 5 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 174b067b9d03..1411324a625c 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -474,6 +474,8 @@ void __init efi_enter_virtual_mode(void)
 	efi.reset_system = virt_efi_reset_system;
 	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
 	runtime_code_page_mkexec();
+	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+	memmap.map = NULL;
 }
 
 /*
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 114b896d7573..cb91f985b4a1 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -109,18 +109,3 @@ void efi_call_phys_epilog(void)
 
 	local_irq_restore(efi_rt_eflags);
 }
-
-/*
- * We need to map the EFI memory map again after paging_init().
- */
-void __init efi_map_memmap(void)
-{
-	memmap.map = NULL;
-
-	memmap.map = early_ioremap((unsigned long) memmap.phys_map,
-			(memmap.nr_map * memmap.desc_size));
-	if (memmap.map == NULL)
-		printk(KERN_ERR "Could not remap the EFI memmap!\n");
-
-	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 9f8a75594398..4b73992c1e11 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -103,15 +103,6 @@ void __init efi_call_phys_epilog(void)
 	local_irq_restore(efi_flags);
 }
 
-/*
- * We need to map the EFI memory map again after init_memory_mapping().
- */
-void __init efi_map_memmap(void)
-{
-	memmap.map = __va(memmap.phys_map);
-	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-}
-
 void __init efi_reserve_bootmem(void)
 {
 	reserve_bootmem_generic((unsigned long)memmap.phys_map,
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index c038b09b1723..9c0ef4945a58 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -808,8 +808,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
-	if (efi_enabled)
-		efi_map_memmap();
 
 #ifdef CONFIG_ACPI
 	/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 4ccad185ab7e..697533e86822 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -378,10 +378,8 @@ void __init setup_arch(char **cmdline_p)
        acpi_reserve_bootmem();
 #endif
 
-	if (efi_enabled) {
-		efi_map_memmap();
+	if (efi_enabled)
 		efi_reserve_bootmem();
-	}
 
        /*
 	* Find and reserve possible boot-time SMP configuration:

From 14a62c34b134d24f7fedb8fd66028ecdcffb32c8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 873/890] x86: unify ioremap

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 303 ++++++++++++++++++++++++------------------
 1 file changed, 176 insertions(+), 127 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8a7b725ce3c7..bb732bb79b4a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -47,10 +47,10 @@
 #include <asm/numa.h>
 
 #ifndef Dprintk
-#define Dprintk(x...)
+# define Dprintk(x...)
 #endif
 
-const struct dma_mapping_ops* dma_ops;
+const struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static unsigned long dma_reserve __initdata;
@@ -67,22 +67,26 @@ void show_mem(void)
 {
 	long i, total = 0, reserved = 0;
 	long shared = 0, cached = 0;
-	pg_data_t *pgdat;
 	struct page *page;
+	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Free swap:       %6ldkB\n",
+		nr_swap_pages << (PAGE_SHIFT-10));
 
 	for_each_online_pgdat(pgdat) {
-               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-			/* this loop can take a while with 256 GB and 4k pages
-			   so update the NMI watchdog */
-			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
+		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+			/*
+			 * This loop can take a while with 256 GB and
+			 * 4k pages so defer the NMI watchdog:
+			 */
+			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
 				touch_nmi_watchdog();
-			}
+
 			if (!pfn_valid(pgdat->node_start_pfn + i))
 				continue;
+
 			page = pfn_to_page(pgdat->node_start_pfn + i);
 			total++;
 			if (PageReserved(page))
@@ -91,32 +95,37 @@ void show_mem(void)
 				cached++;
 			else if (page_count(page))
 				shared += page_count(page) - 1;
-               }
+		}
 	}
-	printk(KERN_INFO "%lu pages of RAM\n", total);
-	printk(KERN_INFO "%lu reserved pages\n",reserved);
-	printk(KERN_INFO "%lu pages shared\n",shared);
-	printk(KERN_INFO "%lu pages swap cached\n",cached);
+	printk(KERN_INFO "%lu pages of RAM\n",		total);
+	printk(KERN_INFO "%lu reserved pages\n",	reserved);
+	printk(KERN_INFO "%lu pages shared\n",		shared);
+	printk(KERN_INFO "%lu pages swap cached\n",	cached);
 }
 
 int after_bootmem;
 
 static __init void *spp_getpage(void)
-{ 
+{
 	void *ptr;
+
 	if (after_bootmem)
-		ptr = (void *) get_zeroed_page(GFP_ATOMIC); 
+		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
 	else
 		ptr = alloc_bootmem_pages(PAGE_SIZE);
-	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
-		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
+
+	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
+		panic("set_pte_phys: cannot allocate page data %s\n",
+			after_bootmem ? "after bootmem" : "");
+	}
 
 	Dprintk("spp_getpage %p\n", ptr);
-	return ptr;
-} 
 
-static __init void set_pte_phys(unsigned long vaddr,
-			 unsigned long phys, pgprot_t prot)
+	return ptr;
+}
+
+static __init void
+set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -132,10 +141,11 @@ static __init void set_pte_phys(unsigned long vaddr,
 	}
 	pud = pud_offset(pgd, vaddr);
 	if (pud_none(*pud)) {
-		pmd = (pmd_t *) spp_getpage(); 
+		pmd = (pmd_t *) spp_getpage();
 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
 		if (pmd != pmd_offset(pud, 0)) {
-			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
+			printk("PAGETABLE BUG #01! %p <-> %p\n",
+				pmd, pmd_offset(pud, 0));
 			return;
 		}
 	}
@@ -164,8 +174,8 @@ static __init void set_pte_phys(unsigned long vaddr,
 }
 
 /* NOTE: this is meant to be run only at boot */
-void __init 
-__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
 
@@ -180,18 +190,19 @@ static unsigned long __initdata table_start;
 static unsigned long __meminitdata table_end;
 
 static __meminit void *alloc_low_page(unsigned long *phys)
-{ 
+{
 	unsigned long pfn = table_end++;
 	void *adr;
 
 	if (after_bootmem) {
 		adr = (void *)get_zeroed_page(GFP_ATOMIC);
 		*phys = __pa(adr);
+
 		return adr;
 	}
 
-	if (pfn >= end_pfn) 
-		panic("alloc_low_page: ran out of memory"); 
+	if (pfn >= end_pfn)
+		panic("alloc_low_page: ran out of memory");
 
 	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
 	memset(adr, 0, PAGE_SIZE);
@@ -200,44 +211,49 @@ static __meminit void *alloc_low_page(unsigned long *phys)
 }
 
 static __meminit void unmap_low_page(void *adr)
-{ 
-
+{
 	if (after_bootmem)
 		return;
 
 	early_iounmap(adr, PAGE_SIZE);
-} 
+}
 
 /* Must run before zap_low_mappings */
 __meminit void *early_ioremap(unsigned long addr, unsigned long size)
 {
-	unsigned long vaddr;
 	pmd_t *pmd, *last_pmd;
+	unsigned long vaddr;
 	int i, pmds;
 
 	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
 	vaddr = __START_KERNEL_map;
 	pmd = level2_kernel_pgt;
 	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+
 	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
 		for (i = 0; i < pmds; i++) {
 			if (pmd_present(pmd[i]))
-				goto next;
+				goto continue_outer_loop;
 		}
 		vaddr += addr & ~PMD_MASK;
 		addr &= PMD_MASK;
+
 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
 			set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
 		__flush_tlb_all();
+
 		return (void *)vaddr;
-	next:
+continue_outer_loop:
 		;
 	}
 	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+
 	return NULL;
 }
 
-/* To avoid virtual aliases later */
+/*
+ * To avoid virtual aliases later:
+ */
 __meminit void early_iounmap(void *addr, unsigned long size)
 {
 	unsigned long vaddr;
@@ -247,8 +263,10 @@ __meminit void early_iounmap(void *addr, unsigned long size)
 	vaddr = (unsigned long)addr;
 	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
 	pmd = level2_kernel_pgt + pmd_index(vaddr);
+
 	for (i = 0; i < pmds; i++)
 		pmd_clear(pmd + i);
+
 	__flush_tlb_all();
 }
 
@@ -262,9 +280,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 		pmd_t *pmd = pmd_page + pmd_index(address);
 
 		if (address >= end) {
-			if (!after_bootmem)
+			if (!after_bootmem) {
 				for (; i < PTRS_PER_PMD; i++, pmd++)
 					set_pmd(pmd, __pmd(0));
+			}
 			break;
 		}
 
@@ -280,19 +299,19 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 static void __meminit
 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
 {
-	pmd_t *pmd = pmd_offset(pud,0);
+	pmd_t *pmd = pmd_offset(pud, 0);
 	spin_lock(&init_mm.page_table_lock);
 	phys_pmd_init(pmd, address, end);
 	spin_unlock(&init_mm.page_table_lock);
 	__flush_tlb_all();
 }
 
-static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
-{ 
+static void __meminit
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+{
 	int i = pud_index(addr);
 
-
-	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
+	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
 		unsigned long pmd_phys;
 		pud_t *pud = pud_page + pud_index(addr);
 		pmd_t *pmd;
@@ -300,10 +319,11 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
 		if (addr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
-			set_pud(pud, __pud(0)); 
+		if (!after_bootmem &&
+				!e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
+			set_pud(pud, __pud(0));
 			continue;
-		} 
+		}
 
 		if (pud_val(*pud)) {
 			phys_pmd_update(pud, addr, end);
@@ -311,14 +331,16 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
 		}
 
 		pmd = alloc_low_page(&pmd_phys);
+
 		spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
 		phys_pmd_init(pmd, addr, end);
 		spin_unlock(&init_mm.page_table_lock);
+
 		unmap_low_page(pmd);
 	}
 	__flush_tlb_all();
-} 
+}
 
 static void __init find_early_table_space(unsigned long end)
 {
@@ -329,11 +351,13 @@ static void __init find_early_table_space(unsigned long end)
 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
 		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
 
- 	/* RED-PEN putting page tables only on node 0 could
- 	   cause a hotspot and fill up ZONE_DMA. The page tables
- 	   need roughly 0.5KB per GB. */
- 	start = 0x8000;
- 	table_start = find_e820_area(start, end, tables);
+	/*
+	 * RED-PEN putting page tables only on node 0 could
+	 * cause a hotspot and fill up ZONE_DMA. The page tables
+	 * need roughly 0.5KB per GB.
+	 */
+	start = 0x8000;
+	table_start = find_e820_area(start, end, tables);
 	if (table_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
@@ -345,20 +369,23 @@ static void __init find_early_table_space(unsigned long end)
 		(table_start << PAGE_SHIFT) + tables);
 }
 
-/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
-   This runs before bootmem is initialized and gets pages directly from the 
-   physical memory. To access them they are temporarily mapped. */
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
 void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
-{ 
-	unsigned long next; 
+{
+	unsigned long next;
 
 	Dprintk("init_memory_mapping\n");
 
-	/* 
+	/*
 	 * Find space for the kernel direct mapping tables.
-	 * Later we should allocate these tables in the local node of the memory
-	 * mapped.  Unfortunately this is done currently before the nodes are 
-	 * discovered.
+	 *
+	 * Later we should allocate these tables in the local node of the
+	 * memory mapped. Unfortunately this is done currently before the
+	 * nodes are discovered.
 	 */
 	if (!after_bootmem)
 		find_early_table_space(end);
@@ -367,8 +394,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 	end = (unsigned long)__va(end);
 
 	for (; start < end; start = next) {
-		unsigned long pud_phys; 
 		pgd_t *pgd = pgd_offset_k(start);
+		unsigned long pud_phys;
 		pud_t *pud;
 
 		if (after_bootmem)
@@ -377,13 +404,13 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 			pud = alloc_low_page(&pud_phys);
 
 		next = start + PGDIR_SIZE;
-		if (next > end) 
-			next = end; 
+		if (next > end)
+			next = end;
 		phys_pud_init(pud, __pa(start), __pa(next));
 		if (!after_bootmem)
 			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
 		unmap_low_page(pud);
-	} 
+	}
 
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();
@@ -396,6 +423,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
@@ -407,39 +435,48 @@ void __init paging_init(void)
 }
 #endif
 
-/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
-   from the CPU leading to inconsistent cache lines. address and size
-   must be aligned to 2MB boundaries. 
-   Does nothing when the mapping doesn't exist. */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size) 
+/*
+ * Unmap a kernel mapping if it exists. This is useful to avoid
+ * prefetches from the CPU leading to inconsistent cache lines.
+ * address and size must be aligned to 2MB boundaries.
+ * Does nothing when the mapping doesn't exist.
+ */
+void __init clear_kernel_mapping(unsigned long address, unsigned long size)
 {
 	unsigned long end = address + size;
 
 	BUG_ON(address & ~LARGE_PAGE_MASK);
-	BUG_ON(size & ~LARGE_PAGE_MASK); 
-	
-	for (; address < end; address += LARGE_PAGE_SIZE) { 
+	BUG_ON(size & ~LARGE_PAGE_MASK);
+
+	for (; address < end; address += LARGE_PAGE_SIZE) {
 		pgd_t *pgd = pgd_offset_k(address);
 		pud_t *pud;
 		pmd_t *pmd;
+
 		if (pgd_none(*pgd))
 			continue;
+
 		pud = pud_offset(pgd, address);
 		if (pud_none(*pud))
-			continue; 
+			continue;
+
 		pmd = pmd_offset(pud, address);
 		if (!pmd || pmd_none(*pmd))
-			continue; 
-		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { 
-			/* Could handle this, but it should not happen currently. */
-			printk(KERN_ERR 
-	       "clear_kernel_mapping: mapping has been split. will leak memory\n"); 
-			pmd_ERROR(*pmd); 
+			continue;
+
+		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
+			/*
+			 * Could handle this, but it should not happen
+			 * currently:
+			 */
+			printk(KERN_ERR "clear_kernel_mapping: "
+				"mapping has been split. will leak memory\n");
+			pmd_ERROR(*pmd);
 		}
-		set_pmd(pmd, __pmd(0)); 		
+		set_pmd(pmd, __pmd(0));
 	}
 	__flush_tlb_all();
-} 
+}
 
 /*
  * Memory hotplug specific functions
@@ -466,16 +503,13 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	init_memory_mapping(start, (start + size -1));
+	init_memory_mapping(start, start + size-1);
 
 	ret = __add_pages(zone, start_pfn, nr_pages);
 	if (ret)
-		goto error;
+		printk("%s: Problem encountered in __add_pages!\n", __func__);
 
 	return ret;
-error:
-	printk("%s: Problem encountered in __add_pages!\n", __func__);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
@@ -489,8 +523,8 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
-			 kcore_vsyscall;
+static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
+			 kcore_modules, kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -518,7 +552,6 @@ void __init mem_init(void)
 #endif
 	reservedpages = end_pfn - totalram_pages -
 					absent_pages_in_range(0, end_pfn);
-
 	after_bootmem = 1;
 
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
@@ -526,15 +559,16 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 		   VMALLOC_END-VMALLOC_START);
 	kclist_add(&kcore_kernel, &_stext, _end - _stext);
 	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
-	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 
+	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
 				 VSYSCALL_END - VSYSCALL_START);
 
-	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
+	printk("Memory: %luk/%luk available (%ldk kernel code, "
+				"%ldk reserved, %ldk data, %ldk init)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 		end_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
@@ -561,6 +595,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
 #else
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
@@ -596,7 +631,7 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_KPROBES
 	start = (unsigned long)__start_rodata;
 #endif
-	
+
 	end = (unsigned long)__end_rodata;
 	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
 	end &= PAGE_MASK;
@@ -627,17 +662,21 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 
-{ 
+void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+{
 #ifdef CONFIG_NUMA
 	int nid = phys_to_nid(phys);
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
+
 	if (pfn >= end_pfn) {
-		/* This can happen with kdump kernels when accessing firmware
-		   tables. */
+		/*
+		 * This can happen with kdump kernels when accessing
+		 * firmware tables:
+		 */
 		if (pfn < end_pfn_map)
 			return;
+
 		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
 				phys, len);
 		return;
@@ -645,9 +684,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
 	/* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-  	reserve_bootmem_node(NODE_DATA(nid), phys, len);
-#else       		
-	reserve_bootmem(phys, len);    
+	reserve_bootmem_node(NODE_DATA(nid), phys, len);
+#else
+	reserve_bootmem(phys, len);
 #endif
 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
@@ -655,46 +694,49 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 	}
 }
 
-int kern_addr_valid(unsigned long addr) 
-{ 
+int kern_addr_valid(unsigned long addr)
+{
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
 	if (above != 0 && above != -1UL)
-		return 0; 
-	
+		return 0;
+
 	pgd = pgd_offset_k(addr);
 	if (pgd_none(*pgd))
 		return 0;
 
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud))
-		return 0; 
+		return 0;
 
 	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		return 0;
+
 	if (pmd_large(*pmd))
 		return pfn_valid(pmd_pfn(*pmd));
 
 	pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte))
 		return 0;
+
 	return pfn_valid(pte_pfn(*pte));
 }
 
-/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
-   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
-   not need special handling anymore. */
-
+/*
+ * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
+ */
 static struct vm_area_struct gate_vma = {
-	.vm_start = VSYSCALL_START,
-	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
-	.vm_page_prot = PAGE_READONLY_EXEC,
-	.vm_flags = VM_READ | VM_EXEC
+	.vm_start	= VSYSCALL_START,
+	.vm_end		= VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC
 };
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
@@ -709,14 +751,17 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 int in_gate_area(struct task_struct *task, unsigned long addr)
 {
 	struct vm_area_struct *vma = get_gate_vma(task);
+
 	if (!vma)
 		return 0;
+
 	return (addr >= vma->vm_start) && (addr < vma->vm_end);
 }
 
-/* Use this when you have no reliable task/vma, typically from interrupt
- * context.  It is less reliable than using the task's vma and may give
- * false positives.
+/*
+ * Use this when you have no reliable task/vma, typically from interrupt
+ * context. It is less reliable than using the task's vma and may give
+ * false positives:
  */
 int in_gate_area_no_task(unsigned long addr)
 {
@@ -736,8 +781,8 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
-int __meminit vmemmap_populate(struct page *start_page,
-						unsigned long size, int node)
+int __meminit
+vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
 	unsigned long addr = (unsigned long)start_page;
 	unsigned long end = (unsigned long)(start_page + size);
@@ -752,6 +797,7 @@ int __meminit vmemmap_populate(struct page *start_page,
 		pgd = vmemmap_pgd_populate(addr, node);
 		if (!pgd)
 			return -ENOMEM;
+
 		pud = vmemmap_pud_populate(pgd, addr, node);
 		if (!pud)
 			return -ENOMEM;
@@ -759,19 +805,22 @@ int __meminit vmemmap_populate(struct page *start_page,
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			pte_t entry;
-			void *p = vmemmap_alloc_block(PMD_SIZE, node);
+			void *p;
+
+			p = vmemmap_alloc_block(PMD_SIZE, node);
 			if (!p)
 				return -ENOMEM;
 
-			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
+			entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+							PAGE_KERNEL_LARGE);
 			set_pmd(pmd, __pmd(pte_val(entry)));
 
 			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
 				addr, addr + PMD_SIZE - 1, p, node);
-		} else
+		} else {
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
+		}
 	}
-
 	return 0;
 }
 #endif

From 10f22dde556d1ed41d55355d1fb8ad495f9810c8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 874/890] x86: arch/x86/mm/init_64.c printk fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb732bb79b4a..a95272644591 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -46,10 +46,6 @@
 #include <asm/kdebug.h>
 #include <asm/numa.h>
 
-#ifndef Dprintk
-# define Dprintk(x...)
-#endif
-
 const struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
@@ -119,7 +115,7 @@ static __init void *spp_getpage(void)
 			after_bootmem ? "after bootmem" : "");
 	}
 
-	Dprintk("spp_getpage %p\n", ptr);
+	pr_debug("spp_getpage %p\n", ptr);
 
 	return ptr;
 }
@@ -132,11 +128,12 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 	pmd_t *pmd;
 	pte_t *pte, new_pte;
 
-	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
+	pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
 
 	pgd = pgd_offset_k(vaddr);
 	if (pgd_none(*pgd)) {
-		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
+		printk(KERN_ERR
+			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
 		return;
 	}
 	pud = pud_offset(pgd, vaddr);
@@ -144,7 +141,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 		pmd = (pmd_t *) spp_getpage();
 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
 		if (pmd != pmd_offset(pud, 0)) {
-			printk("PAGETABLE BUG #01! %p <-> %p\n",
+			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
 				pmd, pmd_offset(pud, 0));
 			return;
 		}
@@ -154,7 +151,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 		pte = (pte_t *) spp_getpage();
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
 		if (pte != pte_offset_kernel(pmd, 0)) {
-			printk("PAGETABLE BUG #02!\n");
+			printk(KERN_ERR "PAGETABLE BUG #02!\n");
 			return;
 		}
 	}
@@ -180,7 +177,7 @@ __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 	unsigned long address = __fix_to_virt(idx);
 
 	if (idx >= __end_of_fixed_addresses) {
-		printk("Invalid __set_fixmap\n");
+		printk(KERN_ERR "Invalid __set_fixmap\n");
 		return;
 	}
 	set_pte_phys(address, phys, prot);
@@ -246,7 +243,7 @@ __meminit void *early_ioremap(unsigned long addr, unsigned long size)
 continue_outer_loop:
 		;
 	}
-	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+	printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
 
 	return NULL;
 }
@@ -378,7 +375,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 {
 	unsigned long next;
 
-	Dprintk("init_memory_mapping\n");
+	pr_debug("init_memory_mapping\n");
 
 	/*
 	 * Find space for the kernel direct mapping tables.
@@ -506,8 +503,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	init_memory_mapping(start, start + size-1);
 
 	ret = __add_pages(zone, start_pfn, nr_pages);
-	if (ret)
-		printk("%s: Problem encountered in __add_pages!\n", __func__);
+	WARN_ON(1);
 
 	return ret;
 }
@@ -567,7 +563,7 @@ void __init mem_init(void)
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
 				 VSYSCALL_END - VSYSCALL_START);
 
-	printk("Memory: %luk/%luk available (%ldk kernel code, "
+	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 				"%ldk reserved, %ldk data, %ldk init)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 		end_pfn << (PAGE_SHIFT-10),
@@ -646,10 +642,10 @@ void mark_rodata_ro(void)
 	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
-	printk("Testing CPA: undo %lx-%lx\n", start, end);
+	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
 
-	printk("Testing CPA: again\n");
+	printk(KERN_INFO "Testing CPA: again\n");
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 #endif
 }

From 8550eb99821b3f78cddfd19964f30e8bc4e429e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 875/890] x86: arch/x86/mm/init_32.c cleanup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c     | 240 +++++++++++++++++++-------------------
 include/asm-x86/bugs.h    |   3 +-
 include/asm-x86/numa_32.h |  14 ++-
 3 files changed, 137 insertions(+), 120 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8ed5c189d7aa..c6975fc6944a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -39,6 +39,7 @@
 #include <asm/fixmap.h>
 #include <asm/e820.h>
 #include <asm/apic.h>
+#include <asm/bugs.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
@@ -50,7 +51,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
-static int noinline do_test_wp_bit(void);
+static noinline int do_test_wp_bit(void);
 
 /*
  * Creates a middle page table and puts a pointer to it in the
@@ -61,7 +62,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 {
 	pud_t *pud;
 	pmd_t *pmd_table;
-		
+
 #ifdef CONFIG_X86_PAE
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
@@ -69,18 +70,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
-		if (pmd_table != pmd_offset(pud, 0))
-			BUG();
+		BUG_ON(pmd_table != pmd_offset(pud, 0));
 	}
 #endif
 	pud = pud_offset(pgd, 0);
 	pmd_table = pmd_offset(pud, 0);
+
 	return pmd_table;
 }
 
 /*
  * Create a page table and place a pointer to it in a middle page
- * directory entry.
+ * directory entry:
  */
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
@@ -90,9 +91,10 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
-		if (!page_table)
+		if (!page_table) {
 			page_table =
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		}
 
 		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -103,22 +105,21 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 }
 
 /*
- * This function initializes a certain range of kernel virtual memory 
+ * This function initializes a certain range of kernel virtual memory
  * with new bootmem page tables, everywhere page tables are missing in
  * the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space 
- * so we can cache the place of the first one and move around without 
+ *
+ * NOTE: The pagetables are allocated contiguous on the physical space
+ * so we can cache the place of the first one and move around without
  * checking the pgd every time.
  */
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+static void __init
+page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
-	pgd_t *pgd;
-	pmd_t *pmd;
 	int pgd_idx, pmd_idx;
 	unsigned long vaddr;
+	pgd_t *pgd;
+	pmd_t *pmd;
 
 	vaddr = start;
 	pgd_idx = pgd_index(vaddr);
@@ -128,7 +129,8 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
 	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
 		pmd = one_md_table_init(pgd);
 		pmd = pmd + pmd_index(vaddr);
-		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
+							pmd++, pmd_idx++) {
 			one_page_table_init(pmd);
 
 			vaddr += PMD_SIZE;
@@ -145,17 +147,17 @@ static inline int is_kernel_text(unsigned long addr)
 }
 
 /*
- * This maps the physical memory to kernel virtual address space, a total 
- * of max_low_pfn pages, by creating page tables starting from address 
- * PAGE_OFFSET.
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET:
  */
 static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 {
+	int pgd_idx, pmd_idx, pte_ofs;
 	unsigned long pfn;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
-	int pgd_idx, pmd_idx, pte_ofs;
 
 	pgd_idx = pgd_index(PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
@@ -165,40 +167,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		pmd = one_md_table_init(pgd);
 		if (pfn >= max_low_pfn)
 			continue;
+
 		for (pmd_idx = 0;
 		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
 		     pmd++, pmd_idx++) {
-			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
 
-			/* Map with big pages if possible, otherwise
-			   create normal page tables. */
+			/*
+			 * Map with big pages if possible, otherwise
+			 * create normal page tables:
+			 */
 			if (cpu_has_pse) {
-				unsigned int address2;
+				unsigned int addr2;
 				pgprot_t prot = PAGE_KERNEL_LARGE;
 
-				address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE +
+				addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
 					PAGE_OFFSET + PAGE_SIZE-1;
 
-				if (is_kernel_text(address) ||
-				    is_kernel_text(address2))
+				if (is_kernel_text(addr) ||
+				    is_kernel_text(addr2))
 					prot = PAGE_KERNEL_LARGE_EXEC;
 
 				set_pmd(pmd, pfn_pmd(pfn, prot));
 
 				pfn += PTRS_PER_PTE;
-			} else {
-				pte = one_page_table_init(pmd);
+				continue;
+			}
+			pte = one_page_table_init(pmd);
 
-				for (pte_ofs = 0;
-				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
-				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
-					pgprot_t prot = PAGE_KERNEL;
+			for (pte_ofs = 0;
+			     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+			     pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
+				pgprot_t prot = PAGE_KERNEL;
 
-					if (is_kernel_text(address))
-						prot = PAGE_KERNEL_EXEC;
+				if (is_kernel_text(addr))
+					prot = PAGE_KERNEL_EXEC;
 
-					set_pte(pte, pfn_pte(pfn, prot));
-				}
+				set_pte(pte, pfn_pte(pfn, prot));
 			}
 		}
 	}
@@ -215,14 +220,19 @@ static inline int page_kills_ppro(unsigned long pagenr)
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
 
-#define kmap_get_fixmap_pte(vaddr)					\
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
+static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
+{
+	return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
+			vaddr), vaddr), vaddr);
+}
 
 static void __init kmap_init(void)
 {
 	unsigned long kmap_vstart;
 
-	/* cache the first kmap pte */
+	/*
+	 * Cache the first kmap pte:
+	 */
 	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
 
@@ -231,11 +241,11 @@ static void __init kmap_init(void)
 
 static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
+	unsigned long vaddr;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-	unsigned long vaddr;
 
 	vaddr = PKMAP_BASE;
 	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
@@ -244,7 +254,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pud = pud_offset(pgd, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	pte = pte_offset_kernel(pmd, vaddr);
-	pkmap_page_table = pte;	
+	pkmap_page_table = pte;
 }
 
 static void __meminit free_new_highpage(struct page *page)
@@ -263,7 +273,8 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 		SetPageReserved(page);
 }
 
-static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+static int __meminit
+add_one_highpage_hotplug(struct page *page, unsigned long pfn)
 {
 	free_new_highpage(page);
 	totalram_pages++;
@@ -271,6 +282,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
 	max_mapnr = max(pfn, max_mapnr);
 #endif
 	num_physpages++;
+
 	return 0;
 }
 
@@ -278,7 +290,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
  * Not currently handling the NUMA case.
  * Assuming single node and all memory that
  * has been added dynamically that would be
- * onlined here is in HIGHMEM
+ * onlined here is in HIGHMEM.
  */
 void __meminit online_page(struct page *page)
 {
@@ -286,13 +298,11 @@ void __meminit online_page(struct page *page)
 	add_one_highpage_hotplug(page, page_to_pfn(page));
 }
 
-
-#ifdef CONFIG_NUMA
-extern void set_highmem_pages_init(int);
-#else
+#ifndef CONFIG_NUMA
 static void __init set_highmem_pages_init(int bad_ppro)
 {
 	int pfn;
+
 	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
 		/*
 		 * Holes under sparsemem might not have no mem_map[]:
@@ -302,23 +312,18 @@ static void __init set_highmem_pages_init(int bad_ppro)
 	}
 	totalram_pages += totalhigh_pages;
 }
-#endif /* CONFIG_FLATMEM */
+#endif /* !CONFIG_NUMA */
 
 #else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
-#define set_highmem_pages_init(bad_ppro) do { } while (0)
+# define kmap_init()				do { } while (0)
+# define permanent_kmaps_init(pgd_base)		do { } while (0)
+# define set_highmem_pages_init(bad_ppro)	do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
 pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
 EXPORT_SYMBOL(__PAGE_KERNEL);
-pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
-#ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
-#else
-#define remap_numa_kva() do {} while (0)
-#endif
+pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
@@ -382,10 +387,10 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * be partially populated, and so it avoids stomping on any existing
  * mappings.
  */
-static void __init pagetable_init (void)
+static void __init pagetable_init(void)
 {
-	unsigned long vaddr, end;
 	pgd_t *pgd_base = swapper_pg_dir;
+	unsigned long vaddr, end;
 
 	paravirt_pagetable_setup_start(pgd_base);
 
@@ -424,7 +429,7 @@ static void __init pagetable_init (void)
  * driver might have split up a kernel 4MB mapping.
  */
 char __nosavedata swsusp_pg_dir[PAGE_SIZE]
-	__attribute__ ((aligned (PAGE_SIZE)));
+	__attribute__ ((aligned(PAGE_SIZE)));
 
 static inline void save_pg_dir(void)
 {
@@ -436,7 +441,7 @@ static inline void save_pg_dir(void)
 }
 #endif
 
-void zap_low_mappings (void)
+void zap_low_mappings(void)
 {
 	int i;
 
@@ -448,23 +453,24 @@ void zap_low_mappings (void)
 	 * Note that "pgd_clear()" doesn't do it for
 	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
+	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
 #ifdef CONFIG_X86_PAE
 		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
 		set_pgd(swapper_pg_dir+i, __pgd(0));
 #endif
+	}
 	flush_tlb_all();
 }
 
-int nx_enabled = 0;
+int nx_enabled;
 
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 #ifdef CONFIG_X86_PAE
 
-static int disable_nx __initdata = 0;
+static int disable_nx __initdata;
 
 /*
  * noexec = on|off
@@ -481,11 +487,14 @@ static int __init noexec_setup(char *str)
 			__supported_pte_mask |= _PAGE_NX;
 			disable_nx = 0;
 		}
-	} else if (!strcmp(str,"off")) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	} else
-		return -EINVAL;
+	} else {
+		if (!strcmp(str, "off")) {
+			disable_nx = 1;
+			__supported_pte_mask &= ~_PAGE_NX;
+		} else {
+			return -EINVAL;
+		}
+	}
 
 	return 0;
 }
@@ -497,6 +506,7 @@ static void __init set_nx(void)
 
 	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
 		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
 		if ((v[3] & (1 << 20)) && !disable_nx) {
 			rdmsr(MSR_EFER, l, h);
 			l |= EFER_NX;
@@ -506,7 +516,6 @@ static void __init set_nx(void)
 		}
 	}
 }
-
 #endif
 
 /*
@@ -523,7 +532,6 @@ void __init paging_init(void)
 	if (nx_enabled)
 		printk("NX (Execute Disable) protection: active\n");
 #endif
-
 	pagetable_init();
 
 	load_cr3(swapper_pg_dir);
@@ -547,7 +555,6 @@ void __init paging_init(void)
  * used to involve black magic jumps to work around some nasty CPU bugs,
  * but fortunately the switch to using exceptions got rid of all that.
  */
-
 static void __init test_wp_bit(void)
 {
 	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
@@ -567,19 +574,16 @@ static void __init test_wp_bit(void)
 	}
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc; 
+static struct kcore_list kcore_mem, kcore_vmalloc;
 
 void __init mem_init(void)
 {
-	extern int ppro_with_ram_bug(void);
 	int codesize, reservedpages, datasize, initsize;
-	int tmp;
-	int bad_ppro;
+	int tmp, bad_ppro;
 
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
-	
 	bad_ppro = ppro_with_ram_bug();
 
 #ifdef CONFIG_HIGHMEM
@@ -591,14 +595,13 @@ void __init mem_init(void)
 		BUG();
 	}
 #endif
- 
 	/* this will put all low memory onto the freelists */
 	totalram_pages += free_all_bootmem();
 
 	reservedpages = 0;
 	for (tmp = 0; tmp < max_low_pfn; tmp++)
 		/*
-		 * Only count reserved RAM pages
+		 * Only count reserved RAM pages:
 		 */
 		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
 			reservedpages++;
@@ -609,11 +612,12 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 		   VMALLOC_END-VMALLOC_START);
 
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
+			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
@@ -625,44 +629,45 @@ void __init mem_init(void)
 
 #if 1 /* double-sanity-check paranoia */
 	printk("virtual kernel memory layout:\n"
-	       "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
-	       "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
-	       "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-	       "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-	       "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-	       "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-	       "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
-	       FIXADDR_START, FIXADDR_TOP,
-	       (FIXADDR_TOP - FIXADDR_START) >> 10,
+		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
+		FIXADDR_START, FIXADDR_TOP,
+		(FIXADDR_TOP - FIXADDR_START) >> 10,
 
 #ifdef CONFIG_HIGHMEM
-	       PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
-	       (LAST_PKMAP*PAGE_SIZE) >> 10,
+		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+		(LAST_PKMAP*PAGE_SIZE) >> 10,
 #endif
 
-	       VMALLOC_START, VMALLOC_END,
-	       (VMALLOC_END - VMALLOC_START) >> 20,
+		VMALLOC_START, VMALLOC_END,
+		(VMALLOC_END - VMALLOC_START) >> 20,
 
-	       (unsigned long)__va(0), (unsigned long)high_memory,
-	       ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
+		(unsigned long)__va(0), (unsigned long)high_memory,
+		((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
 
-	       (unsigned long)&__init_begin, (unsigned long)&__init_end,
-	       ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
+		(unsigned long)&__init_begin, (unsigned long)&__init_end,
+		((unsigned long)&__init_end -
+		 (unsigned long)&__init_begin) >> 10,
 
-	       (unsigned long)&_etext, (unsigned long)&_edata,
-	       ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+		(unsigned long)&_etext, (unsigned long)&_edata,
+		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
 
-	       (unsigned long)&_text, (unsigned long)&_etext,
-	       ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+		(unsigned long)&_text, (unsigned long)&_etext,
+		((unsigned long)&_etext - (unsigned long)&_text) >> 10);
 
 #ifdef CONFIG_HIGHMEM
-	BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
-	BUG_ON(VMALLOC_END                     > PKMAP_BASE);
+	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
+	BUG_ON(VMALLOC_END				> PKMAP_BASE);
 #endif
-	BUG_ON(VMALLOC_START                   > VMALLOC_END);
-	BUG_ON((unsigned long)high_memory      > VMALLOC_START);
+	BUG_ON(VMALLOC_START				> VMALLOC_END);
+	BUG_ON((unsigned long)high_memory		> VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
 #ifdef CONFIG_X86_PAE
@@ -693,45 +698,45 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(zone, start_pfn, nr_pages);
 }
-
 #endif
 
 struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
-	if (PTRS_PER_PMD > 1)
+	if (PTRS_PER_PMD > 1) {
 		pmd_cache = kmem_cache_create("pmd",
 					      PTRS_PER_PMD*sizeof(pmd_t),
 					      PTRS_PER_PMD*sizeof(pmd_t),
 					      SLAB_PANIC,
 					      pmd_ctor);
+	}
 }
 
 /*
  * This function cannot be __init, since exceptions don't work in that
  * section.  Put this after the callers, so that it cannot be inlined.
  */
-static int noinline do_test_wp_bit(void)
+static noinline int do_test_wp_bit(void)
 {
 	char tmp_reg;
 	int flag;
 
 	__asm__ __volatile__(
-		"	movb %0,%1	\n"
-		"1:	movb %1,%0	\n"
-		"	xorl %2,%2	\n"
+		"	movb %0, %1	\n"
+		"1:	movb %1, %0	\n"
+		"	xorl %2, %2	\n"
 		"2:			\n"
-		".section __ex_table,\"a\"\n"
+		".section __ex_table, \"a\"\n"
 		"	.align 4	\n"
-		"	.long 1b,2b	\n"
+		"	.long 1b, 2b	\n"
 		".previous		\n"
 		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
 		 "=q" (tmp_reg),
 		 "=r" (flag)
 		:"2" (1)
 		:"memory");
-	
+
 	return flag;
 }
 
@@ -824,4 +829,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	free_init_pages("initrd memory", start, end);
 }
 #endif
-
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index aac8317420af..3fcc30dc0731 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_BUGS_H
 #define _ASM_X86_BUGS_H
 
-void check_bugs(void);
+extern void check_bugs(void);
+extern int ppro_with_ram_bug(void);
 
 #endif /* _ASM_X86_BUGS_H */
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 96fcb157db1d..03d0f7a9bf02 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -1,3 +1,15 @@
+#ifndef _ASM_X86_32_NUMA_H
+#define _ASM_X86_32_NUMA_H 1
 
-int pxm_to_nid(int pxm);
+extern int pxm_to_nid(int pxm);
 
+#ifdef CONFIG_NUMA
+extern void __init remap_numa_kva(void);
+extern void set_highmem_pages_init(int);
+#else
+static inline void remap_numa_kva(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_32_NUMA_H */

From d7d119d777d7ed22c4095915e9c985a8b75bfe5f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 876/890] x86: arch/x86/mm/init_32.c printk fixes

printk fixes. NOP in terms of functionality, but strings got
a bit larger due to the KERN_ markers that were added.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c6975fc6944a..02d269c07b96 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -530,7 +530,7 @@ void __init paging_init(void)
 #ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
-		printk("NX (Execute Disable) protection: active\n");
+		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
 #endif
 	pagetable_init();
 
@@ -557,7 +557,8 @@ void __init paging_init(void)
  */
 static void __init test_wp_bit(void)
 {
-	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
+	printk(KERN_INFO
+  "Checking if this processor honours the WP bit even in supervisor mode...");
 
 	/* Any page-aligned address will do, the test is non-destructive */
 	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
@@ -565,12 +566,13 @@ static void __init test_wp_bit(void)
 	clear_fixmap(FIX_WP_TEST);
 
 	if (!boot_cpu_data.wp_works_ok) {
-		printk("No.\n");
+		printk(KERN_CONT "No.\n");
 #ifdef CONFIG_X86_WP_WORKS_OK
-		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
+		panic(
+  "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
 #endif
 	} else {
-		printk("Ok.\n");
+		printk(KERN_CONT "Ok.\n");
 	}
 }
 
@@ -588,10 +590,12 @@ void __init mem_init(void)
 
 #ifdef CONFIG_HIGHMEM
 	/* check that fixmap and pkmap do not overlap */
-	if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
-		printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+	if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+		printk(KERN_ERR
+			"fixmap and kmap areas overlap - this will crash\n");
 		printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
-				PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+				PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
+				FIXADDR_START);
 		BUG();
 	}
 #endif
@@ -628,7 +632,7 @@ void __init mem_init(void)
 	       );
 
 #if 1 /* double-sanity-check paranoia */
-	printk("virtual kernel memory layout:\n"
+	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
 		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
@@ -756,13 +760,15 @@ void mark_rodata_ro(void)
 #endif
 	{
 		set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-		printk("Write protecting the kernel text: %luk\n", size >> 10);
+		printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+			size >> 10);
 
 #ifdef CONFIG_CPA_DEBUG
-		printk("Testing CPA: Reverting %lx-%lx\n", start, start+size);
+		printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+			start, start+size);
 		set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
 
-		printk("Testing CPA: write protecting again\n");
+		printk(KERN_INFO "Testing CPA: write protecting again\n");
 		set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
 #endif
 	}
@@ -770,15 +776,15 @@ void mark_rodata_ro(void)
 	start += size;
 	size = (unsigned long)__end_rodata - start;
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-	printk("Write protecting the kernel read-only data: %luk\n",
-	       size >> 10);
+	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+		size >> 10);
 	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
-	printk("Testing CPA: undo %lx-%lx\n", start, start + size);
+	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
 	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
 
-	printk("Testing CPA: write protecting again\n");
+	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
 }

From 1156e098c5fc142ceaf091f0627109225a9db894 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 877/890] x86: unify fault_32|64.c by ifdef'd function bodies

It's about time to get on with unifying these files, elimination
of the ugly ifdefs can occur in the unified file.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 116 ++++++++++++++++++++++++++++++++
 arch/x86/mm/fault_64.c | 148 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 263 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 28ea3d3ec8f8..7d9ecbbba745 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -173,8 +173,17 @@ static void force_sig_info_fault(int si_signo, int si_code,
 	force_sig_info(si_signo, &info, tsk);
 }
 
+#ifdef CONFIG_X86_64
+static int bad_address(void *p)
+{
+	unsigned long dummy;
+	return probe_kernel_address((unsigned long *)p, dummy);
+}
+#endif
+
 void dump_pagetable(unsigned long address)
 {
+#ifdef CONFIG_X86_32
 	__typeof__(pte_val(__pte(0))) page;
 
 	page = read_cr3();
@@ -209,8 +218,42 @@ void dump_pagetable(unsigned long address)
 	}
 
 	printk("\n");
+#else /* CONFIG_X86_64 */
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = (pgd_t *)read_cr3();
+
+	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+	pgd += pgd_index(address);
+	if (bad_address(pgd)) goto bad;
+	printk("PGD %lx ", pgd_val(*pgd));
+	if (!pgd_present(*pgd)) goto ret;
+
+	pud = pud_offset(pgd, address);
+	if (bad_address(pud)) goto bad;
+	printk("PUD %lx ", pud_val(*pud));
+	if (!pud_present(*pud))	goto ret;
+
+	pmd = pmd_offset(pud, address);
+	if (bad_address(pmd)) goto bad;
+	printk("PMD %lx ", pmd_val(*pmd));
+	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (bad_address(pte)) goto bad;
+	printk("PTE %lx", pte_val(*pte));
+ret:
+	printk("\n");
+	return;
+bad:
+	printk("BAD\n");
+#endif
 }
 
+#ifdef CONFIG_X86_32
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
 	unsigned index = pgd_index(address);
@@ -246,6 +289,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
 	return pmd_k;
 }
+#endif
 
 #ifdef CONFIG_X86_64
 static const char errata93_warning[] =
@@ -326,6 +370,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 			    unsigned long address)
 {
+#ifdef CONFIG_X86_32
 	if (!oops_may_print())
 		return;
 
@@ -350,8 +395,40 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	printk(KERN_ALERT "IP:");
 	printk_address(regs->ip, 1);
 	dump_pagetable(address);
+#else /* CONFIG_X86_64 */
+	printk(KERN_ALERT "BUG: unable to handle kernel ");
+	if (address < PAGE_SIZE)
+		printk(KERN_CONT "NULL pointer dereference");
+	else
+		printk(KERN_CONT "paging request");
+	printk(KERN_CONT " at %016lx\n", address);
+
+	printk(KERN_ALERT "IP:");
+	printk_address(regs->ip, 1);
+	dump_pagetable(address);
+#endif
 }
 
+#ifdef CONFIG_X86_64
+static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+				 unsigned long error_code)
+{
+	unsigned long flags = oops_begin();
+	struct task_struct *tsk;
+
+	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
+	       current->comm, address);
+	dump_pagetable(address);
+	tsk = current;
+	tsk->thread.cr2 = address;
+	tsk->thread.trap_no = 14;
+	tsk->thread.error_code = error_code;
+	if (__die("Bad pagetable", regs, error_code))
+		regs = NULL;
+	oops_end(flags, regs, SIGKILL);
+}
+#endif
+
 /*
  * Handle a fault on the vmalloc or module mapping area
  *
@@ -706,6 +783,7 @@ do_sigbus:
 
 void vmalloc_sync_all(void)
 {
+#ifdef CONFIG_X86_32
 	/*
 	 * Note that races in the updates of insync and start aren't
 	 * problematic: insync can only get set bits added, and updates to
@@ -740,4 +818,42 @@ void vmalloc_sync_all(void)
 		if (address == start && test_bit(pgd_index(address), insync))
 			start = address + PGDIR_SIZE;
 	}
+#else /* CONFIG_X86_64 */
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = VMALLOC_START & PGDIR_MASK;
+	unsigned long address;
+
+	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			const pgd_t *pgd_ref = pgd_offset_k(address);
+			struct page *page;
+
+			if (pgd_none(*pgd_ref))
+				continue;
+			spin_lock(&pgd_lock);
+			list_for_each_entry(page, &pgd_list, lru) {
+				pgd_t *pgd;
+				pgd = (pgd_t *)page_address(page) + pgd_index(address);
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+				else
+					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+			}
+			spin_unlock(&pgd_lock);
+			set_bit(pgd_index(address), insync);
+		}
+		if (address == start)
+			start = address + PGDIR_SIZE;
+	}
+	/* Check that there is no need to do the same for the modules area. */
+	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+				(__START_KERNEL & PGDIR_MASK)));
+#endif
 }
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index e12c34ba4ba1..edca689c62d5 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -176,14 +176,52 @@ static void force_sig_info_fault(int si_signo, int si_code,
 	force_sig_info(si_signo, &info, tsk);
 }
 
+#ifdef CONFIG_X86_64
 static int bad_address(void *p)
 {
 	unsigned long dummy;
 	return probe_kernel_address((unsigned long *)p, dummy);
 }
+#endif
 
 void dump_pagetable(unsigned long address)
 {
+#ifdef CONFIG_X86_32
+	__typeof__(pte_val(__pte(0))) page;
+
+	page = read_cr3();
+	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+	printk("*pdpt = %016Lx ", page);
+	if ((page >> PAGE_SHIFT) < max_low_pfn
+	    && page & _PAGE_PRESENT) {
+		page &= PAGE_MASK;
+		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+		                                         & (PTRS_PER_PMD - 1)];
+		printk(KERN_CONT "*pde = %016Lx ", page);
+		page &= ~_PAGE_NX;
+	}
+#else
+	printk("*pde = %08lx ", page);
+#endif
+
+	/*
+	 * We must not directly access the pte in the highpte
+	 * case if the page table is located in highmem.
+	 * And let's rather not kmap-atomic the pte, just in case
+	 * it's allocated already.
+	 */
+	if ((page >> PAGE_SHIFT) < max_low_pfn
+	    && (page & _PAGE_PRESENT)
+	    && !(page & _PAGE_PSE)) {
+		page &= PAGE_MASK;
+		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+		                                         & (PTRS_PER_PTE - 1)];
+		printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
+	}
+
+	printk("\n");
+#else /* CONFIG_X86_64 */
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
@@ -215,8 +253,47 @@ ret:
 	return;
 bad:
 	printk("BAD\n");
+#endif
 }
 
+#ifdef CONFIG_X86_32
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	/*
+	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
+	 * and redundant with the set_pmd() on non-PAE. As would
+	 * set_pud.
+	 */
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+	if (!pmd_present(*pmd)) {
+		set_pmd(pmd, *pmd_k);
+		arch_flush_lazy_mmu_mode();
+	} else
+		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+	return pmd_k;
+}
+#endif
+
 #ifdef CONFIG_X86_64
 static const char errata93_warning[] =
 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
@@ -296,6 +373,32 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 			    unsigned long address)
 {
+#ifdef CONFIG_X86_32
+	if (!oops_may_print())
+		return;
+
+#ifdef CONFIG_X86_PAE
+	if (error_code & PF_INSTR) {
+		int level;
+		pte_t *pte = lookup_address(address, &level);
+
+		if (pte && pte_present(*pte) && !pte_exec(*pte))
+			printk(KERN_CRIT "kernel tried to execute "
+				"NX-protected page - exploit attempt? "
+				"(uid: %d)\n", current->uid);
+	}
+#endif
+	printk(KERN_ALERT "BUG: unable to handle kernel ");
+	if (address < PAGE_SIZE)
+		printk(KERN_CONT "NULL pointer dereference");
+	else
+		printk(KERN_CONT "paging request");
+	printk(KERN_CONT " at %08lx\n", address);
+
+	printk(KERN_ALERT "IP:");
+	printk_address(regs->ip, 1);
+	dump_pagetable(address);
+#else /* CONFIG_X86_64 */
 	printk(KERN_ALERT "BUG: unable to handle kernel ");
 	if (address < PAGE_SIZE)
 		printk(KERN_CONT "NULL pointer dereference");
@@ -306,8 +409,10 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	printk(KERN_ALERT "IP:");
 	printk_address(regs->ip, 1);
 	dump_pagetable(address);
+#endif
 }
 
+#ifdef CONFIG_X86_64
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
 {
@@ -325,6 +430,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 		regs = NULL;
 	oops_end(flags, regs, SIGKILL);
 }
+#endif
 
 /*
  * Handle a fault on the vmalloc area
@@ -590,12 +696,15 @@ bad_area:
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & PF_USER) {
-
 		/*
 		 * It's possible to have interrupts off here.
 		 */
 		local_irq_enable();
 
+		/*
+		 * Valid to do another page fault here because this one came
+		 * from user space.
+		 */
 		if (is_prefetch(regs, address, error_code))
 			return;
 
@@ -696,6 +805,42 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
+#ifdef CONFIG_X86_32
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = TASK_SIZE;
+	unsigned long address;
+
+	if (SHARED_KERNEL_PMD)
+		return;
+
+	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			unsigned long flags;
+			struct page *page;
+
+			spin_lock_irqsave(&pgd_lock, flags);
+			for (page = pgd_list; page; page =
+					(struct page *)page->index)
+				if (!vmalloc_sync_one(page_address(page),
+								address)) {
+					BUG_ON(page != pgd_list);
+					break;
+				}
+			spin_unlock_irqrestore(&pgd_lock, flags);
+			if (!page)
+				set_bit(pgd_index(address), insync);
+		}
+		if (address == start && test_bit(pgd_index(address), insync))
+			start = address + PGDIR_SIZE;
+	}
+#else /* CONFIG_X86_64 */
 	/*
 	 * Note that races in the updates of insync and start aren't
 	 * problematic: insync can only get set bits added, and updates to
@@ -732,4 +877,5 @@ void vmalloc_sync_all(void)
 	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 				(__START_KERNEL & PGDIR_MASK)));
+#endif
 }

From f8c2ee224d8397364835204c6c0130d08c2e644c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:10 +0100
Subject: [PATCH 878/890] x86: unify fault_32|64.c with ifdefs

Elimination of these ifdefs can be done in a unified file.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault_32.c | 100 ++++++++++++++++++++++++++++++++++++++---
 arch/x86/mm/fault_64.c |  93 +++++++++++++++++++++++++++++++++-----
 2 files changed, 177 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 7d9ecbbba745..4da4625c6968 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -48,7 +48,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
 	if (!user_mode_vm(regs)) {
+#else
+	if (!user_mode(regs)) {
+#endif
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 14))
 			ret = 1;
@@ -430,11 +434,15 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 #endif
 
 /*
+ * X86_32
  * Handle a fault on the vmalloc or module mapping area
  *
+ * X86_64
+ * Handle a fault on the vmalloc area
+ *
  * This assumes no large pages in there.
  */
-static inline int vmalloc_fault(unsigned long address)
+static int vmalloc_fault(unsigned long address)
 {
 #ifdef CONFIG_X86_32
 	unsigned long pgd_paddr;
@@ -509,6 +517,9 @@ int show_unhandled_signals = 1;
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
 void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct task_struct *tsk;
@@ -517,6 +528,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	unsigned long address;
 	int write, si_code;
 	int fault;
+#ifdef CONFIG_X86_64
+	unsigned long flags;
+#endif
 
 	/*
 	 * We can fault from pretty much anywhere, with unknown IRQ state.
@@ -548,6 +562,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * (error_code & 4) == 0, and that the fault was not a
 	 * protection error (error_code & 9) == 0.
 	 */
+#ifdef CONFIG_X86_32
 	if (unlikely(address >= TASK_SIZE)) {
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
@@ -570,7 +585,45 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 */
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
+	if (unlikely(address >= TASK_SIZE64)) {
+		/*
+		 * Don't check for the module range here: its PML4
+		 * is always initialized because it's shared with the main
+		 * kernel text. Only vmalloc may need PML4 syncups.
+		 */
+		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
+			if (vmalloc_fault(address) >= 0)
+				return;
+		}
+		/*
+		 * Don't take the mm semaphore here. If we fixup a prefetch
+		 * fault we could otherwise deadlock.
+		 */
+		goto bad_area_nosemaphore;
+	}
+	if (likely(regs->flags & X86_EFLAGS_IF))
+		local_irq_enable();
 
+	if (unlikely(error_code & PF_RSVD))
+		pgtable_bad(address, regs, error_code);
+
+	/*
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault.
+	 */
+	if (unlikely(in_atomic() || !mm))
+		goto bad_area_nosemaphore;
+
+	/*
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet.
+	 */
+	if (user_mode_vm(regs))
+		error_code |= PF_USER;
+again:
+#endif
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -596,7 +649,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto bad_area;
+#ifdef CONFIG_X86_32
 	if (vma->vm_start <= address)
+#else
+	if (likely(vma->vm_start <= address))
+#endif
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
@@ -634,7 +691,9 @@ good_area:
 			goto bad_area;
 	}
 
- survive:
+#ifdef CONFIG_X86_32
+survive:
+#endif
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -705,6 +764,7 @@ bad_area_nosemaphore:
 			print_vma_addr(" in ", regs->ip);
 			printk("\n");
 		}
+
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
 		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -722,9 +782,13 @@ no_context:
 		return;
 
 	/*
+	 * X86_32
 	 * Valid to do another page fault here, because if this fault
 	 * had been triggered by is_prefetch fixup_exception would have
 	 * handled it.
+	 *
+	 * X86_64
+	 * Hall of shame of CPU/BIOS bugs.
 	 */
 	if (is_prefetch(regs, address, error_code))
 		return;
@@ -736,7 +800,7 @@ no_context:
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
-
+#ifdef CONFIG_X86_32
 	bust_spinlocks(1);
 
 	show_fault_oops(regs, error_code, address);
@@ -747,6 +811,20 @@ no_context:
 	die("Oops", regs, error_code);
 	bust_spinlocks(0);
 	do_exit(SIGKILL);
+#else /* CONFIG_X86_64 */
+	flags = oops_begin();
+
+	show_fault_oops(regs, error_code, address);
+
+	tsk->thread.cr2 = address;
+	tsk->thread.trap_no = 14;
+	tsk->thread.error_code = error_code;
+	if (__die("Oops", regs, error_code))
+		regs = NULL;
+	/* Executive summary in case the body of the oops scrolled away */
+	printk(KERN_EMERG "CR2: %016lx\n", address);
+	oops_end(flags, regs, SIGKILL);
+#endif
 
 /*
  * We ran out of memory, or some other thing happened to us that made
@@ -754,11 +832,18 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
+#ifdef CONFIG_X86_32
 	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
+#else
+	if (is_global_init(current)) {
+		yield();
+		goto again;
+	}
+#endif
 	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & PF_USER)
 		do_group_exit(SIGKILL);
@@ -770,17 +855,22 @@ do_sigbus:
 	/* Kernel mode? Handle exceptions or die */
 	if (!(error_code & PF_USER))
 		goto no_context;
-
+#ifdef CONFIG_X86_32
 	/* User space => ok to do another page fault */
 	if (is_prefetch(regs, address, error_code))
 		return;
-
+#endif
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
+#ifdef CONFIG_X86_64
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+#endif
+
 void vmalloc_sync_all(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index edca689c62d5..0902719388bc 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -51,7 +51,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
+	if (!user_mode_vm(regs)) {
+#else
 	if (!user_mode(regs)) {
+#endif
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 14))
 			ret = 1;
@@ -433,6 +437,10 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 #endif
 
 /*
+ * X86_32
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * X86_64
  * Handle a fault on the vmalloc area
  *
  * This assumes no large pages in there.
@@ -512,16 +520,20 @@ int show_unhandled_signals = 1;
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
-					unsigned long error_code)
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	unsigned long address;
-	int write, fault;
+	int write, si_code;
+	int fault;
+#ifdef CONFIG_X86_64
 	unsigned long flags;
-	int si_code;
+#endif
 
 	/*
 	 * We can fault from pretty much anywhere, with unknown IRQ state.
@@ -553,6 +565,30 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	 * (error_code & 4) == 0, and that the fault was not a
 	 * protection error (error_code & 9) == 0.
 	 */
+#ifdef CONFIG_X86_32
+	if (unlikely(address >= TASK_SIZE)) {
+		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+		    vmalloc_fault(address) >= 0)
+			return;
+		/*
+		 * Don't take the mm semaphore here. If we fixup a prefetch
+		 * fault we could otherwise deadlock.
+		 */
+		goto bad_area_nosemaphore;
+	}
+
+	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
+	   fault has been handled. */
+	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
+		local_irq_enable();
+
+	/*
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault.
+	 */
+	if (in_atomic() || !mm)
+		goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
 	if (unlikely(address >= TASK_SIZE64)) {
 		/*
 		 * Don't check for the module range here: its PML4
@@ -570,7 +606,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		 */
 		goto bad_area_nosemaphore;
 	}
-
 	if (likely(regs->flags & X86_EFLAGS_IF))
 		local_irq_enable();
 
@@ -590,8 +625,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	 */
 	if (user_mode_vm(regs))
 		error_code |= PF_USER;
-
- again:
+again:
+#endif
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -617,7 +652,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto bad_area;
+#ifdef CONFIG_X86_32
+	if (vma->vm_start <= address)
+#else
 	if (likely(vma->vm_start <= address))
+#endif
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
@@ -655,6 +694,9 @@ good_area:
 			goto bad_area;
 	}
 
+#ifdef CONFIG_X86_32
+survive:
+#endif
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -730,7 +772,6 @@ bad_area_nosemaphore:
 		/* Kernel addresses are always protection faults */
 		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
 		tsk->thread.trap_no = 14;
-
 		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
 		return;
 	}
@@ -744,9 +785,14 @@ no_context:
 		return;
 
 	/*
+	 * X86_32
+	 * Valid to do another page fault here, because if this fault
+	 * had been triggered by is_prefetch fixup_exception would have
+	 * handled it.
+	 *
+	 * X86_64
 	 * Hall of shame of CPU/BIOS bugs.
 	 */
-
 	if (is_prefetch(regs, address, error_code))
 		return;
 
@@ -757,7 +803,18 @@ no_context:
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
+#ifdef CONFIG_X86_32
+	bust_spinlocks(1);
 
+	show_fault_oops(regs, error_code, address);
+
+	tsk->thread.cr2 = address;
+	tsk->thread.trap_no = 14;
+	tsk->thread.error_code = error_code;
+	die("Oops", regs, error_code);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+#else /* CONFIG_X86_64 */
 	flags = oops_begin();
 
 	show_fault_oops(regs, error_code, address);
@@ -770,6 +827,7 @@ no_context:
 	/* Executive summary in case the body of the oops scrolled away */
 	printk(KERN_EMERG "CR2: %016lx\n", address);
 	oops_end(flags, regs, SIGKILL);
+#endif
 
 /*
  * We ran out of memory, or some other thing happened to us that made
@@ -777,10 +835,18 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
+#ifdef CONFIG_X86_32
+	if (is_global_init(tsk)) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+#else
 	if (is_global_init(current)) {
 		yield();
 		goto again;
 	}
+#endif
 	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & PF_USER)
 		do_group_exit(SIGKILL);
@@ -792,16 +858,21 @@ do_sigbus:
 	/* Kernel mode? Handle exceptions or die */
 	if (!(error_code & PF_USER))
 		goto no_context;
-
+#ifdef CONFIG_X86_32
+	/* User space => ok to do another page fault */
+	if (is_prefetch(regs, address, error_code))
+		return;
+#endif
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-	return;
 }
 
+#ifdef CONFIG_X86_64
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
+#endif
 
 void vmalloc_sync_all(void)
 {

From c61e211d9989e4c112d3d58db12ad58f9016a3c8 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 879/890] x86: unify fault_32|64.c

Unify includes in moved fault.c.

Modify Makefiles to pick up unified file.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile_32             |   2 +-
 arch/x86/mm/Makefile_64             |   2 +-
 arch/x86/mm/{fault_64.c => fault.c} |  21 +-
 arch/x86/mm/fault_32.c              | 949 ----------------------------
 4 files changed, 14 insertions(+), 960 deletions(-)
 rename arch/x86/mm/{fault_64.c => fault.c} (98%)
 delete mode 100644 arch/x86/mm/fault_32.c

diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index ffa6d46a1e73..c36ae88bb543 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y	:= init_32.o pgtable_32.o fault_32.o ioremap.o extable.o pageattr.o mmap.o
+obj-y	:= init_32.o pgtable_32.o fault.o ioremap.o extable.o pageattr.o mmap.o
 
 obj-$(CONFIG_NUMA) += discontig_32.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index 27a090c86e9b..688c8c28ac8f 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux x86_64-specific parts of the memory manager.
 #
 
-obj-y	 := init_64.o fault_64.o ioremap.o extable.o pageattr.o mmap.o
+obj-y	 := init_64.o fault.o ioremap.o extable.o pageattr.o mmap.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA) += numa_64.o
 obj-$(CONFIG_K8_NUMA) += k8topology_64.o
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault.c
similarity index 98%
rename from arch/x86/mm/fault_64.c
rename to arch/x86/mm/fault.c
index 0902719388bc..14a0c6e541de 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,8 @@
 #include <linux/tty.h>
 #include <linux/vt_kern.h>		/* For unblank_screen() */
 #include <linux/compiler.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>		/* for max_low_pfn */
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
@@ -25,6 +27,8 @@
 #include <linux/kdebug.h>
 
 #include <asm/system.h>
+#include <asm/desc.h>
+#include <asm/segment.h>
 #include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
@@ -88,16 +92,15 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & PF_INSTR))
-			return 0;
-	} else {
+# ifdef CONFIG_X86_PAE
+	/* If it was a exec fault on NX page, ignore */
+	if (nx_enabled && (error_code & PF_INSTR))
 		return 0;
-	}
-#else
-	/* If it was a exec fault ignore */
+# else
+	return 0;
+# endif
+#else /* CONFIG_X86_64 */
+	/* If it was a exec fault on NX page, ignore */
 	if (error_code & PF_INSTR)
 		return 0;
 #endif
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
deleted file mode 100644
index 4da4625c6968..000000000000
--- a/arch/x86/mm/fault_32.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  Copyright (C) 1995  Linus Torvalds
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h>		/* For unblank_screen() */
-#include <linux/highmem.h>
-#include <linux/bootmem.h>		/* for max_low_pfn */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/kdebug.h>
-
-#include <asm/system.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-
-/*
- * Page fault error code bits
- *	bit 0 == 0 means no page found, 1 means protection fault
- *	bit 1 == 0 means read, 1 means write
- *	bit 2 == 0 means kernel, 1 means user-mode
- *	bit 3 == 1 means use of reserved bit detected
- *	bit 4 == 1 means fault was an instruction fetch
- */
-#define PF_PROT		(1<<0)
-#define PF_WRITE	(1<<1)
-#define PF_USER		(1<<2)
-#define PF_RSVD		(1<<3)
-#define PF_INSTR	(1<<4)
-
-static inline int notify_page_fault(struct pt_regs *regs)
-{
-#ifdef CONFIG_KPROBES
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-#ifdef CONFIG_X86_32
-	if (!user_mode_vm(regs)) {
-#else
-	if (!user_mode(regs)) {
-#endif
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, 14))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-#else
-	return 0;
-#endif
-}
-
-/*
- * X86_32
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- * Check that here and ignore it.
- *
- * X86_64
- * Sometimes the CPU reports invalid exceptions on prefetch.
- * Check that here and ignore it.
- *
- * Opcode checker based on code by Richard Brunner
- */
-static int is_prefetch(struct pt_regs *regs, unsigned long addr,
-		       unsigned long error_code)
-{
-	unsigned char *instr;
-	int scan_more = 1;
-	int prefetch = 0;
-	unsigned char *max_instr;
-
-#ifdef CONFIG_X86_32
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & PF_INSTR))
-			return 0;
-	} else {
-		return 0;
-	}
-#else
-	/* If it was a exec fault ignore */
-	if (error_code & PF_INSTR)
-		return 0;
-#endif
-
-	instr = (unsigned char *)convert_ip_to_linear(current, regs);
-	max_instr = instr + 15;
-
-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
-		return 0;
-
-	while (scan_more && instr < max_instr) {
-		unsigned char opcode;
-		unsigned char instr_hi;
-		unsigned char instr_lo;
-
-		if (probe_kernel_address(instr, opcode))
-			break;
-
-		instr_hi = opcode & 0xf0;
-		instr_lo = opcode & 0x0f;
-		instr++;
-
-		switch (instr_hi) {
-		case 0x20:
-		case 0x30:
-			/*
-			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
-			 * In X86_64 long mode, the CPU will signal invalid
-			 * opcode if some of these prefixes are present so
-			 * X86_64 will never get here anyway
-			 */
-			scan_more = ((instr_lo & 7) == 0x6);
-			break;
-#ifdef CONFIG_X86_64
-		case 0x40:
-			/*
-			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
-			 * Need to figure out under what instruction mode the
-			 * instruction was issued. Could check the LDT for lm,
-			 * but for now it's good enough to assume that long
-			 * mode only uses well known segments or kernel.
-			 */
-			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
-			break;
-#endif
-		case 0x60:
-			/* 0x64 thru 0x67 are valid prefixes in all modes. */
-			scan_more = (instr_lo & 0xC) == 0x4;
-			break;
-		case 0xF0:
-			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
-			scan_more = !instr_lo || (instr_lo>>1) == 1;
-			break;
-		case 0x00:
-			/* Prefetch instruction is 0x0F0D or 0x0F18 */
-			scan_more = 0;
-
-			if (probe_kernel_address(instr, opcode))
-				break;
-			prefetch = (instr_lo == 0xF) &&
-				(opcode == 0x0D || opcode == 0x18);
-			break;
-		default:
-			scan_more = 0;
-			break;
-		}
-	}
-	return prefetch;
-}
-
-static void force_sig_info_fault(int si_signo, int si_code,
-	unsigned long address, struct task_struct *tsk)
-{
-	siginfo_t info;
-
-	info.si_signo = si_signo;
-	info.si_errno = 0;
-	info.si_code = si_code;
-	info.si_addr = (void __user *)address;
-	force_sig_info(si_signo, &info, tsk);
-}
-
-#ifdef CONFIG_X86_64
-static int bad_address(void *p)
-{
-	unsigned long dummy;
-	return probe_kernel_address((unsigned long *)p, dummy);
-}
-#endif
-
-void dump_pagetable(unsigned long address)
-{
-#ifdef CONFIG_X86_32
-	__typeof__(pte_val(__pte(0))) page;
-
-	page = read_cr3();
-	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-#ifdef CONFIG_X86_PAE
-	printk("*pdpt = %016Lx ", page);
-	if ((page >> PAGE_SHIFT) < max_low_pfn
-	    && page & _PAGE_PRESENT) {
-		page &= PAGE_MASK;
-		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-		                                         & (PTRS_PER_PMD - 1)];
-		printk(KERN_CONT "*pde = %016Lx ", page);
-		page &= ~_PAGE_NX;
-	}
-#else
-	printk("*pde = %08lx ", page);
-#endif
-
-	/*
-	 * We must not directly access the pte in the highpte
-	 * case if the page table is located in highmem.
-	 * And let's rather not kmap-atomic the pte, just in case
-	 * it's allocated already.
-	 */
-	if ((page >> PAGE_SHIFT) < max_low_pfn
-	    && (page & _PAGE_PRESENT)
-	    && !(page & _PAGE_PSE)) {
-		page &= PAGE_MASK;
-		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-		                                         & (PTRS_PER_PTE - 1)];
-		printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-	}
-
-	printk("\n");
-#else /* CONFIG_X86_64 */
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = (pgd_t *)read_cr3();
-
-	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-	pgd += pgd_index(address);
-	if (bad_address(pgd)) goto bad;
-	printk("PGD %lx ", pgd_val(*pgd));
-	if (!pgd_present(*pgd)) goto ret;
-
-	pud = pud_offset(pgd, address);
-	if (bad_address(pud)) goto bad;
-	printk("PUD %lx ", pud_val(*pud));
-	if (!pud_present(*pud))	goto ret;
-
-	pmd = pmd_offset(pud, address);
-	if (bad_address(pmd)) goto bad;
-	printk("PMD %lx ", pmd_val(*pmd));
-	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
-
-	pte = pte_offset_kernel(pmd, address);
-	if (bad_address(pte)) goto bad;
-	printk("PTE %lx", pte_val(*pte));
-ret:
-	printk("\n");
-	return;
-bad:
-	printk("BAD\n");
-#endif
-}
-
-#ifdef CONFIG_X86_32
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
-	unsigned index = pgd_index(address);
-	pgd_t *pgd_k;
-	pud_t *pud, *pud_k;
-	pmd_t *pmd, *pmd_k;
-
-	pgd += index;
-	pgd_k = init_mm.pgd + index;
-
-	if (!pgd_present(*pgd_k))
-		return NULL;
-
-	/*
-	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
-	 * and redundant with the set_pmd() on non-PAE. As would
-	 * set_pud.
-	 */
-
-	pud = pud_offset(pgd, address);
-	pud_k = pud_offset(pgd_k, address);
-	if (!pud_present(*pud_k))
-		return NULL;
-
-	pmd = pmd_offset(pud, address);
-	pmd_k = pmd_offset(pud_k, address);
-	if (!pmd_present(*pmd_k))
-		return NULL;
-	if (!pmd_present(*pmd)) {
-		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else
-		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	return pmd_k;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-#endif
-
-/* Workaround for K8 erratum #93 & buggy BIOS.
-   BIOS SMM functions are required to use a specific workaround
-   to avoid corruption of the 64bit RIP register on C stepping K8.
-   A lot of BIOS that didn't get tested properly miss this.
-   The OS sees this as a page fault with the upper 32bits of RIP cleared.
-   Try to work around it here.
-   Note we only handle faults in kernel here.
-   Does nothing for X86_32
- */
-static int is_errata93(struct pt_regs *regs, unsigned long address)
-{
-#ifdef CONFIG_X86_64
-	static int warned;
-	if (address != regs->ip)
-		return 0;
-	if ((address >> 32) != 0)
-		return 0;
-	address |= 0xffffffffUL << 32;
-	if ((address >= (u64)_stext && address <= (u64)_etext) ||
-	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!warned) {
-			printk(errata93_warning);
-			warned = 1;
-		}
-		regs->ip = address;
-		return 1;
-	}
-#endif
-	return 0;
-}
-
-/*
- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
- * addresses >4GB.  We catch this in the page fault handler because these
- * addresses are not reachable. Just detect this case and return.  Any code
- * segment in LDT is compatibility mode.
- */
-static int is_errata100(struct pt_regs *regs, unsigned long address)
-{
-#ifdef CONFIG_X86_64
-	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
-	    (address >> 32))
-		return 1;
-#endif
-	return 0;
-}
-
-void do_invalid_op(struct pt_regs *, unsigned long);
-
-static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
-{
-#ifdef CONFIG_X86_F00F_BUG
-	unsigned long nr;
-	/*
-	 * Pentium F0 0F C7 C8 bug workaround.
-	 */
-	if (boot_cpu_data.f00f_bug) {
-		nr = (address - idt_descr.address) >> 3;
-
-		if (nr == 6) {
-			do_invalid_op(regs, 0);
-			return 1;
-		}
-	}
-#endif
-	return 0;
-}
-
-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
-			    unsigned long address)
-{
-#ifdef CONFIG_X86_32
-	if (!oops_may_print())
-		return;
-
-#ifdef CONFIG_X86_PAE
-	if (error_code & PF_INSTR) {
-		int level;
-		pte_t *pte = lookup_address(address, &level);
-
-		if (pte && pte_present(*pte) && !pte_exec(*pte))
-			printk(KERN_CRIT "kernel tried to execute "
-				"NX-protected page - exploit attempt? "
-				"(uid: %d)\n", current->uid);
-	}
-#endif
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
-	printk(KERN_CONT " at %08lx\n", address);
-
-	printk(KERN_ALERT "IP:");
-	printk_address(regs->ip, 1);
-	dump_pagetable(address);
-#else /* CONFIG_X86_64 */
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
-	printk(KERN_CONT " at %016lx\n", address);
-
-	printk(KERN_ALERT "IP:");
-	printk_address(regs->ip, 1);
-	dump_pagetable(address);
-#endif
-}
-
-#ifdef CONFIG_X86_64
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
-				 unsigned long error_code)
-{
-	unsigned long flags = oops_begin();
-	struct task_struct *tsk;
-
-	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
-	       current->comm, address);
-	dump_pagetable(address);
-	tsk = current;
-	tsk->thread.cr2 = address;
-	tsk->thread.trap_no = 14;
-	tsk->thread.error_code = error_code;
-	if (__die("Bad pagetable", regs, error_code))
-		regs = NULL;
-	oops_end(flags, regs, SIGKILL);
-}
-#endif
-
-/*
- * X86_32
- * Handle a fault on the vmalloc or module mapping area
- *
- * X86_64
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static int vmalloc_fault(unsigned long address)
-{
-#ifdef CONFIG_X86_32
-	unsigned long pgd_paddr;
-	pmd_t *pmd_k;
-	pte_t *pte_k;
-	/*
-	 * Synchronize this task's top level page-table
-	 * with the 'reference' page table.
-	 *
-	 * Do _not_ use "current" here. We might be inside
-	 * an interrupt in the middle of a task switch..
-	 */
-	pgd_paddr = read_cr3();
-	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
-	if (!pmd_k)
-		return -1;
-	pte_k = pte_offset_kernel(pmd_k, address);
-	if (!pte_present(*pte_k))
-		return -1;
-	return 0;
-#else
-	pgd_t *pgd, *pgd_ref;
-	pud_t *pud, *pud_ref;
-	pmd_t *pmd, *pmd_ref;
-	pte_t *pte, *pte_ref;
-
-	/* Copy kernel mappings over when needed. This can also
-	   happen within a race in page table update. In the later
-	   case just flush. */
-
-	pgd = pgd_offset(current->mm ?: &init_mm, address);
-	pgd_ref = pgd_offset_k(address);
-	if (pgd_none(*pgd_ref))
-		return -1;
-	if (pgd_none(*pgd))
-		set_pgd(pgd, *pgd_ref);
-	else
-		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
-	/* Below here mismatches are bugs because these lower tables
-	   are shared */
-
-	pud = pud_offset(pgd, address);
-	pud_ref = pud_offset(pgd_ref, address);
-	if (pud_none(*pud_ref))
-		return -1;
-	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
-		BUG();
-	pmd = pmd_offset(pud, address);
-	pmd_ref = pmd_offset(pud_ref, address);
-	if (pmd_none(*pmd_ref))
-		return -1;
-	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
-		BUG();
-	pte_ref = pte_offset_kernel(pmd_ref, address);
-	if (!pte_present(*pte_ref))
-		return -1;
-	pte = pte_offset_kernel(pmd, address);
-	/* Don't use pte_page here, because the mappings can point
-	   outside mem_map, and the NUMA hash lookup cannot handle
-	   that. */
-	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
-		BUG();
-	return 0;
-#endif
-}
-
-int show_unhandled_signals = 1;
-
-/*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-#ifdef CONFIG_X86_64
-asmlinkage
-#endif
-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct vm_area_struct *vma;
-	unsigned long address;
-	int write, si_code;
-	int fault;
-#ifdef CONFIG_X86_64
-	unsigned long flags;
-#endif
-
-	/*
-	 * We can fault from pretty much anywhere, with unknown IRQ state.
-	 */
-	trace_hardirqs_fixup();
-
-	tsk = current;
-	mm = tsk->mm;
-	prefetchw(&mm->mmap_sem);
-
-	/* get the address */
-	address = read_cr2();
-
-	si_code = SEGV_MAPERR;
-
-	if (notify_page_fault(regs))
-		return;
-
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 *
-	 * This verifies that the fault happens in kernel space
-	 * (error_code & 4) == 0, and that the fault was not a
-	 * protection error (error_code & 9) == 0.
-	 */
-#ifdef CONFIG_X86_32
-	if (unlikely(address >= TASK_SIZE)) {
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		    vmalloc_fault(address) >= 0)
-			return;
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock.
-		 */
-		goto bad_area_nosemaphore;
-	}
-
-	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
-	   fault has been handled. */
-	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
-		local_irq_enable();
-
-	/*
-	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
-	 */
-	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
-#else /* CONFIG_X86_64 */
-	if (unlikely(address >= TASK_SIZE64)) {
-		/*
-		 * Don't check for the module range here: its PML4
-		 * is always initialized because it's shared with the main
-		 * kernel text. Only vmalloc may need PML4 syncups.
-		 */
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
-			if (vmalloc_fault(address) >= 0)
-				return;
-		}
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock.
-		 */
-		goto bad_area_nosemaphore;
-	}
-	if (likely(regs->flags & X86_EFLAGS_IF))
-		local_irq_enable();
-
-	if (unlikely(error_code & PF_RSVD))
-		pgtable_bad(address, regs, error_code);
-
-	/*
-	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
-	 */
-	if (unlikely(in_atomic() || !mm))
-		goto bad_area_nosemaphore;
-
-	/*
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet.
-	 */
-	if (user_mode_vm(regs))
-		error_code |= PF_USER;
-again:
-#endif
-	/* When running in the kernel we expect faults to occur only to
-	 * addresses in user space.  All other faults represent errors in the
-	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
-	 * erroneous fault occurring in a code path which already holds mmap_sem
-	 * we will deadlock attempting to validate the fault against the
-	 * address space.  Luckily the kernel only validly references user
-	 * space from well defined areas of code, which are listed in the
-	 * exceptions table.
-	 *
-	 * As the vast majority of faults will be valid we will only perform
-	 * the source reference check when there is a possibility of a deadlock.
-	 * Attempt to lock the address space, if we cannot we then validate the
-	 * source.  If this is invalid we can skip the address space check,
-	 * thus avoiding the deadlock.
-	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
-		if ((error_code & PF_USER) == 0 &&
-		    !search_exception_tables(regs->ip))
-			goto bad_area_nosemaphore;
-		down_read(&mm->mmap_sem);
-	}
-
-	vma = find_vma(mm, address);
-	if (!vma)
-		goto bad_area;
-#ifdef CONFIG_X86_32
-	if (vma->vm_start <= address)
-#else
-	if (likely(vma->vm_start <= address))
-#endif
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-	if (error_code & PF_USER) {
-		/*
-		 * Accessing the stack below %sp is always a bug.
-		 * The large cushion allows instructions like enter
-		 * and pusha to work.  ("enter $65535,$31" pushes
-		 * 32 pointers and then decrements %sp by 65535.)
-		 */
-		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
-			goto bad_area;
-	}
-	if (expand_stack(vma, address))
-		goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
-	si_code = SEGV_ACCERR;
-	write = 0;
-	switch (error_code & (PF_PROT|PF_WRITE)) {
-	default:	/* 3: write, present */
-		/* fall through */
-	case PF_WRITE:		/* write, not present */
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		write++;
-		break;
-	case PF_PROT:		/* read, present */
-		goto bad_area;
-	case 0:			/* read, not present */
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
-	}
-
-#ifdef CONFIG_X86_32
-survive:
-#endif
-	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
-	fault = handle_mm_fault(mm, vma, address, write);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-	if (fault & VM_FAULT_MAJOR)
-		tsk->maj_flt++;
-	else
-		tsk->min_flt++;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Did it hit the DOS screen memory VA from vm86 mode?
-	 */
-	if (v8086_mode(regs)) {
-		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
-		if (bit < 32)
-			tsk->thread.screen_bitmap |= 1 << bit;
-	}
-#endif
-	up_read(&mm->mmap_sem);
-	return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
-	up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
-	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & PF_USER) {
-		/*
-		 * It's possible to have interrupts off here.
-		 */
-		local_irq_enable();
-
-		/*
-		 * Valid to do another page fault here because this one came
-		 * from user space.
-		 */
-		if (is_prefetch(regs, address, error_code))
-			return;
-
-		if (is_errata100(regs, address))
-			return;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-		    printk_ratelimit()) {
-			printk(
-#ifdef CONFIG_X86_32
-			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
-#else
-			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
-#endif
-			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-			tsk->comm, task_pid_nr(tsk), address, regs->ip,
-			regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
-
-		tsk->thread.cr2 = address;
-		/* Kernel addresses are always protection faults */
-		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-		tsk->thread.trap_no = 14;
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-		return;
-	}
-
-	if (is_f00f_bug(regs, address))
-		return;
-
-no_context:
-	/* Are we prepared to handle this kernel fault?  */
-	if (fixup_exception(regs))
-		return;
-
-	/*
-	 * X86_32
-	 * Valid to do another page fault here, because if this fault
-	 * had been triggered by is_prefetch fixup_exception would have
-	 * handled it.
-	 *
-	 * X86_64
-	 * Hall of shame of CPU/BIOS bugs.
-	 */
-	if (is_prefetch(regs, address, error_code))
-		return;
-
-	if (is_errata93(regs, address))
-		return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-#ifdef CONFIG_X86_32
-	bust_spinlocks(1);
-
-	show_fault_oops(regs, error_code, address);
-
-	tsk->thread.cr2 = address;
-	tsk->thread.trap_no = 14;
-	tsk->thread.error_code = error_code;
-	die("Oops", regs, error_code);
-	bust_spinlocks(0);
-	do_exit(SIGKILL);
-#else /* CONFIG_X86_64 */
-	flags = oops_begin();
-
-	show_fault_oops(regs, error_code, address);
-
-	tsk->thread.cr2 = address;
-	tsk->thread.trap_no = 14;
-	tsk->thread.error_code = error_code;
-	if (__die("Oops", regs, error_code))
-		regs = NULL;
-	/* Executive summary in case the body of the oops scrolled away */
-	printk(KERN_EMERG "CR2: %016lx\n", address);
-	oops_end(flags, regs, SIGKILL);
-#endif
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
-	up_read(&mm->mmap_sem);
-#ifdef CONFIG_X86_32
-	if (is_global_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-#else
-	if (is_global_init(current)) {
-		yield();
-		goto again;
-	}
-#endif
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & PF_USER)
-		do_group_exit(SIGKILL);
-	goto no_context;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & PF_USER))
-		goto no_context;
-#ifdef CONFIG_X86_32
-	/* User space => ok to do another page fault */
-	if (is_prefetch(regs, address, error_code))
-		return;
-#endif
-	tsk->thread.cr2 = address;
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = 14;
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-}
-
-#ifdef CONFIG_X86_64
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
-#endif
-
-void vmalloc_sync_all(void)
-{
-#ifdef CONFIG_X86_32
-	/*
-	 * Note that races in the updates of insync and start aren't
-	 * problematic: insync can only get set bits added, and updates to
-	 * start are only improving performance (without affecting correctness
-	 * if undone).
-	 */
-	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
-	static unsigned long start = TASK_SIZE;
-	unsigned long address;
-
-	if (SHARED_KERNEL_PMD)
-		return;
-
-	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
-		if (!test_bit(pgd_index(address), insync)) {
-			unsigned long flags;
-			struct page *page;
-
-			spin_lock_irqsave(&pgd_lock, flags);
-			for (page = pgd_list; page; page =
-					(struct page *)page->index)
-				if (!vmalloc_sync_one(page_address(page),
-								address)) {
-					BUG_ON(page != pgd_list);
-					break;
-				}
-			spin_unlock_irqrestore(&pgd_lock, flags);
-			if (!page)
-				set_bit(pgd_index(address), insync);
-		}
-		if (address == start && test_bit(pgd_index(address), insync))
-			start = address + PGDIR_SIZE;
-	}
-#else /* CONFIG_X86_64 */
-	/*
-	 * Note that races in the updates of insync and start aren't
-	 * problematic: insync can only get set bits added, and updates to
-	 * start are only improving performance (without affecting correctness
-	 * if undone).
-	 */
-	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
-	static unsigned long start = VMALLOC_START & PGDIR_MASK;
-	unsigned long address;
-
-	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
-		if (!test_bit(pgd_index(address), insync)) {
-			const pgd_t *pgd_ref = pgd_offset_k(address);
-			struct page *page;
-
-			if (pgd_none(*pgd_ref))
-				continue;
-			spin_lock(&pgd_lock);
-			list_for_each_entry(page, &pgd_list, lru) {
-				pgd_t *pgd;
-				pgd = (pgd_t *)page_address(page) + pgd_index(address);
-				if (pgd_none(*pgd))
-					set_pgd(pgd, *pgd_ref);
-				else
-					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-			}
-			spin_unlock(&pgd_lock);
-			set_bit(pgd_index(address), insync);
-		}
-		if (address == start)
-			start = address + PGDIR_SIZE;
-	}
-	/* Check that there is no need to do the same for the modules area. */
-	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
-				(__START_KERNEL & PGDIR_MASK)));
-#endif
-}

From b406ac61e94875723540bd56e26f634afdeef489 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 880/890] x86: remove nx_enabled from fault.c

On !PAE 32-bit, _PAGE_NX will be 0, making is_prefetch always
return early.  The test is sufficient on PAE as __supported_pte_mask
is updated in the same places as nx_enabled in init_32.c which also
takes disable_nx into account.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 14a0c6e541de..99d273dbc758 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -92,18 +92,13 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
 	unsigned char *max_instr;
 
 #ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_PAE
-	/* If it was a exec fault on NX page, ignore */
-	if (nx_enabled && (error_code & PF_INSTR))
+	if (!(__supported_pte_mask & _PAGE_NX))
 		return 0;
-# else
-	return 0;
-# endif
-#else /* CONFIG_X86_64 */
+#endif
+
 	/* If it was a exec fault on NX page, ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-#endif
 
 	instr = (unsigned char *)convert_ip_to_linear(current, regs);
 	max_instr = instr + 15;

From 5b727a3b0158a129827c21ce3bfb0ba997e8ddd0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 881/890] x86: ignore spurious faults

When changing a kernel page from RO->RW, it's OK to leave stale TLB
entries around, since doing a global flush is expensive and they pose
no security problem.  They can, however, generate a spurious fault,
which we should catch and simply return from (which will have the
side-effect of reloading the TLB to the current PTE).

This can occur when running under Xen, because it frequently changes
kernel pages from RW->RO->RW to implement Xen's pagetable semantics.
It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids
doing a global TLB flush after changing page permissions.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault.c | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 99d273dbc758..1c836527dde7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -434,6 +434,51 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 }
 #endif
 
+/*
+ * Handle a spurious fault caused by a stale TLB entry.  This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X).  Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+
+	return 1;
+}
+
 /*
  * X86_32
  * Handle a fault on the vmalloc or module mapping area
@@ -568,6 +613,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
@@ -598,6 +648,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.

From fd40d6e3188b12c59696d6cb4a6f26333814d66f Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 882/890] x86: shrink some ifdefs in fault.c

The change from current to tsk in do_page_fault is safe as
this is set at the very beginning of the function.

Removes a likely() annotation from the 64-bit version, this
could have instead been added to 32-bit.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault.c | 43 ++++++++++++++-----------------------------
 1 file changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1c836527dde7..72547a7e32c6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -378,6 +378,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 #ifdef CONFIG_X86_32
 	if (!oops_may_print())
 		return;
+#endif
 
 #ifdef CONFIG_X86_PAE
 	if (error_code & PF_INSTR) {
@@ -390,28 +391,20 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 				"(uid: %d)\n", current->uid);
 	}
 #endif
+
 	printk(KERN_ALERT "BUG: unable to handle kernel ");
 	if (address < PAGE_SIZE)
 		printk(KERN_CONT "NULL pointer dereference");
 	else
 		printk(KERN_CONT "paging request");
+#ifdef CONFIG_X86_32
 	printk(KERN_CONT " at %08lx\n", address);
-
-	printk(KERN_ALERT "IP:");
-	printk_address(regs->ip, 1);
-	dump_pagetable(address);
-#else /* CONFIG_X86_64 */
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
+#else
 	printk(KERN_CONT " at %016lx\n", address);
-
+#endif
 	printk(KERN_ALERT "IP:");
 	printk_address(regs->ip, 1);
 	dump_pagetable(address);
-#endif
 }
 
 #ifdef CONFIG_X86_64
@@ -705,11 +698,7 @@ again:
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto bad_area;
-#ifdef CONFIG_X86_32
 	if (vma->vm_start <= address)
-#else
-	if (likely(vma->vm_start <= address))
-#endif
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
@@ -858,23 +847,21 @@ no_context:
  */
 #ifdef CONFIG_X86_32
 	bust_spinlocks(1);
+#else
+	flags = oops_begin();
+#endif
 
 	show_fault_oops(regs, error_code, address);
 
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
+
+#ifdef CONFIG_X86_32
 	die("Oops", regs, error_code);
 	bust_spinlocks(0);
 	do_exit(SIGKILL);
-#else /* CONFIG_X86_64 */
-	flags = oops_begin();
-
-	show_fault_oops(regs, error_code, address);
-
-	tsk->thread.cr2 = address;
-	tsk->thread.trap_no = 14;
-	tsk->thread.error_code = error_code;
+#else
 	if (__die("Oops", regs, error_code))
 		regs = NULL;
 	/* Executive summary in case the body of the oops scrolled away */
@@ -888,18 +875,16 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-#ifdef CONFIG_X86_32
 	if (is_global_init(tsk)) {
 		yield();
+#ifdef CONFIG_X86_32
 		down_read(&mm->mmap_sem);
 		goto survive;
-	}
 #else
-	if (is_global_init(current)) {
-		yield();
 		goto again;
-	}
 #endif
+	}
+
 	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & PF_USER)
 		do_group_exit(SIGKILL);

From 6194ba6ff6ccf8d5c54c857600843c67aa82c407 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 883/890] x86: don't special-case pmd allocations as much

In x86 PAE mode, stop treating pmds as a special case.  Previously
they were always allocated and freed with the pgd.  The modifies the
code to be the same as 64-bit mode, where they are allocated on
demand.

This is a step on the way to unifying 32/64-bit pagetable allocation
as much as possible.

There is a complicating wart, however.  When you install a new
reference to a pmd in the pgd, the processor isn't guaranteed to see
it unless you reload cr3.  Since reloading cr3 also has the
side-effect of flushing the tlb, this is an expense that we want to
avoid whereever possible.

This patch simply avoids reloading cr3 unless the update is to the
current pagetable.  Later patches will optimise this further.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: William Irwin <wli@holomorphy.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_32.c            | 13 ------
 arch/x86/mm/pgtable_32.c         | 68 --------------------------------
 include/asm-x86/pgalloc_32.h     | 22 +++++++++--
 include/asm-x86/pgtable-3level.h | 39 ++++++++++++------
 include/asm-x86/pgtable_32.h     |  3 +-
 5 files changed, 47 insertions(+), 98 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 02d269c07b96..da524fb22422 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -704,19 +704,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 #endif
 
-struct kmem_cache *pmd_cache;
-
-void __init pgtable_cache_init(void)
-{
-	if (PTRS_PER_PMD > 1) {
-		pmd_cache = kmem_cache_create("pmd",
-					      PTRS_PER_PMD*sizeof(pmd_t),
-					      PTRS_PER_PMD*sizeof(pmd_t),
-					      SLAB_PANIC,
-					      pmd_ctor);
-	}
-}
-
 /*
  * This function cannot be __init, since exceptions don't work in that
  * section.  Put this after the callers, so that it cannot be inlined.
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 3a6c9200058d..5ca3552474ae 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -195,11 +195,6 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-void pmd_ctor(struct kmem_cache *cache, void *pmd)
-{
-	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-}
-
 /*
  * List of all pgd's needed for non-PAE so it can invalidate entries
  * in both cached and uncached pgd's; not needed for PAE since the
@@ -285,7 +280,6 @@ static void pgd_dtor(void *pgd)
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -367,84 +361,22 @@ static void pgd_mop_up_pmds(pgd_t *pgd)
 }
 #endif	/* CONFIG_X86_PAE */
 
-/* If we allocate a pmd for part of the kernel address space, then
-   make sure its initialized with the appropriate kernel mappings.
-   Otherwise use a cached zeroed pmd.  */
-static pmd_t *pmd_cache_alloc(int idx)
-{
-	pmd_t *pmd;
-
-	if (idx >= USER_PTRS_PER_PGD) {
-		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
-
-		if (pmd)
-			memcpy(pmd,
-			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
-			       sizeof(pmd_t) * PTRS_PER_PMD);
-	} else
-		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-
-	return pmd;
-}
-
-static void pmd_cache_free(pmd_t *pmd, int idx)
-{
-	if (idx >= USER_PTRS_PER_PGD)
-		free_page((unsigned long)pmd);
-	else
-		kmem_cache_free(pmd_cache, pmd);
-}
-
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	int i;
 	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
 
-	if (PTRS_PER_PMD == 1 || !pgd)
-		return pgd;
-
 	mm->pgd = pgd;		/* so that alloc_pd can use it */
 
- 	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = pmd_cache_alloc(i);
-
-		if (!pmd)
-			goto out_oom;
-
-		paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
-		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
-	}
 	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
 		quicklist_free(0, pgd_dtor, pgd);
 		pgd = NULL;
 	}
 
 	return pgd;
-
-out_oom:
-	for (i--; i >= 0; i--) {
-		pgd_t pgdent = pgd[i];
-		void* pmd = (void *)__va(pgd_val(pgdent)-1);
-		paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-		pmd_cache_free(pmd, i);
-	}
-	quicklist_free(0, pgd_dtor, pgd);
-	return NULL;
 }
 
 void pgd_free(pgd_t *pgd)
 {
-	int i;
-
-	/* in the PAE case user pgd entries are overwritten before usage */
-	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-			pgd_t pgdent = pgd[i];
-			void* pmd = (void *)__va(pgd_val(pgdent)-1);
-			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			pmd_cache_free(pmd, i);
-		}
-	/* in the non-PAE case, free_pgtables() clears user pgd entries */
 	pgd_mop_up_pmds(pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index 3482c3427897..0caa37a9a25f 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -63,21 +63,35 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
  */
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	BUG();
-	return (pmd_t *)2;
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pmd_free(pmd_t *pmd)
 {
+	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+	free_page((unsigned long)pmd);
 }
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
+	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
-	BUG();
+	paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+	 * the TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
 }
 #endif	/* CONFIG_X86_PAE */
 
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 62a1ffbc8784..ed4c6f0e57ec 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -15,9 +15,19 @@
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
 
-#define pud_none(pud)				0
-#define pud_bad(pud)				0
-#define pud_present(pud)			1
+
+static inline int pud_none(pud_t pud)
+{
+	return pud_val(pud) == 0;
+}
+static inline int pud_bad(pud_t pud)
+{
+	return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+static inline int pud_present(pud_t pud)
+{
+	return pud_val(pud) & _PAGE_PRESENT;
+}
 
 /* Rules for using set_pte: the pte being assigned *must* be
  * either not present or in a state where the hardware will
@@ -58,7 +68,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 }
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
-	*pudp = pud;
+	set_64bit((unsigned long long *)(pudp),native_pud_val(pud));
 }
 
 /*
@@ -81,13 +91,20 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-/*
- * Pentium-II erratum A13: in PAE mode we explicitly have to flush
- * the TLB via cr3 if the top-level pgd is changed...
- * We do not let the generic code free and clear pgd entries due to
- * this erratum.
- */
-static inline void pud_clear (pud_t * pud) { }
+static inline void pud_clear(pud_t *pudp)
+{
+	set_pud(pudp, __pud(0));
+
+	/*
+	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+	 * the TLB via cr3 if the top-level pgd is changed...
+	 *
+	 * XXX I don't think we need to worry about this here, since
+	 * when clearing the pud, the calling code needs to flush the
+	 * tlb anyway.  But do it now for safety's sake. - jsgf
+	 */
+	write_cr3(read_cr3());
+}
 
 #define pud_page(pud) \
 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index ca7b150ca8b7..7b61cb5989b0 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -31,8 +31,7 @@ extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 void check_pgt_cache(void);
 
-void pmd_ctor(struct kmem_cache *, void *);
-void pgtable_cache_init(void);
+static inline void pgtable_cache_init(void) {}
 void paging_init(void);
 
 

From f212ec4b7b4d84290f12c9c0416cdea283bf5f40 Mon Sep 17 00:00:00 2001
From: Bernhard Kaindl <bk@suse.de>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 884/890] x86: early boot debugging via FireWire
 (ohci1394_dma=early)

This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide wether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues ACPI or other subsystems which are executed very early.

If the config option is not enabled, no code is changed, and if the boot
paramenter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can be even enabled
in standard, non-debug kernels.

With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.

In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
to gdb to talk to kgdb using remote remory reads and writes over FireWire.

An version of the gdb stub fore FireWire is able to read all global data
from a system which is running a a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.

A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt

It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff

Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/debugging-via-ohci1394.txt | 179 ++++++++++++++
 arch/x86/kernel/setup_32.c               |  11 +
 arch/x86/kernel/setup_64.c               |  11 +
 drivers/Makefile                         |   2 +-
 drivers/ieee1394/Makefile                |   1 +
 drivers/ieee1394/init_ohci1394_dma.c     | 285 +++++++++++++++++++++++
 include/asm-x86/fixmap_32.h              |   3 +
 include/asm-x86/fixmap_64.h              |   3 +
 include/linux/init_ohci1394_dma.h        |   4 +
 lib/Kconfig.debug                        |  28 +++
 10 files changed, 526 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/debugging-via-ohci1394.txt
 create mode 100644 drivers/ieee1394/init_ohci1394_dma.c
 create mode 100644 include/linux/init_ohci1394_dma.h

diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
new file mode 100644
index 000000000000..de4804e8b396
--- /dev/null
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -0,0 +1,179 @@
+
+  Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
+  ---------------------------------------------------------------------------
+
+Introduction
+------------
+
+Basically all FireWire controllers which are in use today are compliant
+to the OHCI-1394 specification which defines the controller to be a PCI
+bus master which uses DMA to offload data transfers from the CPU and has
+a "Physical Response Unit" which executes specific requests by employing
+PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver.
+
+Once properly configured, remote machines can send these requests to
+ask the OHCI-1394 controller to perform read and write requests on
+physical system memory and, for read requests, send the result of
+the physical memory read back to the requester.
+
+With that, it is possible to debug issues by reading interesting memory
+locations such as buffers like the printk buffer or the process table.
+
+Retrieving a full system memory dump is also possible over the FireWire,
+using data transfer rates in the order of 10MB/s or more.
+
+Memory access is currently limited to the low 4G of physical address
+space which can be a problem on IA64 machines where memory is located
+mostly above that limit, but it is rarely a problem on more common
+hardware such as hardware based on x86, x86-64 and PowerPC.
+
+Together with a early initialization of the OHCI-1394 controller for debugging,
+this facility proved most useful for examining long debugs logs in the printk
+buffer on to debug early boot problems in areas like ACPI where the system
+fails to boot and other means for debugging (serial port) are either not
+available (notebooks) or too slow for extensive debug information (like ACPI).
+
+Drivers
+-------
+
+The OHCI-1394 drivers in drivers/firewire and drivers/ieee1394 initialize
+the OHCI-1394 controllers to a working state and can be used to enable
+physical DMA. By default you only have to load the driver, and physical
+DMA access will be granted to all remote nodes, but it can be turned off
+when using the ohci1394 driver.
+
+Because these drivers depend on the PCI enumeration to be completed, an
+initialization routine which can runs pretty early (long before console_init(),
+which makes the printk buffer appear on the console can be called) was written.
+
+To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
+Provide code for enabling DMA over FireWire early on boot) and pass the
+parameter "ohci1394_dma=early" to the recompiled kernel on boot.
+
+Tools
+-----
+
+firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported
+it from PowerPC to x86 and x86_64 and added functionality, firescope can now
+be used to view the printk buffer of a remote machine, even with live update.
+
+Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
+from 32-bit firescope and vice versa:
+- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2
+
+and he implemented fast system dump (alpha version - read README.txt):
+- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2
+
+There is also a gdb proxy for firewire which allows to use gdb to access
+data which can be referenced from symbols found by gdb in vmlinux:
+- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2
+
+The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
+yet stable) with kgdb over an memory-based communication module (kgdbom).
+
+Getting Started
+---------------
+
+The OHCI-1394 specification regulates that the OHCI-1394 controller must
+disable all physical DMA on each bus reset.
+
+This means that if you want to debug an issue in a system state where
+interrupts are disabled and where no polling of the OHCI-1394 controller
+for bus resets takes place, you have to establish any FireWire cable
+connections and fully initialize all FireWire hardware __before__ the
+system enters such state.
+
+Step-by-step instructions for using firescope with early OHCI initialization:
+
+1) Verify that your hardware is supported:
+
+   Load the ohci1394 or the fw-ohci module and check your kernel logs.
+   You should see a line similar to
+
+   ohci1394: fw-host0: OHCI-1394 1.1 (PCI): IRQ=[18]  MMIO=[fe9ff800-fe9fffff]
+   ... Max Packet=[2048]  IR/IT contexts=[4/8]
+
+   when loading the driver. If you have no supported controller, many PCI,
+   CardBus and even some Express cards which are fully compliant to OHCI-1394
+   specification are available. If it requires no driver for Windows operating
+   systems, it most likely is. Only specialized shops have cards which are not
+   compliant, they are based on TI PCILynx chips and require drivers for Win-
+   dows operating systems.
+
+2) Establish a working FireWire cable connection:
+
+   Any FireWire cable, as long at it provides electrically and mechanically
+   stable connection and has matching connectors (there are small 4-pin and
+   large 6-pin FireWire ports) will do.
+
+   If an driver is running on both machines you should see a line like
+
+   ieee1394: Node added: ID:BUS[0-01:1023]  GUID[0090270001b84bba]
+
+   on both machines in the kernel log when the cable is plugged in
+   and connects the two machines.
+
+3) Test physical DMA using firescope:
+
+   On the debug host,
+	- load the raw1394 module,
+	- make sure that /dev/raw1394 is accessible,
+   then start firescope:
+
+	$ firescope
+	Port 0 (ohci1394) opened, 2 nodes detected
+
+	FireScope
+	---------
+	Target : <unspecified>
+	Gen    : 1
+	[Ctrl-T] choose target
+	[Ctrl-H] this menu
+	[Ctrl-Q] quit
+
+    ------> Press Ctrl-T now, the output should be similar to:
+
+	2 nodes available, local node is: 0
+	 0: ffc0, uuid: 00000000 00000000 [LOCAL]
+	 1: ffc1, uuid: 00279000 ba4bb801
+
+   Besides the [LOCAL] node, it must show another node without error message.
+
+4) Prepare for debugging with early OHCI-1394 initialization:
+
+   4.1) Kernel compilation and installation on debug target
+
+   Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT
+   (Kernel hacking: Provide code for enabling DMA over FireWire early on boot)
+   enabled and install it on the machine to be debugged (debug target).
+
+   4.2) Transfer the System.map of the debugged kernel to the debug host
+
+   Copy the System.map of the kernel be debugged to the debug host (the host
+   which is connected to the debugged machine over the FireWire cable).
+
+5) Retrieving the printk buffer contents:
+
+   With the FireWire cable connected, the OHCI-1394 driver on the debugging
+   host loaded, reboot the debugged machine, booting the kernel which has
+   CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
+
+   Then, on the debugging host, run firescope, for example by using -A:
+
+	firescope -A System.map-of-debug-target-kernel
+
+   Note: -A automatically attaches to the first non-local node. It only works
+   reliably if only connected two machines are connected using FireWire.
+
+   After having attached to the debug target, press Ctrl-D to view the
+   complete printk buffer or Ctrl-U to enter auto update mode and get an
+   updated live view of recent kernel messages logged on the debug target.
+
+   Call "firescope -h" to get more information on firescope's options.
+
+Notes
+-----
+Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs
+
+FireWire is a trademark of Apple Inc. - for more information please refer to:
+http://en.wikipedia.org/wiki/FireWire
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 9c0ef4945a58..62adc5f20be5 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -45,6 +45,7 @@
 #include <linux/dmi.h>
 #include <linux/pfn.h>
 #include <linux/pci.h>
+#include <linux/init_ohci1394_dma.h>
 
 #include <video/edid.h>
 
@@ -787,6 +788,16 @@ void __init setup_arch(char **cmdline_p)
 	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
 #endif
 	paging_init();
+
+	/*
+	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
+	 */
+
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	if (init_ohci1394_dma_early)
+		init_ohci1394_dma_on_all_controllers();
+#endif
+
 	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 697533e86822..77fb87bf6e5a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -41,6 +41,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
 #include <linux/uaccess.h>
+#include <linux/init_ohci1394_dma.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -253,6 +254,11 @@ void __attribute__((weak)) __init memory_setup(void)
        machine_specific_memory_setup();
 }
 
+/*
+ * setup_arch - architecture-specific boot-time initializations
+ *
+ * Note: On x86_64, fixmaps are ready for use even before this is called.
+ */
 void __init setup_arch(char **cmdline_p)
 {
 	unsigned i;
@@ -302,6 +308,11 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	if (init_ohci1394_dma_early)
+		init_ohci1394_dma_on_all_controllers();
+#endif
+
 	finish_e820_parsing();
 
 	early_gart_iommu_check();
diff --git a/drivers/Makefile b/drivers/Makefile
index 8cb37e3557d4..d92d4d82d001 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_SCSI)		+= scsi/
 obj-$(CONFIG_ATA)		+= ata/
 obj-$(CONFIG_FUSION)		+= message/
 obj-$(CONFIG_FIREWIRE)		+= firewire/
-obj-$(CONFIG_IEEE1394)		+= ieee1394/
+obj-y				+= ieee1394/
 obj-$(CONFIG_UIO)		+= uio/
 obj-y				+= cdrom/
 obj-y				+= auxdisplay/
diff --git a/drivers/ieee1394/Makefile b/drivers/ieee1394/Makefile
index 489c133664d5..1f8153b57503 100644
--- a/drivers/ieee1394/Makefile
+++ b/drivers/ieee1394/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_IEEE1394_SBP2) += sbp2.o
 obj-$(CONFIG_IEEE1394_DV1394) += dv1394.o
 obj-$(CONFIG_IEEE1394_ETH1394) += eth1394.o
 
+obj-$(CONFIG_PROVIDE_OHCI1394_DMA_INIT) += init_ohci1394_dma.o
diff --git a/drivers/ieee1394/init_ohci1394_dma.c b/drivers/ieee1394/init_ohci1394_dma.c
new file mode 100644
index 000000000000..ddaab6eb8ace
--- /dev/null
+++ b/drivers/ieee1394/init_ohci1394_dma.c
@@ -0,0 +1,285 @@
+/*
+ * init_ohci1394_dma.c - Initializes physical DMA on all OHCI 1394 controllers
+ *
+ * Copyright (C) 2006-2007      Bernhard Kaindl <bk@suse.de>
+ *
+ * Derived from drivers/ieee1394/ohci1394.c and arch/x86/kernel/early-quirks.c
+ * this file has functions to:
+ * - scan the PCI very early on boot for all OHCI 1394-compliant controllers
+ * - reset and initialize them and make them join the IEEE1394 bus and
+ * - enable physical DMA on them to allow remote debugging
+ *
+ * All code and data is marked as __init and __initdata, respective as
+ * during boot, all OHCI1394 controllers may be claimed by the firewire
+ * stack and at this point, this code should not touch them anymore.
+ *
+ * To use physical DMA after the initialization of the firewire stack,
+ * be sure that the stack enables it and (re-)attach after the bus reset
+ * which may be caused by the firewire stack initialization.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/interrupt.h>	/* for ohci1394.h */
+#include <linux/delay.h>
+#include <linux/pci.h>		/* for PCI defines */
+#include <linux/init_ohci1394_dma.h>
+#include <asm/pci-direct.h>	/* for direct PCI config space access */
+#include <asm/fixmap.h>
+
+#include "ieee1394_types.h"
+#include "ohci1394.h"
+
+int __initdata init_ohci1394_dma_early;
+
+/* Reads a PHY register of an OHCI-1394 controller */
+static inline u8 __init get_phy_reg(struct ti_ohci *ohci, u8 addr)
+{
+	int i;
+	quadlet_t r;
+
+	reg_write(ohci, OHCI1394_PhyControl, (addr << 8) | 0x00008000);
+
+	for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+		if (reg_read(ohci, OHCI1394_PhyControl) & 0x80000000)
+			break;
+		mdelay(1);
+	}
+	r = reg_read(ohci, OHCI1394_PhyControl);
+
+	return (r & 0x00ff0000) >> 16;
+}
+
+/* Writes to a PHY register of an OHCI-1394 controller */
+static inline void __init set_phy_reg(struct ti_ohci *ohci, u8 addr, u8 data)
+{
+	int i;
+
+	reg_write(ohci, OHCI1394_PhyControl, (addr << 8) | data | 0x00004000);
+
+	for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+		u32 r = reg_read(ohci, OHCI1394_PhyControl);
+		if (!(r & 0x00004000))
+			break;
+		mdelay(1);
+	}
+}
+
+/* Resets an OHCI-1394 controller (for sane state before initialization) */
+static inline void __init init_ohci1394_soft_reset(struct ti_ohci *ohci) {
+	int i;
+
+	reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_softReset);
+
+	for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+		if (!(reg_read(ohci, OHCI1394_HCControlSet)
+				   & OHCI1394_HCControl_softReset))
+			break;
+		mdelay(1);
+	}
+}
+
+/* Basic OHCI-1394 register and port inititalization */
+static inline void __init init_ohci1394_initialize(struct ti_ohci *ohci)
+{
+	quadlet_t bus_options;
+	int num_ports, i;
+
+	/* Put some defaults to these undefined bus options */
+	bus_options = reg_read(ohci, OHCI1394_BusOptions);
+	bus_options |=  0x60000000; /* Enable CMC and ISC */
+	bus_options &= ~0x00ff0000; /* XXX: Set cyc_clk_acc to zero for now */
+	bus_options &= ~0x18000000; /* Disable PMC and BMC */
+	reg_write(ohci, OHCI1394_BusOptions, bus_options);
+
+	/* Set the bus number */
+	reg_write(ohci, OHCI1394_NodeID, 0x0000ffc0);
+
+	/* Enable posted writes */
+	reg_write(ohci, OHCI1394_HCControlSet,
+			OHCI1394_HCControl_postedWriteEnable);
+
+	/* Clear link control register */
+	reg_write(ohci, OHCI1394_LinkControlClear, 0xffffffff);
+
+	/* enable phys */
+	reg_write(ohci, OHCI1394_LinkControlSet,
+			OHCI1394_LinkControl_RcvPhyPkt);
+
+	/* Don't accept phy packets into AR request context */
+	reg_write(ohci, OHCI1394_LinkControlClear, 0x00000400);
+
+	/* Clear the Isochonouys interrupt masks */
+	reg_write(ohci, OHCI1394_IsoRecvIntMaskClear, 0xffffffff);
+	reg_write(ohci, OHCI1394_IsoRecvIntEventClear, 0xffffffff);
+	reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 0xffffffff);
+	reg_write(ohci, OHCI1394_IsoXmitIntEventClear, 0xffffffff);
+
+	/* Accept asyncronous transfer requests from all nodes for now */
+	reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000);
+
+	/* Specify asyncronous transfer retries */
+	reg_write(ohci, OHCI1394_ATRetries,
+		  OHCI1394_MAX_AT_REQ_RETRIES |
+		  (OHCI1394_MAX_AT_RESP_RETRIES<<4) |
+		  (OHCI1394_MAX_PHYS_RESP_RETRIES<<8));
+
+	/* We don't want hardware swapping */
+	reg_write(ohci, OHCI1394_HCControlClear, OHCI1394_HCControl_noByteSwap);
+
+	/* Enable link */
+	reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_linkEnable);
+
+	/* If anything is connected to a port, make sure it is enabled */
+	num_ports = get_phy_reg(ohci, 2) & 0xf;
+	for (i = 0; i < num_ports; i++) {
+		unsigned int status;
+
+		set_phy_reg(ohci, 7, i);
+		status = get_phy_reg(ohci, 8);
+
+		if (status & 0x20)
+			set_phy_reg(ohci, 8, status & ~1);
+	}
+}
+
+/**
+ * init_ohci1394_wait_for_busresets - wait until bus resets are completed
+ *
+ * OHCI1394 initialization itself and any device going on- or offline
+ * and any cable issue cause a IEEE1394 bus reset. The OHCI1394 spec
+ * specifies that physical DMA is disabled on each bus reset and it
+ * has to be enabled after each bus reset when needed. We resort
+ * to polling here because on early boot, we have no interrupts.
+ */
+static inline void __init init_ohci1394_wait_for_busresets(struct ti_ohci *ohci)
+{
+	int i, events;
+
+	for (i=0; i < 9; i++) {
+		mdelay(200);
+		events = reg_read(ohci, OHCI1394_IntEventSet);
+		if (events & OHCI1394_busReset)
+			reg_write(ohci, OHCI1394_IntEventClear,
+					OHCI1394_busReset);
+	}
+}
+
+/**
+ * init_ohci1394_enable_physical_dma - Enable physical DMA for remote debugging
+ * This enables remote DMA access over IEEE1394 from every host for the low
+ * 4GB of address space. DMA accesses above 4GB are not available currently.
+ */
+static inline void __init init_ohci1394_enable_physical_dma(struct ti_ohci *hci)
+{
+	reg_write(hci, OHCI1394_PhyReqFilterHiSet, 0xffffffff);
+	reg_write(hci, OHCI1394_PhyReqFilterLoSet, 0xffffffff);
+	reg_write(hci, OHCI1394_PhyUpperBound, 0xffff0000);
+}
+
+/**
+ * init_ohci1394_reset_and_init_dma - init controller and enable DMA
+ * This initializes the given controller and enables physical DMA engine in it.
+ */
+static inline void __init init_ohci1394_reset_and_init_dma(struct ti_ohci *ohci)
+{
+	/* Start off with a soft reset, clears everything to a sane state. */
+	init_ohci1394_soft_reset(ohci);
+
+	/* Accessing some registers without LPS enabled may cause lock up */
+	reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_LPS);
+
+	/* Disable and clear interrupts */
+	reg_write(ohci, OHCI1394_IntEventClear, 0xffffffff);
+	reg_write(ohci, OHCI1394_IntMaskClear, 0xffffffff);
+
+	mdelay(50); /* Wait 50msec to make sure we have full link enabled */
+
+	init_ohci1394_initialize(ohci);
+	/*
+	 * The initialization causes at least one IEEE1394 bus reset. Enabling
+	 * physical DMA only works *after* *all* bus resets have calmed down:
+	 */
+	init_ohci1394_wait_for_busresets(ohci);
+
+	/* We had to wait and do this now if we want to debug early problems */
+	init_ohci1394_enable_physical_dma(ohci);
+}
+
+/**
+ * init_ohci1394_controller - Map the registers of the controller and init DMA
+ * This maps the registers of the specified controller and initializes it
+ */
+static inline void __init init_ohci1394_controller(int num, int slot, int func)
+{
+	unsigned long ohci_base;
+	struct ti_ohci ohci;
+
+	printk(KERN_INFO "init_ohci1394_dma: initializing OHCI-1394"
+			 " at %02x:%02x.%x\n", num, slot, func);
+
+	ohci_base = read_pci_config(num, slot, func, PCI_BASE_ADDRESS_0+(0<<2))
+						   & PCI_BASE_ADDRESS_MEM_MASK;
+
+	set_fixmap_nocache(FIX_OHCI1394_BASE, ohci_base);
+
+	ohci.registers = (void *)fix_to_virt(FIX_OHCI1394_BASE);
+
+	init_ohci1394_reset_and_init_dma(&ohci);
+}
+
+/**
+ * debug_init_ohci1394_dma - scan for OHCI1394 controllers and init DMA on them
+ * Scans the whole PCI space for OHCI1394 controllers and inits DMA on them
+ */
+void __init init_ohci1394_dma_on_all_controllers(void)
+{
+	int num, slot, func;
+
+	if (!early_pci_allowed())
+		return;
+
+	/* Poor man's PCI discovery, the only thing we can do at early boot */
+	for (num = 0; num < 32; num++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				u32 class = read_pci_config(num,slot,func,
+							PCI_CLASS_REVISION);
+				if ((class == 0xffffffff))
+					continue; /* No device at this func */
+
+				if (class>>8 != PCI_CLASS_SERIAL_FIREWIRE_OHCI)
+					continue; /* Not an OHCI-1394 device */
+
+				init_ohci1394_controller(num, slot, func);
+				break; /* Assume one controller per device */
+			}
+		}
+	}
+	printk(KERN_INFO "init_ohci1394_dma: finished initializing OHCI DMA\n");
+}
+
+/**
+ * setup_init_ohci1394_early - enables early OHCI1394 DMA initialization
+ */
+static int __init setup_ohci1394_dma(char *opt)
+{
+	if (!strcmp(opt, "early"))
+		init_ohci1394_dma_early = 1;
+	return 0;
+}
+
+/* passing ohci1394_dma=early on boot causes early OHCI1394 DMA initialization */
+early_param("ohci1394_dma", setup_ohci1394_dma);
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index fde140fd6d95..a7404d50686b 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -104,6 +104,9 @@ enum fixed_addresses {
 			(__end_of_permanent_fixed_addresses & 511),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	FIX_WP_TEST,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 8f44782e5fe5..70ddb21e6458 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -44,6 +44,9 @@ enum fixed_addresses {
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
 	FIX_EFI_IO_MAP_LAST_PAGE,
 	FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
diff --git a/include/linux/init_ohci1394_dma.h b/include/linux/init_ohci1394_dma.h
new file mode 100644
index 000000000000..3c03a4bba5e4
--- /dev/null
+++ b/include/linux/init_ohci1394_dma.h
@@ -0,0 +1,4 @@
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+extern int __initdata init_ohci1394_dma_early;
+extern void __init init_ohci1394_dma_on_all_controllers(void);
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index aa56e631580d..89f4035b526c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -586,5 +586,33 @@ config LATENCYTOP
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
+config PROVIDE_OHCI1394_DMA_INIT
+	bool "Provide code for enabling DMA over FireWire early on boot"
+	depends on PCI && X86
+	help
+	  If you want to debug problems which hang or crash the kernel early
+	  on boot and the crashing machine has a FireWire port, you can use
+	  this feature to remotely access the memory of the crashed machine
+	  over FireWire. This employs remote DMA as part of the OHCI1394
+	  specification which is now the standard for FireWire controllers.
+
+	  With remote DMA, you can monitor the printk buffer remotely using
+	  firescope and access all memory below 4GB using fireproxy from gdb.
+	  Even controlling a kernel debugger is possible using remote DMA.
+
+	  Usage:
+
+	  If ohci1394_dma=early is used as boot parameter, it will initialize
+	  all OHCI1394 controllers which are found in the PCI config space.
+
+	  As all changes to the FireWire bus such as enabling and disabling
+	  devices cause a bus reset and thereby disable remote DMA for all
+	  devices, be sure to have the cable plugged and FireWire enabled on
+	  the debugging host before booting the debug target for debugging.
+
+	  This code (~1k) is freed after boot. By then, the firewire stack
+	  in charge of the OHCI-1394 controllers should be used instead.
+
+	  See Documentation/debugging-via-ohci1394.txt for more information.
 
 source "samples/Kconfig"

From fa28ba21cec24d3fa1279bcae7e5d5ff6224635a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 885/890] x86: defer cr3 reload when doing pud_clear()

PAE mode requires that we reload cr3 in order to guarantee that
changes to the pgd will be noticed by the processor.  This means that
in principle pud_clear needs to reload cr3 every time.  However,
because reloading cr3 implies a tlb flush, we want to avoid it where
possible.

pud_clear() is only used in a couple of places:
 - in free_pmd_range(), when pulling down a range of process address space, and
 - huge_pmd_unshare()

In both cases, the calling code will do a a tlb flush anyway, so
there's no need to do it within pud_clear().

In free_pmd_range(), the pud_clear is immediately followed by
pmd_free_tlb(); we can hook that to make the mmu_gather do an
unconditional full flush to make sure cr3 gets reloaded.

In huge_pmd_unshare, it is followed by flush_tlb_range, which always
results in a full cr3-reload tlb flush.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: William Irwin <wli@holomorphy.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgalloc_32.h     |  7 +++++++
 include/asm-x86/pgtable-3level.h | 21 +++++++++++++++------
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index 0caa37a9a25f..10c2b452e64c 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -74,6 +74,13 @@ static inline void pmd_free(pmd_t *pmd)
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
+	/* This is called just after the pmd has been detached from
+	   the pgd, which requires a full tlb flush to be recognized
+	   by the CPU.  Rather than incurring multiple tlb flushes
+	   while the address space is being pulled down, make the tlb
+	   gathering machinery do a full flush when we're done. */
+	tlb->fullmm = 1;
+
 	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index ed4c6f0e57ec..a195c3e757b9 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -96,14 +96,23 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 
 	/*
-	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
-	 * the TLB via cr3 if the top-level pgd is changed...
+	 * In principle we need to do a cr3 reload here to make sure
+	 * the processor recognizes the changed pgd.  In practice, all
+	 * the places where pud_clear() gets called are followed by
+	 * full tlb flushes anyway, so we can defer the cost here.
 	 *
-	 * XXX I don't think we need to worry about this here, since
-	 * when clearing the pud, the calling code needs to flush the
-	 * tlb anyway.  But do it now for safety's sake. - jsgf
+	 * Specifically:
+	 *
+	 * mm/memory.c:free_pmd_range() - immediately after the
+	 * pud_clear() it does a pmd_free_tlb().  We change the
+	 * mmu_gather structure to do a full tlb flush (which has the
+	 * effect of reloading cr3) when the pagetable free is
+	 * complete.
+	 *
+	 * arch/x86/mm/hugetlbpage.c:huge_pmd_unshare() - the call to
+	 * this is followed by a flush_tlb_range, which on x86 does a
+	 * full tlb flush.
 	 */
-	write_cr3(read_cr3());
 }
 
 #define pud_page(pud) \

From e3ed910db221768f8fd6192b13373e17d61bcdf0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 886/890] x86: use the same pgd_list for PAE and 64-bit

Use a standard list threaded through page->lru for maintaining the pgd
list on PAE.  This is the same as 64-bit, and seems saner than using a
non-standard list via page->index.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/fault.c          | 10 +++-------
 arch/x86/mm/pageattr.c       |  2 +-
 arch/x86/mm/pgtable_32.c     | 19 +++++--------------
 include/asm-x86/pgtable.h    |  2 ++
 include/asm-x86/pgtable_32.h |  2 --
 include/asm-x86/pgtable_64.h |  3 ---
 6 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 72547a7e32c6..e28cc5277b16 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -907,10 +907,8 @@ do_sigbus:
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#ifdef CONFIG_X86_64
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
-#endif
 
 void vmalloc_sync_all(void)
 {
@@ -935,13 +933,11 @@ void vmalloc_sync_all(void)
 			struct page *page;
 
 			spin_lock_irqsave(&pgd_lock, flags);
-			for (page = pgd_list; page; page =
-					(struct page *)page->index)
+			list_for_each_entry(page, &pgd_list, lru) {
 				if (!vmalloc_sync_one(page_address(page),
-								address)) {
-					BUG_ON(page != pgd_list);
+						      address))
 					break;
-				}
+			}
 			spin_unlock_irqrestore(&pgd_lock, flags);
 			if (!page)
 				set_bit(pgd_index(address), insync);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index ec07c1873d65..1cc6607eacb0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -175,7 +175,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (!SHARED_KERNEL_PMD) {
 		struct page *page;
 
-		for (page = pgd_list; page; page = (struct page *)page->index) {
+		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			pud_t *pud;
 			pmd_t *pmd;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 5ca3552474ae..2ae5999a795a 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -205,27 +205,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  * vmalloc faults work because attached pagetables are never freed.
  * -- wli
  */
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
-	page->index = (unsigned long)pgd_list;
-	if (pgd_list)
-		set_page_private(pgd_list, (unsigned long)&page->index);
-	pgd_list = page;
-	set_page_private(page, (unsigned long)&pgd_list);
+
+	list_add(&page->lru, &pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
-	struct page *next, **pprev, *page = virt_to_page(pgd);
-	next = (struct page *)page->index;
-	pprev = (struct page **)page_private(page);
-	*pprev = next;
-	if (next)
-		set_page_private(next, (unsigned long)pprev);
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
 }
 
 
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 269e7e29ea8e..cd2524f07452 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -131,6 +131,8 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
 
 /*
  * The following only work if pte_present() is true.
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 7b61cb5989b0..21e70fbf1dae 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -27,8 +27,6 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern struct kmem_cache *pmd_cache;
-extern spinlock_t pgd_lock;
-extern struct page *pgd_list;
 void check_pgt_cache(void);
 
 static inline void pgtable_cache_init(void) {}
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index ce1dbab75e39..6e615a103c2f 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -240,9 +240,6 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
-extern spinlock_t pgd_lock;
-extern struct list_head pgd_list;
-
 extern int kern_addr_valid(unsigned long addr); 
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\

From f6df72e71eba621b2f5c49b3a763116fac748f6e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 30 Jan 2008 13:34:11 +0100
Subject: [PATCH 887/890] x86: fix early_ioremap pagetable ops

Put appropriate pagetable update hooks in so that paravirt knows
what's going on in there.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/ioremap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac9ab20d8092..ed795721ca8e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -18,6 +18,7 @@
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 enum ioremap_mode {
 	IOR_MODE_UNCACHED,
@@ -326,6 +327,7 @@ void __init early_ioremap_clear(void)
 
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = 0;
+	paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 

From bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947 Mon Sep 17 00:00:00 2001
From: Joachim Deguara <joachim.deguara@amd.com>
Date: Wed, 30 Jan 2008 13:34:12 +0100
Subject: [PATCH 888/890] x86: add PCI IDs to k8topology_64.c

This just adds the PCI IDs of AMD's family 10h and 11h CPU's northbridges to
k8topology discovery.

Signed-off-by: Joachim Deguara <joachim.deguara@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/k8topology_64.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 32b963de9b8f..7a2ebce87df5 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -28,11 +28,15 @@ static __init int find_northbridge(void)
 		u32 header;
 
 		header = read_pci_config(0, num, 0, 0x00);
-		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
+		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
 			continue;
 
 		header = read_pci_config(0, num, 1, 0x00);
-		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
+		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
 			continue;
 		return num;
 	}

From 9198715763e8d0fd7fb7578c07916a5313e28b9d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:34:12 +0100
Subject: [PATCH 889/890] x86: fix overlap between pagetable with bss section

one early crash on one 8 node 256g machine:

Command line: console=uart8250,io,0x3f8,115200n8 initrd=kernel.org/mydisk11_x86_64.gz rw root=/dev/ram0 debug initcall_debug apic=debug acpi.debug_level=0x0000000f pci=routeirq ip=dhcp load_ramdisk=1 ramdisk_size=131072 BOOT_IMAGE=kernel.org/bzImage_2.6.25_k8.1
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009bc00 (usable)
 BIOS-e820: 000000000009bc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000e6000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 00000000dffe0000 (usable)
 BIOS-e820: 00000000dffe0000 - 00000000dffee000 (ACPI data)
 BIOS-e820: 00000000dffee000 - 00000000dffff050 (ACPI NVS)
 BIOS-e820: 00000000dffff050 - 00000000e0000000 (reserved)
 BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved)
 BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
 BIOS-e820: 00000000ff700000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000004020000000 (usable)
Early serial console at I/O port 0x3f8 (options '115200n8')
console [uart0] enabled
end_pfn_map = 67239936
Kernel panic - not syncing: Duplicated early reservation d40000-e42000

Pid: 0, comm: swapper Not tainted 2.6.24-smp-g5a514e21-dirty #3

Call Trace:
 [<ffffffff80221545>] lapic_get_maxlvt+0x0/0x10
 [<ffffffff80221657>] clear_local_APIC+0x5/0xcf
 [<ffffffff80221726>] disable_local_APIC+0x5/0x17
 [<ffffffff8021fe16>] smp_send_stop+0x46/0x4c
 [<ffffffff80235293>] panic+0x94/0x13e
 [<ffffffff80bc3b03>] sctp_eps_proc_init+0x12/0x34
 [<ffffffff80b9f1c5>] reserve_early+0x30/0x6c
 [<ffffffff80803925>] init_memory_mapping+0x2cd/0x2dc
 [<ffffffff80b9dc01>] setup_arch+0x21f/0x44e
 [<ffffffff80b978be>] start_kernel+0x6f/0x2c7
 [<ffffffff80b971cc>] _sinittext+0x1cc/0x1d3

it turns out there is overlap between pgtable and bss...

in System.map we have
ffffffff80d40420 b rsi_table
ffffffff80d40620 B krb5_seq_lock
ffffffff80d40628 b i.20437
ffffffff80d40630 b xprt_rdma_inline_write_padding
ffffffff80d40638 b sunrpc_table_header
ffffffff80d40640 b zero
ffffffff80d40644 b min_memreg
ffffffff80d40648 b rpcrdma_tk_lock_g
ffffffff80d40650 B sctp_assocs_id_lock
ffffffff80d40658 B proc_net_sctp
ffffffff80d40660 B sctp_assocs_id
ffffffff80d40680 B sysctl_sctp_mem
ffffffff80d40690 B sysctl_sctp_rmem
ffffffff80d406a0 B sysctl_sctp_wmem
ffffffff80d406b0 b sctp_ctl_socket
ffffffff80d406b8 b sctp_pf_inet6_specific
ffffffff80d406c0 b sctp_pf_inet_specific
ffffffff80d406c8 b sctp_af_v4_specific
ffffffff80d406d0 b sctp_af_v6_specific
ffffffff80d406d8 b sctp_rand.33270
ffffffff80d406dc b sctp_memory_pressure
ffffffff80d406e0 b sctp_sockets_allocated
ffffffff80d406e4 b sctp_memory_allocated
ffffffff80d406e8 b sctp_sysctl_header
ffffffff80d406f0 b zero
ffffffff80d406f4 A __bss_stop
ffffffff80d406f4 A _end

need to round up table_start to PAGE_SIZE.

also make the panic more informative.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_64.c | 4 ++--
 arch/x86/mm/init_64.c     | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index f8b7bebb4344..c617174e8963 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -70,8 +70,8 @@ void __init reserve_early(unsigned long start, unsigned long end)
 	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 		r = &early_res[i];
 		if (end > r->start && start < r->end)
-			panic("Duplicated early reservation %lx-%lx\n",
-			      start, end);
+			panic("Overlapping early reservations %lx-%lx to %lx-%lx\n",
+			      start, end, r->start, r->end);
 	}
 	if (i >= MAX_EARLY_RES)
 		panic("Too many early reservations");
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a95272644591..cc50a13ce8d9 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -358,6 +358,13 @@ static void __init find_early_table_space(unsigned long end)
 	if (table_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
+	/*
+	 * When you have a lot of RAM like 256GB, early_table will not fit
+	 * into 0x8000 range, find_e820_area() will find area after kernel
+	 * bss but the table_start is not page aligned, so need to round it
+	 * up to avoid overlap with bss:
+	 */
+	table_start = round_up(table_start, PAGE_SIZE);
 	table_start >>= PAGE_SHIFT;
 	table_end = table_start;
 

From afadcd788f37bfa62d92662e54a720c26c91becf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 30 Jan 2008 13:34:12 +0100
Subject: [PATCH 890/890] x86: fix nodemap_size according to nodeid bits

memnode.map is s16 array because of nodeid is 16 bit now.

so need to increase the nodemap_size according to that bits.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/numa_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index f0e5cabe49d8..dc3b1f7e1451 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -92,7 +92,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 
 	pad = L1_CACHE_BYTES - 1;
 	pad_addr = 0x8000;
-	nodemap_size = pad + memnodemapsize;
+	nodemap_size = pad + sizeof(s16) * memnodemapsize;
 	nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
 				      nodemap_size);
 	if (nodemap_addr == -1UL) {