From 9d40ee200a527ce08ab8c793ba8ae3e242edbb0e Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Wed, 7 Oct 2009 10:54:19 -0700
Subject: [PATCH 1/9] [IA64] Squeeze ticket locks back into 4 bytes.

Linus pointed out that other people have spent large amounts of time
and effort to optimize the layout of frequently used structures. Often
these have embedded locks, and the assumption is that a lock takes
4 bytes.

Linus also pointed out how to work with the limited options for atomic
instructions on Itanium.

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/spinlock.h       | 45 +++++++++++++++-----------
 arch/ia64/include/asm/spinlock_types.h |  2 +-
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 30bb930e1111..4fa502739d64 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -25,61 +25,68 @@
  * by atomically noting the tail and incrementing it by one (thus adding
  * ourself to the queue and noting our position), then waiting until the head
  * becomes equal to the the initial value of the tail.
+ * The pad bits in the middle are used to prevent the next_ticket number
+ * overflowing into the now_serving number.
  *
- *   63                     32  31                      0
+ *  31             17  16    15  14                    0
  *  +----------------------------------------------------+
- *  |  next_ticket_number      |  now_serving           |
+ *  |  now_serving     | padding |   next_ticket        |
  *  +----------------------------------------------------+
  */
 
-#define TICKET_SHIFT	32
+#define TICKET_SHIFT	17
+#define TICKET_BITS	15
+#define TICKET_MASK	((1 << TICKET_BITS) - 1)
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
-	int	*p = (int *)&lock->lock, turn, now_serving;
+	int	*p = (int *)&lock->lock, ticket, serve;
 
-	now_serving = *p;
-	turn = ia64_fetchadd(1, p+1, acq);
+	ticket = ia64_fetchadd(1, p, acq);
 
-	if (turn == now_serving)
+	if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
 		return;
 
-	do {
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory");
+
+		if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
 		cpu_relax();
-	} while (ACCESS_ONCE(*p) != turn);
+	}
 }
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
-	long tmp = ACCESS_ONCE(lock->lock), try;
+	int tmp = ACCESS_ONCE(lock->lock);
 
-	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
-		try = tmp + (1L << TICKET_SHIFT);
-
-		return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
-	}
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK))
+		return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp;
 	return 0;
 }
 
 static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
-	int	*p = (int *)&lock->lock;
+	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
 
-	(void)ia64_fetchadd(1, p, rel);
+	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
+	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
 static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
-	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK);
 }
 
 static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
-	return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
+	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
 }
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index b61d136d9bc2..474e46f1ab4a 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
 #endif
 
 typedef struct {
-	volatile unsigned long lock;
+	volatile unsigned int lock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
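The new layout packs both halves of the ticket lock into one 4-byte word:
next_ticket lives in bits 0-14, now_serving in bits 17-31, and pad bits 15-16
keep a wrapping next_ticket from carrying into now_serving. Below is a minimal
user-space model of that encoding; GCC __atomic builtins stand in for
ia64_fetchadd, and the names lock_word/is_locked are illustration only, not
kernel code:

	#include <stdio.h>

	#define TICKET_SHIFT 17
	#define TICKET_BITS  15
	#define TICKET_MASK  ((1 << TICKET_BITS) - 1)

	/* Model of the 4-byte lock word: next_ticket in bits 14..0,
	 * now_serving in bits 31..17.  (The kernel's ld2/st2 unlock
	 * sequence also keeps pad bit 16 clear; adding 1 << 17 here
	 * never sets it.) */
	static unsigned int lock_word;

	static int is_locked(unsigned int w)
	{
		/* locked whenever now_serving != next_ticket */
		return !!(((w >> TICKET_SHIFT) ^ w) & TICKET_MASK);
	}

	int main(void)
	{
		/* take a ticket: atomically bump next_ticket (low half),
		 * standing in for ia64_fetchadd(1, p, acq) */
		unsigned int t = __atomic_fetch_add(&lock_word, 1, __ATOMIC_ACQUIRE);
		printf("ticket %u, locked=%d\n", t & TICKET_MASK, is_locked(lock_word));

		/* release: advance now_serving (high half) by one */
		__atomic_fetch_add(&lock_word, 1 << TICKET_SHIFT, __ATOMIC_RELEASE);
		printf("after unlock, locked=%d\n", is_locked(lock_word));
		return 0;
	}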
From 883a3acf5b0d4782ac35981227a0d094e8b44850 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Fri, 9 Oct 2009 10:52:39 -0700
Subject: [PATCH 2/9] [IA64] Re-implement spinaphores using ticket lock concepts

Bound the wait time for the ptcg_sem by using a similar idea to the
ticket spin locks. In this case we have only one instance of a
spinaphore, so make it 8 bytes rather than try to squeeze it into
4 bytes, to keep the code simpler (and shorter).

Signed-off-by: Tony Luck
---
 arch/ia64/mm/tlb.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index f426dc78d959..ee09d261f2e6 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -100,24 +100,36 @@ wrap_mmu_context (struct mm_struct *mm)
  * this primitive it can be moved up to a spinaphore.h header.
  */
 struct spinaphore {
-	atomic_t	cur;
+	unsigned long	ticket;
+	unsigned long	serve;
 };
 
 static inline void spinaphore_init(struct spinaphore *ss, int val)
 {
-	atomic_set(&ss->cur, val);
+	ss->ticket = 0;
+	ss->serve = val;
 }
 
 static inline void down_spin(struct spinaphore *ss)
 {
-	while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
-		while (atomic_read(&ss->cur) == 0)
-			cpu_relax();
+	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
+
+	if (time_before(t, ss->serve))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
+		if (time_before(t, serve))
+			return;
+		cpu_relax();
+	}
 }
 
 static inline void up_spin(struct spinaphore *ss)
 {
-	atomic_add(1, &ss->cur);
+	ia64_fetchadd(1, &ss->serve, rel);
 }
 
 static struct spinaphore ptcg_sem;
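The same ticket idea generalizes to a counting semaphore: serve starts at val
rather than 0, so up to val tickets are "in service" at once. A compact
user-space sketch of the concept follows; GCC atomics stand in for
ia64_fetchadd, and the wraparound-safe comparison spells out what
time_before() does. This is an illustration of the idea, not the kernel code:

	/* Ticket-based spinaphore model: "serve" runs val ahead of the
	 * admitted tickets, so at most val holders are inside at once. */
	struct spinaphore {
		unsigned long ticket;	/* next ticket to hand out */
		unsigned long serve;	/* tickets below this may proceed */
	};

	static void spinaphore_init(struct spinaphore *ss, int val)
	{
		ss->ticket = 0;
		ss->serve = val;	/* first val tickets pass immediately */
	}

	static void down_spin(struct spinaphore *ss)
	{
		/* grab a ticket, modeling ia64_fetchadd(1, &ss->ticket, acq) */
		unsigned long t = __atomic_fetch_add(&ss->ticket, 1, __ATOMIC_ACQUIRE);

		/* spin until our ticket comes up; (long)(t - serve) < 0 is the
		 * wraparound-safe test that time_before() performs */
		while ((long)(t - __atomic_load_n(&ss->serve, __ATOMIC_ACQUIRE)) >= 0)
			;	/* cpu_relax() in the kernel */
	}

	static void up_spin(struct spinaphore *ss)
	{
		/* admit the next waiter, modeling ia64_fetchadd(1, &ss->serve, rel) */
		__atomic_fetch_add(&ss->serve, 1, __ATOMIC_RELEASE);
	}

	int main(void)
	{
		struct spinaphore ss;

		spinaphore_init(&ss, 2);	/* admit up to two holders */
		down_spin(&ss);			/* ticket 0: passes (0 < 2) */
		down_spin(&ss);			/* ticket 1: passes (1 < 2) */
		up_spin(&ss);			/* serve becomes 3 */
		return 0;
	}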
From 29e4e025be52c0619b9dfe6faba29bc3deac6272 Mon Sep 17 00:00:00 2001
From: Takao Indoh
Date: Thu, 1 Oct 2009 17:55:16 -0400
Subject: [PATCH 3/9] [IA64] Restore registers in the stack on INIT

Registers are not saved anywhere when an INIT comes during fsys mode,
so we cannot know what happened when we investigate a vmcore captured
by kdump. This patch adds a new function, finish_pt_regs(), so that
registers can be saved in such a case.

Signed-off-by: Takao Indoh
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/mca.c | 104 ++++++++++++++++++++++-------------------
 1 file changed, 57 insertions(+), 47 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index d2877a7bfe2e..496ac7a99488 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -887,6 +887,60 @@ ia64_mca_modify_comm(const struct task_struct *previous_current)
 	memcpy(current->comm, comm, sizeof(current->comm));
 }
 
+static void
+finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
+		unsigned long *nat)
+{
+	const u64 *bank;
+
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		regs->cr_iip = ms->pmsa_iip;
+		regs->cr_ipsr = ms->pmsa_ipsr;
+		regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		regs->cr_iip = ms->pmsa_xip;
+		regs->cr_ipsr = ms->pmsa_xpsr;
+		regs->cr_ifs = ms->pmsa_xfs;
+	}
+	regs->pr = ms->pmsa_pr;
+	regs->b0 = ms->pmsa_br0;
+	regs->ar_rsc = ms->pmsa_rsc;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &regs->r1, nat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &regs->r2, nat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &regs->r3, nat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &regs->r8, nat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &regs->r9, nat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &regs->r10, nat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &regs->r11, nat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &regs->r12, nat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &regs->r13, nat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &regs->r14, nat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &regs->r15, nat);
+	if (ia64_psr(regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &regs->r16, nat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &regs->r17, nat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &regs->r18, nat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &regs->r19, nat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &regs->r20, nat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &regs->r21, nat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &regs->r22, nat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &regs->r23, nat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &regs->r24, nat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &regs->r25, nat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &regs->r26, nat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &regs->r27, nat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &regs->r28, nat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &regs->r29, nat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &regs->r30, nat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &regs->r31, nat);
+}
+
 /* On entry to this routine, we are running on the per cpu stack, see
  * mca_asm.h.  The original stack has not been touched by this event.  Some of
  * the original stack's registers will be in the RBS on this stack.  This stack
@@ -921,7 +975,6 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
 	u64 ar_bspstore = regs->ar_bspstore;
 	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
-	const u64 *bank;
 	const char *msg;
 	int cpu = smp_processor_id();
 
@@ -1024,54 +1077,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	p = (char *)r12 - sizeof(*regs);
 	old_regs = (struct pt_regs *)p;
 	memcpy(old_regs, regs, sizeof(*regs));
-	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
-	 * pmsa_{xip,xpsr,xfs}
-	 */
-	if (ia64_psr(regs)->ic) {
-		old_regs->cr_iip = ms->pmsa_iip;
-		old_regs->cr_ipsr = ms->pmsa_ipsr;
-		old_regs->cr_ifs = ms->pmsa_ifs;
-	} else {
-		old_regs->cr_iip = ms->pmsa_xip;
-		old_regs->cr_ipsr = ms->pmsa_xpsr;
-		old_regs->cr_ifs = ms->pmsa_xfs;
-	}
-	old_regs->pr = ms->pmsa_pr;
-	old_regs->b0 = ms->pmsa_br0;
 	old_regs->loadrs = loadrs;
-	old_regs->ar_rsc = ms->pmsa_rsc;
 	old_unat = old_regs->ar_unat;
-	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
-	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
-	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
-	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
-	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
-	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
-	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
-	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
-	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
-	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
-	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
-	if (ia64_psr(old_regs)->bn)
-		bank = ms->pmsa_bank1_gr;
-	else
-		bank = ms->pmsa_bank0_gr;
-	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
-	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
-	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
-	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
-	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
-	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
-	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
-	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
-	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
-	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
-	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
-	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
-	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
-	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
-	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
-	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+	finish_pt_regs(old_regs, ms, &old_unat);
 
 	/* Next stack a struct switch_stack.  mca_asm.S built a partial
 	 * switch_stack, copy it and fill in the blanks using pt_regs and
@@ -1141,6 +1149,8 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 no_mod:
 	mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
 			smp_processor_id(), type, msg);
+	old_unat = regs->ar_unat;
+	finish_pt_regs(regs, ms, &old_unat);
 	return previous_current;
 }
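finish_pt_regs() leans on the pre-existing copy_reg() helper in mca.c, whose
body is not shown in the hunks above. Roughly, it has to move a register value
together with its NaT ("not a thing") bit into the target pt_regs slot and
unat vector. The sketch below is a from-memory model of that effect, not a
verbatim copy; it assumes the ia64 convention that a spilled register's NaT
index is taken from bits 8..3 of the register's memory address:

	#include <stdint.h>

	typedef uint64_t u64;

	/* Simplified model of mca.c's copy_reg(): copy the 64-bit value
	 * and carry the matching NaT bit from the source NaT vector
	 * (fnat) into the target unat vector (tnat). */
	static void
	copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
	{
		u64 fslot = ((uintptr_t)fr >> 3) & 63;	/* source NaT index */
		u64 tslot = ((uintptr_t)tr >> 3) & 63;	/* target NaT index */

		*tr = *fr;				/* copy the value */
		*tnat &= ~(1ULL << tslot);		/* clear old NaT bit */
		*tnat |= ((fnat >> fslot) & 1) << tslot;/* copy source NaT bit */
	}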
From dec1798f81f7e8a299734bdc29197ae77bf08018 Mon Sep 17 00:00:00 2001
From: Roel Kluin
Date: Sat, 10 Oct 2009 22:21:59 +0200
Subject: [PATCH 4/9] [IA64] unsigned cannot be less than 0 in sn_hwperf_ioctl()

struct sn_hwperf_ioctl_args member arg (u64) cannot be less than 0.

Signed-off-by: Roel Kluin
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 4c7e74790958..55ac3c4e11d2 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -786,17 +786,18 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, unsigned long arg)
 		break;
 
 	case SN_HWPERF_GET_OBJ_NODE:
-		if (a.sz != sizeof(u64) || a.arg < 0) {
+		i = a.arg;
+		if (a.sz != sizeof(u64) || i < 0) {
 			r = -EINVAL;
 			goto error;
 		}
 		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
-			if (a.arg >= nobj) {
+			if (i >= nobj) {
 				r = -EINVAL;
 				vfree(objs);
 				goto error;
 			}
-			if (objs[(i = a.arg)].id != a.arg) {
+			if (objs[i].id != a.arg) {
 				for (i = 0; i < nobj; i++) {
 					if (objs[i].id == a.arg)
 						break;

From adcd740341dbd58eb94a8c2885c171ce9eb8677c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox
Date: Mon, 12 Oct 2009 08:24:30 -0600
Subject: [PATCH 5/9] [IA64] Require SAL 3.2 in order to do extended config space ops

We had assumed that SAL firmware would return an error if it didn't
understand extended config space. Unfortunately, the SAL on the SGI 750
doesn't do that; it panics the machine. So condition the extended PCI
config space accesses on SAL revision 3.2.

Signed-off-by: Matthew Wilcox
Tested-by: Brad Spengler
Signed-off-by: Tony Luck
---
 arch/ia64/pci/pci.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7de76dd352fe..c0fca2c1c858 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -56,10 +56,13 @@ int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn,
 	if ((seg | reg) <= 255) {
 		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
 		mode = 0;
-	} else {
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
 		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
 		mode = 1;
+	} else {
+		return -EINVAL;
 	}
+
 	result = ia64_sal_pci_config_read(addr, mode, len, &data);
 	if (result != 0)
 		return -EINVAL;
@@ -80,9 +83,11 @@ int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn,
 	if ((seg | reg) <= 255) {
 		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
 		mode = 0;
-	} else {
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
 		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
 		mode = 1;
+	} else {
+		return -EINVAL;
 	}
 	result = ia64_sal_pci_config_write(addr, mode, len, value);
 	if (result != 0)
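Patch 4 above fixes an instance of a general C pitfall: comparing an unsigned
value against zero with < is always false, so the original range check was
dead code. A tiny stand-alone illustration of the bug class and the shape of
the fix (hypothetical values, user-space C):

	#include <stdio.h>
	#include <stdint.h>

	typedef uint64_t u64;

	int main(void)
	{
		u64 arg = (u64)-1;	/* a bogus, huge ioctl argument */
		int nobj = 16, i;

		if (arg < 0)		/* always false for an unsigned type; */
			puts("never reached");	/* gcc can warn this is dead code */

		i = arg;		/* the fix: move into a signed int first */
		if (i < 0 || i >= nobj)
			printf("rejected: %d out of range [0,%d)\n", i, nobj);
		return 0;
	}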
From 54f8dd3c99052456a65bd26aa3313f57cd145a73 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz
Date: Fri, 18 Sep 2009 12:48:12 -0700
Subject: [PATCH 6/9] [IA64] use printk_once() in unaligned.c/io_common.c

Use printk_once() in a couple of places.

Signed-off-by: Marcin Slusarz
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/unaligned.c    | 6 ++----
 arch/ia64/sn/kernel/io_common.c | 8 ++------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 6db08599ebbc..776dd40397e2 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -60,7 +60,6 @@ dump (const char *str, void *vp, size_t len)
  */
 int no_unaligned_warning;
 int unaligned_dump_stack;
-static int noprint_warning;
 
 /*
  * For M-unit:
@@ -1357,9 +1356,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			/* watch for command names containing %s */
 			printk(KERN_WARNING "%s", buf);
 		} else {
-			if (no_unaligned_warning && !noprint_warning) {
-				noprint_warning = 1;
-				printk(KERN_WARNING "%s(%d) encountered an "
+			if (no_unaligned_warning) {
+				printk_once(KERN_WARNING "%s(%d) encountered an "
 				       "unaligned exception which required\n"
 				       "kernel assistance, which degrades "
 				       "the performance of the application.\n"
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 25831c47c579..308e6595110e 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -119,7 +119,6 @@ sn_pcidev_info_get(struct pci_dev *dev)
  * Additionally note that the struct sn_flush_device_war also has to be
  * removed from arch/ia64/sn/include/xtalk/hubdev.h
  */
-static u8 war_implemented = 0;
 
 static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
 			       struct sn_flush_device_common *common)
@@ -128,11 +127,8 @@ static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
 	struct sn_flush_device_war *dev_entry;
 	struct ia64_sal_retval isrv = {0,0,0,0};
 
-	if (!war_implemented) {
-		printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
-		       "PROM flush WAR\n");
-		war_implemented = 1;
-	}
+	printk_once(KERN_WARNING
+		"PROM version < 4.50 -- implementing old PROM flush WAR\n");
 
 	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
 	BUG_ON(!war_list);
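printk_once() removes the need for hand-rolled guard flags like
noprint_warning and war_implemented above. Its effect can be modeled in plain
GNU C; the sketch below uses printf in place of printk and a statement
expression like the kernel macro, but it is an illustration, and the real
macro in the kernel headers may differ in detail:

	#include <stdio.h>
	#include <stdbool.h>

	/* Model of printk_once(): a function-local static flag means each
	 * call site prints at most once. */
	#define printf_once(fmt...)			\
	({						\
		static bool __print_once;		\
							\
		if (!__print_once) {			\
			__print_once = true;		\
			printf(fmt);			\
		}					\
	})

	int main(void)
	{
		int i;

		for (i = 0; i < 3; i++)
			printf_once("this line appears once, not %d times\n", 3);
		return 0;
	}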
From 1502f08edc040b6ba4b986454416564088995e79 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Mon, 12 Oct 2009 09:51:41 -0700
Subject: [PATCH 7/9] [IA64] SMT friendly version of spin_unlock_wait()

We can be kinder to SMT systems in spin_unlock_wait.

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/spinlock.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 4fa502739d64..239ecdc9516d 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -75,6 +75,20 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
+static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
+		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
 static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
@@ -123,8 +137,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
-		cpu_relax();
+	__ticket_spin_unlock_wait(lock);
 }
 
 #define __raw_read_can_lock(rw)	(*(volatile int *)(rw) >= 0)

From b94b08081fcecf83fa690d6c5664f6316fe72208 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Wed, 14 Oct 2009 15:10:03 -0700
Subject: [PATCH 8/9] [IA64] fix percpu warnings

Fix percpu types warning in ia64/sn:

arch/ia64/sn/kernel/setup.c:74: error: conflicting types for '__pcpu_scope___sn_cnodeid_to_nasid'
arch/ia64/include/asm/sn/arch.h:74: error: previous declaration of '__pcpu_scope___sn_cnodeid_to_nasid' was here

Signed-off-by: Randy Dunlap
Cc: Jes Sorensen
Acked-by: Tejun Heo
Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/sn/arch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
index 7caa1f44cd95..f5f493b0c077 100644
--- a/arch/ia64/include/asm/sn/arch.h
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -71,7 +71,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
  */
-DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+DECLARE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))

From e8c93fc7b7221b6ac7e7ddbd0e21e205bf9e801a Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Mon, 2 Nov 2009 09:23:08 -0800
Subject: [PATCH 9/9] Revert "[IA64] fix percpu warnings"

This reverts commit b94b08081fcecf83fa690d6c5664f6316fe72208.

genksyms currently cannot handle complicated types for exported percpu
variables. Drop this patch for now as it prevents a module from being
loaded on sn2 systems:

xpc: no symbol version for per_cpu____sn_cnodeid_to_nasid
xpc: Unknown symbol per_cpu____sn_cnodeid_to_nasid

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/sn/arch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
index f5f493b0c077..7caa1f44cd95 100644
--- a/arch/ia64/include/asm/sn/arch.h
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -71,7 +71,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
  */
-DECLARE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
+DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
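A note on the final pair of patches: both DECLARE_PER_CPU() spellings produce
the same object type once the macro pastes its arguments into a declaration,
which is why patch 8 compiled cleanly; the revert was needed only because
genksyms could not parse an array type as the first macro argument when
computing symbol versions. A sketch of the type equivalence, assuming the
2009-era expansion to roughly "extern __typeof__(type) name" (GNU C; the
names below are illustrative):

	#define MAX_COMPACT_NODES 2048	/* placeholder value for illustration */

	/* DECLARE_PER_CPU(short, name[MAX_COMPACT_NODES]) expands to roughly: */
	extern __typeof__(short) per_cpu_a[MAX_COMPACT_NODES];

	/* DECLARE_PER_CPU(short [MAX_COMPACT_NODES], name) expands to roughly: */
	extern __typeof__(short [MAX_COMPACT_NODES]) per_cpu_b;

	/* Both are "array of MAX_COMPACT_NODES short".  Compile-time check:
	 * redeclaring per_cpu_b with per_cpu_a's type is legal only if the
	 * two types are compatible. */
	extern __typeof__(per_cpu_a) per_cpu_b;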