diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 30bb930e1111..239ecdc9516d 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -25,61 +25,82 @@ * by atomically noting the tail and incrementing it by one (thus adding * ourself to the queue and noting our position), then waiting until the head * becomes equal to the the initial value of the tail. + * The pad bits in the middle are used to prevent the next_ticket number + * overflowing into the now_serving number. * - * 63 32 31 0 + * 31 17 16 15 14 0 * +----------------------------------------------------+ - * | next_ticket_number | now_serving | + * | now_serving | padding | next_ticket | * +----------------------------------------------------+ */ -#define TICKET_SHIFT 32 +#define TICKET_SHIFT 17 +#define TICKET_BITS 15 +#define TICKET_MASK ((1 << TICKET_BITS) - 1) static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) { - int *p = (int *)&lock->lock, turn, now_serving; + int *p = (int *)&lock->lock, ticket, serve; - now_serving = *p; - turn = ia64_fetchadd(1, p+1, acq); + ticket = ia64_fetchadd(1, p, acq); - if (turn == now_serving) + if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) return; - do { + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); + + if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; cpu_relax(); - } while (ACCESS_ONCE(*p) != turn); + } } static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock), try; + int tmp = ACCESS_ONCE(lock->lock); - if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) { - try = tmp + (1L << TICKET_SHIFT); - - return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp; - } + if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) + return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; return 0; } static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) { - int *p = (int *)&lock->lock; + unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; - (void)ia64_fetchadd(1, p, rel); + asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); + ACCESS_ONCE(*p) = (tmp + 2) & ~1; +} + +static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock) +{ + int *p = (int *)&lock->lock, ticket; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory"); + if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + cpu_relax(); + } } static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) { long tmp = ACCESS_ONCE(lock->lock); - return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1)); + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); } static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) { long tmp = ACCESS_ONCE(lock->lock); - return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1; + return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } static inline int __raw_spin_is_locked(raw_spinlock_t *lock) @@ -116,8 +137,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { - while (__raw_spin_is_locked(lock)) - cpu_relax(); + __ticket_spin_unlock_wait(lock); } #define __raw_read_can_lock(rw) (*(volatile int *)(rw) >= 0) diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h index b61d136d9bc2..474e46f1ab4a 100644 --- a/arch/ia64/include/asm/spinlock_types.h +++ b/arch/ia64/include/asm/spinlock_types.h @@ -6,7 +6,7 @@ #endif typedef struct { - volatile unsigned long lock; + volatile unsigned int lock; } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index d2877a7bfe2e..496ac7a99488 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -887,6 +887,60 @@ ia64_mca_modify_comm(const struct task_struct *previous_current) memcpy(current->comm, comm, sizeof(current->comm)); } +static void +finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms, + unsigned long *nat) +{ + const u64 *bank; + + /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use + * pmsa_{xip,xpsr,xfs} + */ + if (ia64_psr(regs)->ic) { + regs->cr_iip = ms->pmsa_iip; + regs->cr_ipsr = ms->pmsa_ipsr; + regs->cr_ifs = ms->pmsa_ifs; + } else { + regs->cr_iip = ms->pmsa_xip; + regs->cr_ipsr = ms->pmsa_xpsr; + regs->cr_ifs = ms->pmsa_xfs; + } + regs->pr = ms->pmsa_pr; + regs->b0 = ms->pmsa_br0; + regs->ar_rsc = ms->pmsa_rsc; + copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, ®s->r1, nat); + copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, ®s->r2, nat); + copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, ®s->r3, nat); + copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, ®s->r8, nat); + copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, ®s->r9, nat); + copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, ®s->r10, nat); + copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, ®s->r11, nat); + copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, ®s->r12, nat); + copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, ®s->r13, nat); + copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, ®s->r14, nat); + copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, ®s->r15, nat); + if (ia64_psr(regs)->bn) + bank = ms->pmsa_bank1_gr; + else + bank = ms->pmsa_bank0_gr; + copy_reg(&bank[16-16], ms->pmsa_nat_bits, ®s->r16, nat); + copy_reg(&bank[17-16], ms->pmsa_nat_bits, ®s->r17, nat); + copy_reg(&bank[18-16], ms->pmsa_nat_bits, ®s->r18, nat); + copy_reg(&bank[19-16], ms->pmsa_nat_bits, ®s->r19, nat); + copy_reg(&bank[20-16], ms->pmsa_nat_bits, ®s->r20, nat); + copy_reg(&bank[21-16], ms->pmsa_nat_bits, ®s->r21, nat); + copy_reg(&bank[22-16], ms->pmsa_nat_bits, ®s->r22, nat); + copy_reg(&bank[23-16], ms->pmsa_nat_bits, ®s->r23, nat); + copy_reg(&bank[24-16], ms->pmsa_nat_bits, ®s->r24, nat); + copy_reg(&bank[25-16], ms->pmsa_nat_bits, ®s->r25, nat); + copy_reg(&bank[26-16], ms->pmsa_nat_bits, ®s->r26, nat); + copy_reg(&bank[27-16], ms->pmsa_nat_bits, ®s->r27, nat); + copy_reg(&bank[28-16], ms->pmsa_nat_bits, ®s->r28, nat); + copy_reg(&bank[29-16], ms->pmsa_nat_bits, ®s->r29, nat); + copy_reg(&bank[30-16], ms->pmsa_nat_bits, ®s->r30, nat); + copy_reg(&bank[31-16], ms->pmsa_nat_bits, ®s->r31, nat); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -921,7 +975,6 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; u64 ar_bspstore = regs->ar_bspstore; u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); - const u64 *bank; const char *msg; int cpu = smp_processor_id(); @@ -1024,54 +1077,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, p = (char *)r12 - sizeof(*regs); old_regs = (struct pt_regs *)p; memcpy(old_regs, regs, sizeof(*regs)); - /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use - * pmsa_{xip,xpsr,xfs} - */ - if (ia64_psr(regs)->ic) { - old_regs->cr_iip = ms->pmsa_iip; - old_regs->cr_ipsr = ms->pmsa_ipsr; - old_regs->cr_ifs = ms->pmsa_ifs; - } else { - old_regs->cr_iip = ms->pmsa_xip; - old_regs->cr_ipsr = ms->pmsa_xpsr; - old_regs->cr_ifs = ms->pmsa_xfs; - } - old_regs->pr = ms->pmsa_pr; - old_regs->b0 = ms->pmsa_br0; old_regs->loadrs = loadrs; - old_regs->ar_rsc = ms->pmsa_rsc; old_unat = old_regs->ar_unat; - copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat); - copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat); - copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat); - copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat); - copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat); - copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat); - copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat); - copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat); - copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat); - copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat); - copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat); - if (ia64_psr(old_regs)->bn) - bank = ms->pmsa_bank1_gr; - else - bank = ms->pmsa_bank0_gr; - copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat); - copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat); - copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat); - copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat); - copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat); - copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat); - copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat); - copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat); - copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat); - copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat); - copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat); - copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat); - copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat); - copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat); - copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat); - copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat); + finish_pt_regs(old_regs, ms, &old_unat); /* Next stack a struct switch_stack. mca_asm.S built a partial * switch_stack, copy it and fill in the blanks using pt_regs and @@ -1141,6 +1149,8 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, no_mod: mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", smp_processor_id(), type, msg); + old_unat = regs->ar_unat; + finish_pt_regs(regs, ms, &old_unat); return previous_current; } diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 6db08599ebbc..776dd40397e2 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -60,7 +60,6 @@ dump (const char *str, void *vp, size_t len) */ int no_unaligned_warning; int unaligned_dump_stack; -static int noprint_warning; /* * For M-unit: @@ -1357,9 +1356,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) /* watch for command names containing %s */ printk(KERN_WARNING "%s", buf); } else { - if (no_unaligned_warning && !noprint_warning) { - noprint_warning = 1; - printk(KERN_WARNING "%s(%d) encountered an " + if (no_unaligned_warning) { + printk_once(KERN_WARNING "%s(%d) encountered an " "unaligned exception which required\n" "kernel assistance, which degrades " "the performance of the application.\n" diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index f426dc78d959..ee09d261f2e6 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -100,24 +100,36 @@ wrap_mmu_context (struct mm_struct *mm) * this primitive it can be moved up to a spinaphore.h header. */ struct spinaphore { - atomic_t cur; + unsigned long ticket; + unsigned long serve; }; static inline void spinaphore_init(struct spinaphore *ss, int val) { - atomic_set(&ss->cur, val); + ss->ticket = 0; + ss->serve = val; } static inline void down_spin(struct spinaphore *ss) { - while (unlikely(!atomic_add_unless(&ss->cur, -1, 0))) - while (atomic_read(&ss->cur) == 0) - cpu_relax(); + unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve; + + if (time_before(t, ss->serve)) + return; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory"); + if (time_before(t, serve)) + return; + cpu_relax(); + } } static inline void up_spin(struct spinaphore *ss) { - atomic_add(1, &ss->cur); + ia64_fetchadd(1, &ss->serve, rel); } static struct spinaphore ptcg_sem; diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 7de76dd352fe..c0fca2c1c858 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -56,10 +56,13 @@ int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn, if ((seg | reg) <= 255) { addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); mode = 0; - } else { + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); mode = 1; + } else { + return -EINVAL; } + result = ia64_sal_pci_config_read(addr, mode, len, &data); if (result != 0) return -EINVAL; @@ -80,9 +83,11 @@ int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn, if ((seg | reg) <= 255) { addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); mode = 0; - } else { + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); mode = 1; + } else { + return -EINVAL; } result = ia64_sal_pci_config_write(addr, mode, len, value); if (result != 0) diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 25831c47c579..308e6595110e 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -119,7 +119,6 @@ sn_pcidev_info_get(struct pci_dev *dev) * Additionally note that the struct sn_flush_device_war also has to be * removed from arch/ia64/sn/include/xtalk/hubdev.h */ -static u8 war_implemented = 0; static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, struct sn_flush_device_common *common) @@ -128,11 +127,8 @@ static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, struct sn_flush_device_war *dev_entry; struct ia64_sal_retval isrv = {0,0,0,0}; - if (!war_implemented) { - printk(KERN_WARNING "PROM version < 4.50 -- implementing old " - "PROM flush WAR\n"); - war_implemented = 1; - } + printk_once(KERN_WARNING + "PROM version < 4.50 -- implementing old PROM flush WAR\n"); war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); BUG_ON(!war_list); diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 4c7e74790958..55ac3c4e11d2 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -786,17 +786,18 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, unsigned long arg) break; case SN_HWPERF_GET_OBJ_NODE: - if (a.sz != sizeof(u64) || a.arg < 0) { + i = a.arg; + if (a.sz != sizeof(u64) || i < 0) { r = -EINVAL; goto error; } if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { - if (a.arg >= nobj) { + if (i >= nobj) { r = -EINVAL; vfree(objs); goto error; } - if (objs[(i = a.arg)].id != a.arg) { + if (objs[i].id != a.arg) { for (i = 0; i < nobj; i++) { if (objs[i].id == a.arg) break;