From 9cedc194a7735e5d74ad26d3825247dc65a4d98e Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Wed, 14 Jun 2006 17:59:35 +0400 Subject: [PATCH 01/11] [PATCH] Return error in case flock_lock_file failure If flock_lock_file() failed to allocate flock with locks_alloc_lock() then "error = 0" is returned. Need to return some non-zero. Signed-off-by: Pavel Emelianov Signed-off-by: Kirill Korotaev Signed-off-by: Linus Torvalds --- fs/locks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 6f99c0a6f836..ab61a8b54829 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -755,6 +755,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) if (request->fl_type == F_UNLCK) goto out; + error = -ENOMEM; new_fl = locks_alloc_lock(); if (new_fl == NULL) goto out; @@ -781,6 +782,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) locks_copy_lock(new_fl, request); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; + error = 0; out: unlock_kernel(); From 553698f944ed715dfe023b4cef07601f0ce735f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Jun 2006 19:11:57 +0200 Subject: [PATCH 02/11] [PATCH] cfq-iosched: fix crash in do_div() We don't clear the seek stat values in cfq_alloc_io_context(), and if ->seek_mean is unlucky enough to be set to -36 by chance, the first invocation of cfq_update_io_seektime() will oops with a divide by zero in do_div(). Just memset the entire cic instead of filling invididual values independently. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a46d030e092a..052b17487625 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1323,17 +1323,12 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); if (cic) { - RB_CLEAR(&cic->rb_node); - cic->key = NULL; - cic->cfqq[ASYNC] = NULL; - cic->cfqq[SYNC] = NULL; + memset(cic, 0, sizeof(*cic)); + RB_CLEAR_COLOR(&cic->rb_node); cic->last_end_request = jiffies; - cic->ttime_total = 0; - cic->ttime_samples = 0; - cic->ttime_mean = 0; + INIT_LIST_HEAD(&cic->queue_list); cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; - INIT_LIST_HEAD(&cic->queue_list); atomic_inc(&ioc_count); } From 16070428d389ff47aa3476b0911179ad90c640a2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Jun 2006 07:46:37 +0200 Subject: [PATCH 03/11] [PATCH] fix cdrom open Some time ago the cdrom open routine was changed so that we call the driver's open routine before checking to see if it is read only. However, if we discovered that a read write open was not possible and the open flags required a writable open, we just returned -EROFS without calling the driver's release routine. This seems to work for most cdrom drivers, but breaks the Powerpc iSeries virtual cdrom rather badly. This just inserts the release call in the error path to balance the call to "->open()" done by "open_for_data()". Signed-off-by: Stephen Rothwell Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/cdrom/cdrom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index a59876a0bfa1..3170eaa25087 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1009,9 +1009,9 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) if (fp->f_mode & FMODE_WRITE) { ret = -EROFS; if (cdrom_open_write(cdi)) - goto err; + goto err_release; if (!CDROM_CAN(CDC_RAM)) - goto err; + goto err_release; ret = 0; cdi->media_written = 0; } @@ -1026,6 +1026,8 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) not be mounting, but opening with O_NONBLOCK */ check_disk_change(ip->i_bdev); return 0; +err_release: + cdi->ops->release(cdi); err: cdi->use_count--; return ret; From 991721572ef2140c6411894aebefd3377e71a9e7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Jun 2006 13:02:29 +0200 Subject: [PATCH 04/11] [PATCH] Fix missing ret assignment in __bio_map_user() error path If get_user_pages() returns less pages than what we asked for, we jump to out_unmap which will return ERR_PTR(ret). But ret can contain a positive number just smaller than local_nr_pages, so be sure to set it to -EFAULT always. Problem found and diagnosed by Damien Le Moal Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/bio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 098c12b2d60a..6a0b9ad8f8c9 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -654,9 +654,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q, write_to_vm, 0, &pages[cur_page], NULL); up_read(¤t->mm->mmap_sem); - if (ret < local_nr_pages) + if (ret < local_nr_pages) { + ret = -EFAULT; goto out_unmap; - + } offset = uaddr & ~PAGE_MASK; for (j = cur_page; j < page_limit; j++) { From 88d113601ca19c82feb038438c8c5db502d146f9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 16 Jun 2006 12:10:46 -0700 Subject: [PATCH 05/11] [PATCH] sky2: netconsole suspend/resume interaction A couple of fixes that should prevent crashes when using netconsole and suspend/resume. First, netconsole poll routine shouldn't run unless the device is up; second, the NAPI poll should be disabled during suspend. This is only an issue on sky2, because it has to have one NAPI poll routine for both ports on dual port boards. Normal drivers use netif_rx_schedule_prep and that checks for netif_running. Signed-off-by: Stephen Hemminger Signed-off-by: Linus Torvalds --- drivers/net/sky2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 97fe95666f3b..fba1e4d4d83d 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -2255,8 +2255,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) static void sky2_netpoll(struct net_device *dev) { struct sky2_port *sky2 = netdev_priv(dev); + struct net_device *dev0 = sky2->hw->dev[0]; - sky2_intr(sky2->hw->pdev->irq, sky2->hw, NULL); + if (netif_running(dev) && __netif_rx_schedule_prep(dev0)) + __netif_rx_schedule(dev0); } #endif @@ -3446,6 +3448,7 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state) sky2_down(dev); netif_device_detach(dev); + netif_poll_disable(dev); } } @@ -3474,6 +3477,8 @@ static int sky2_resume(struct pci_dev *pdev) struct net_device *dev = hw->dev[i]; if (dev && netif_running(dev)) { netif_device_attach(dev); + netif_poll_enable(dev); + err = sky2_up(dev); if (err) { printk(KERN_ERR PFX "%s: could not up: %d\n", From 8f17fc20bfb75bcec4cfeda789738979c8338fdc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 15 Jun 2006 20:11:15 +0400 Subject: [PATCH 06/11] [PATCH] check_process_timers: fix possible lockup If the local timer interrupt happens just after do_exit() sets PF_EXITING (and before it clears ->it_xxx_expires) run_posix_cpu_timers() will call check_process_timers() with tasklist_lock + ->siglock held and check_process_timers: t = tsk; do { .... do { t = next_thread(t); } while (unlikely(t->flags & PF_EXITING)); } while (t != tsk); the outer loop will never stop. Actually, the window is bigger. Another process can attach the timer after ->it_xxx_expires was cleared (see the next commit) and the 'if (PF_EXITING)' check in arm_timer() is racy (see the one after that). Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 520f6c59948d..9d9169aa2e24 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1173,6 +1173,9 @@ static void check_process_timers(struct task_struct *tsk, } t = tsk; do { + if (unlikely(t->flags & PF_EXITING)) + continue; + ticks = cputime_add(cputime_add(t->utime, t->stime), prof_left); if (!cputime_eq(prof_expires, cputime_zero) && @@ -1193,11 +1196,7 @@ static void check_process_timers(struct task_struct *tsk, t->it_sched_expires > sched)) { t->it_sched_expires = sched; } - - do { - t = next_thread(t); - } while (unlikely(t->flags & PF_EXITING)); - } while (t != tsk); + } while ((t = next_thread(t)) != tsk); } } From 30f1e3dd8c72abda343bcf415f7d8894a02b4290 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 15 Jun 2006 20:11:43 +0400 Subject: [PATCH 07/11] [PATCH] run_posix_cpu_timers: remove a bogus BUG_ON() do_exit() clears ->it_##clock##_expires, but nothing prevents another cpu to attach the timer to exiting process after that. arm_timer() tries to protect against this race, but the check is racy. After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and before do_exit() calls 'schedule() local timer interrupt can find tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu does sys_wait4) interrupted task has ->signal == NULL. At this moment exiting task has no pending cpu timers, they were cleanuped in __exit_signal()->posix_cpu_timers_exit{,_group}(), so we can just return from irq. John Stultz recently confirmed this bug, see http://marc.theaimsgroup.com/?l=linux-kernel&m=115015841413687 Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 -------- kernel/posix-cpu-timers.c | 36 ++++++++++++++++++------------------ 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index e95b93282210..e06d0c10a24e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -881,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITING; - /* - * Make sure we don't try to process any timer firings - * while we are already exiting. - */ - tsk->it_virt_expires = cputime_zero; - tsk->it_prof_expires = cputime_zero; - tsk->it_sched_expires = 0; - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9d9169aa2e24..4882bf1e094a 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1288,30 +1288,30 @@ void run_posix_cpu_timers(struct task_struct *tsk) #undef UNEXPIRED - BUG_ON(tsk->exit_state); - /* * Double-check with locks held. */ read_lock(&tasklist_lock); - spin_lock(&tsk->sighand->siglock); + if (likely(tsk->signal != NULL)) { + spin_lock(&tsk->sighand->siglock); - /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. - */ - check_thread_timers(tsk, &firing); - check_process_timers(tsk, &firing); + /* + * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] + * all the timers that are firing, and put them on the firing list. + */ + check_thread_timers(tsk, &firing); + check_process_timers(tsk, &firing); - /* - * We must release these locks before taking any timer's lock. - * There is a potential race with timer deletion here, as the - * siglock now protects our private firing list. We have set - * the firing flag in each timer, so that a deletion attempt - * that gets the timer lock before we do will give it up and - * spin until we've taken care of that timer below. - */ - spin_unlock(&tsk->sighand->siglock); + /* + * We must release these locks before taking any timer's lock. + * There is a potential race with timer deletion here, as the + * siglock now protects our private firing list. We have set + * the firing flag in each timer, so that a deletion attempt + * that gets the timer lock before we do will give it up and + * spin until we've taken care of that timer below. + */ + spin_unlock(&tsk->sighand->siglock); + } read_unlock(&tasklist_lock); /* From f53ae1dc3429529a58aa538e0a860d713c7079c3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 15 Jun 2006 20:12:02 +0400 Subject: [PATCH 08/11] [PATCH] arm_timer: remove a racy and obsolete PF_EXITING check arm_timer() checks PF_EXITING to prevent BUG_ON(->exit_state) in run_posix_cpu_timers(). However, for some reason it does so only for CPUCLOCK_PERTHREAD case (which is imho wrong). Also, this check is not reliable, PF_EXITING could be set on another cpu without any locks/barriers just after the check, so it can't prevent from attaching the timer to the exiting task. The previous patch makes this check unneeded. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 4882bf1e094a..d38d9ec3276c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) struct cpu_timer_list *next; unsigned long i; - if (CPUCLOCK_PERTHREAD(timer->it_clock) && (p->flags & PF_EXITING)) - return; - head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? p->cpu_timers : p->signal->cpu_timers); head += CPUCLOCK_WHICH(timer->it_clock); From 19242b240793ac769f5b91b68a5e43dd39f0c530 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Jun 2006 21:15:44 +1000 Subject: [PATCH 09/11] [PATCH] powerpc: Fix 64k pages on non-partitioned machines The page size encoding passed to tlbie is incorrect for new-style large pages. This fixes it. This doesn't affect anything on older machines because mmu_psize_defs[psize].penc (the page size encoding) is 0 for 4k and 16M pages (the two are distinguished by a separate "is a large page" bit). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Arnd Bergmann Signed-off-by: Paul Mackerras Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hash_native_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 33654d1b1b43..994856e55b7c 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -52,7 +52,7 @@ static inline void __tlbie(unsigned long va, unsigned int psize) default: penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); - va |= (0x7f >> (8 - penc)) << 12; + va |= penc << 12; asm volatile("tlbie %0,1" : : "r" (va) : "memory"); break; } @@ -74,7 +74,7 @@ static inline void __tlbiel(unsigned long va, unsigned int psize) default: penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); - va |= (0x7f >> (8 - penc)) << 12; + va |= penc << 12; asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" : : "r"(va) : "memory"); break; From ce221982e0bef039d7047b0f667bb414efece5af Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Jun 2006 15:09:16 +0200 Subject: [PATCH 10/11] [PATCH] powerpc: enable CPU_FTR_CI_LARGE_PAGE for cell Reflect the fact that the Cell Broadband Engine supports 64k pages by adding the bit to the CPU features. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/asm-powerpc/cputable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 9fcf0162d859..f6265c2a0dd2 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -329,7 +329,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO) + CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE) #define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2) #endif From 427abfa28afedffadfca9dd8b067eb6d36bac53f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 17 Jun 2006 18:49:35 -0700 Subject: [PATCH 11/11] Linux v2.6.17 Being named "Crazed Snow-Weasel" instills a lot of confidence in this release, so I'm sure this will be one of the better ones. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3a7baad8555..1700d3f6ea22 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 17 -EXTRAVERSION =-rc6 +EXTRAVERSION = NAME=Crazed Snow-Weasel # *DOCUMENTATION*