From f566a576bca09de85bf477fc0ab2c8c96405b77b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 2 Oct 2005 17:15:29 -0700 Subject: [PATCH 01/54] [SCSI] NCR5380: fix undefined preprocessor identifier Fix 12 undefined preprocessor identifier warnings (4 each in 3 driver builds): drivers/scsi/NCR5380.c:2744:16: warning: undefined preprocessor identifier 'NDEBUG_ABORT' drivers/scsi/NCR5380.c:2744:16: warning: "NDEBUG_ABORT" is not defined Signed-off-by: Randy Dunlap Signed-off-by: James Bottomley --- drivers/scsi/NCR5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index d40ba0bd68a3..23392ae7df8b 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -91,7 +91,7 @@ #ifndef NDEBUG #define NDEBUG 0 #endif -#ifndef NDEBUG +#ifndef NDEBUG_ABORT #define NDEBUG_ABORT 0 #endif From 7a9366e46c167930f8bd9e378a3656861c5a41b6 Mon Sep 17 00:00:00 2001 From: "Salyzyn, Mark" Date: Wed, 5 Oct 2005 12:58:38 -0400 Subject: [PATCH 02/54] [SCSI] Fix aacraid regression Juan was kind enough to linger on site, and work on a production machine, to try the parameter to make the system stable. He discovered that reducing the maximum transfer size issued to the adapter to 128KB stabilized his system. This is related to an earlier change for the 2.6.13 tree resulting from Martin Drab's testing where the transfer size was reduced from 4G to 256KB; we needed to go still further in scaling back the request size. Here is the patch that tames this regression. Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aacraid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 4a99d2f000f4..d54b1cc88d0d 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -19,7 +19,7 @@ #define AAC_MAX_LUN (8) #define AAC_MAX_HOSTPHYSMEMPAGES (0xfffff) -#define AAC_MAX_32BIT_SGBCOUNT ((unsigned short)512) +#define AAC_MAX_32BIT_SGBCOUNT ((unsigned short)256) /* * These macros convert from physical channels to virtual channels From d16794f6ac8d9b50f62e02a6e6175ae1a30d0ccd Mon Sep 17 00:00:00 2001 From: "James.Smart@Emulex.Com" Date: Wed, 5 Oct 2005 13:50:08 -0400 Subject: [PATCH 03/54] [SCSI] FW: [PATCH] for Deadlock in transport_fc Cannot call fc_rport_terminate() under the host lock, so drop the lock. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 2cab556b6e82..771e97ef136e 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -819,12 +819,15 @@ show_fc_private_host_tgtid_bind_type(struct class_device *cdev, char *buf) return snprintf(buf, FC_BINDTYPE_MAX_NAMELEN, "%s\n", name); } +#define get_list_head_entry(pos, head, member) \ + pos = list_entry((head)->next, typeof(*pos), member) + static ssize_t store_fc_private_host_tgtid_bind_type(struct class_device *cdev, const char *buf, size_t count) { struct Scsi_Host *shost = transport_class_to_shost(cdev); - struct fc_rport *rport, *next_rport; + struct fc_rport *rport; enum fc_tgtid_binding_type val; unsigned long flags; @@ -834,9 +837,13 @@ store_fc_private_host_tgtid_bind_type(struct class_device *cdev, /* if changing bind type, purge all unused consistent bindings */ if (val != fc_host_tgtid_bind_type(shost)) { spin_lock_irqsave(shost->host_lock, flags); - list_for_each_entry_safe(rport, next_rport, - &fc_host_rport_bindings(shost), peers) + while (!list_empty(&fc_host_rport_bindings(shost))) { + get_list_head_entry(rport, + &fc_host_rport_bindings(shost), peers); + spin_unlock_irqrestore(shost->host_lock, flags); fc_rport_terminate(rport); + spin_lock_irqsave(shost->host_lock, flags); + } spin_unlock_irqrestore(shost->host_lock, flags); } From 7c72ce81870ded9365f4bc5caa98ef1591dd18dd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 14 Oct 2005 11:23:27 -0400 Subject: [PATCH 04/54] [SCSI] Fix leak of Scsi_Cmnds When a request is deferred in scsi_init_io because the sg table could not be allocated, the associated scsi_cmnd is not released and the request is not marked with REQ_DONTPREP. When the command is retried, if scsi_prep_fn decides to kill it then the scsi_cmnd will never be released. This patch (as573) changes scsi_init_io so that it calls scsi_put_command before deferring a request. Signed-off-by: Alan Stern Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dc9c772bc874..0074f28c37b2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -97,7 +97,6 @@ int scsi_insert_special_req(struct scsi_request *sreq, int at_head) } static void scsi_run_queue(struct request_queue *q); -static void scsi_release_buffers(struct scsi_cmnd *cmd); /* * Function: scsi_unprep_request() @@ -1040,8 +1039,10 @@ static int scsi_init_io(struct scsi_cmnd *cmd) * if sg table allocation fails, requeue request later. */ sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); - if (unlikely(!sgpnt)) + if (unlikely(!sgpnt)) { + scsi_unprep_request(req); return BLKPREP_DEFER; + } cmd->request_buffer = (char *) sgpnt; cmd->request_bufflen = req->nr_sectors << 9; @@ -1245,8 +1246,8 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) */ ret = scsi_init_io(cmd); switch(ret) { + /* For BLKPREP_KILL/DEFER the cmd was released */ case BLKPREP_KILL: - /* BLKPREP_KILL return also releases the command */ goto kill; case BLKPREP_DEFER: goto defer; From f4fd20bf31376f29e4edde6596e3972198877309 Mon Sep 17 00:00:00 2001 From: Karl Magnus Kolstoe Date: Mon, 17 Oct 2005 10:05:42 +0200 Subject: [PATCH 05/54] [SCSI] 2.6.13.3; add Pioneer DRM-624x to drivers/scsi/scsi_devinfo.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch below should make the Pioneer DRM-624X automatically be set up with all 6 "drives". (6 slot SCSI CD changer) Signed-off-by: Karl Magnus Kolstø Signed-off-by: James Bottomley --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 64fc9e21f35b..e69477d1889b 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -185,6 +185,7 @@ static struct { {"PIONEER", "CD-ROM DRM-600", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-602X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-604X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, + {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN}, {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN}, {"SEAGATE", "ST34555N", "0930", BLIST_NOTQ}, /* Chokes on tagged INQUIRY */ From 16192896ea8e03d18d4228023500607b00df49e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Oct 2005 15:40:52 +0200 Subject: [PATCH 06/54] [SCSI] mptsas: fix phy identifiers This patch from Eric fixes handling of the phy identifiers in mptsas. I've split it up from his bigger patch as it should go into 2.6.14 still. Signed-off-by: Eric Moore Signed-off-by: Christoph Hellwig Signed-off-by: James Bottomley --- drivers/message/fusion/mptsas.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 429820e48c69..7de19a84dc74 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -257,8 +257,8 @@ static void mptsas_print_device_pg0(SasDevicePage0_t *pg0) printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address)); printk("Target ID=0x%X\n", pg0->TargetID); printk("Bus=0x%X\n", pg0->Bus); - printk("PhyNum=0x%X\n", pg0->PhyNum); - printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus)); + printk("Parent Phy Num=0x%X\n", pg0->PhyNum); + printk("Access Status=0x%X\n", le16_to_cpu(pg0->AccessStatus)); printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo)); printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags)); printk("Physical Port=0x%X\n", pg0->PhysicalPort); @@ -270,7 +270,7 @@ static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1) printk("---- SAS EXPANDER PAGE 1 ------------\n"); printk("Physical Port=0x%X\n", pg1->PhysicalPort); - printk("PHY Identifier=0x%X\n", pg1->Phy); + printk("PHY Identifier=0x%X\n", pg1->PhyIdentifier); printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate); printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate); printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate); @@ -604,7 +604,7 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, mptsas_print_expander_pg1(buffer); /* save config data */ - phy_info->phy_id = buffer->Phy; + phy_info->phy_id = buffer->PhyIdentifier; phy_info->port_id = buffer->PhysicalPort; phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate; phy_info->programmed_link_rate = buffer->ProgrammedLinkRate; @@ -825,6 +825,8 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index) mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify, (MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE << MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle); + port_info->phy_info[i].identify.phy_id = + port_info->phy_info[i].phy_id; handle = port_info->phy_info[i].identify.handle; if (port_info->phy_info[i].attached.handle) { @@ -881,6 +883,8 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index) (MPI_SAS_DEVICE_PGAD_FORM_HANDLE << MPI_SAS_DEVICE_PGAD_FORM_SHIFT), port_info->phy_info[i].identify.handle); + port_info->phy_info[i].identify.phy_id = + port_info->phy_info[i].phy_id; } if (port_info->phy_info[i].attached.handle) { From 055787e447a6cf50aa1cc42f7d3b07f08223dd9b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 19 Oct 2005 08:22:13 -0400 Subject: [PATCH 07/54] [SCSI] scsi_error thread exits in TASK_INTERRUPTIBLE state. Found in the -rt patch set. The scsi_error thread likely will be in the TASK_INTERRUPTIBLE state upon exit. This patch fixes this bug. Signed-off-by: Steven Rostedt Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index ad5342165079..52b348c36d56 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1645,6 +1645,8 @@ int scsi_error_handler(void *data) set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); + SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" " exiting\n",shost->host_no)); From ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Oct 2005 16:24:28 +0100 Subject: [PATCH 08/54] [PATCH] Fix handling spurious page fault for hugetlb region This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and replaces it with a cleaner version that is purely based on page table operations, so that the synchronization between inode size and hugetlb mappings becomes moot. Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 16 +++------------- mm/hugetlb.c | 22 ++++++++++++++++++++++ mm/memory.c | 14 ++------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 42cb7d70f9ac..d664330d900e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -25,6 +25,8 @@ int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); struct page *alloc_huge_page(void); void free_huge_page(struct page *); +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access); extern unsigned long max_huge_pages; extern const unsigned long hugetlb_zero, hugetlb_infinity; @@ -99,6 +101,7 @@ static inline unsigned long hugetlb_total_pages(void) do { } while (0) #define alloc_huge_page() ({ NULL; }) #define free_huge_page(p) ({ (void)(p); BUG(); }) +#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) #ifndef HPAGE_MASK #define HPAGE_MASK 0 /* Keep the compiler happy */ @@ -155,24 +158,11 @@ static inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } - -static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, - unsigned long address) -{ - struct inode *inode = vma->vm_file->f_dentry->d_inode; - loff_t file_off = address - vma->vm_start; - - file_off += (vma->vm_pgoff << PAGE_SHIFT); - - return (file_off < inode->i_size); -} - #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() #define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) -#define valid_hugetlb_file_off(vma, address) 0 #endif /* !CONFIG_HUGETLBFS */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1b30d45459e..61d380678030 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -394,6 +394,28 @@ out: return ret; } +/* + * On ia64 at least, it is possible to receive a hugetlb fault from a + * stale zero entry left in the TLB from earlier hardware prefetching. + * Low-level arch code should already have flushed the stale entry as + * part of its fault handling, but we do need to accept this minor fault + * and return successfully. Whereas the "normal" case is that this is + * an access to a hugetlb page which has been truncated off since mmap. + */ +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + int ret = VM_FAULT_SIGBUS; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pte = huge_pte_offset(mm, address); + if (pte && !pte_none(*pte)) + ret = VM_FAULT_MINOR; + spin_unlock(&mm->page_table_lock); + return ret; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i) diff --git a/mm/memory.c b/mm/memory.c index 8c88b973abc5..1db40e935e55 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (unlikely(is_vm_hugetlb_page(vma))) { - if (valid_hugetlb_file_off(vma, address)) - /* We get here only if there was a stale(zero) TLB entry - * (because of HW prefetching). - * Low-level arch code (if needed) should have already - * purged the stale entry as part of this fault handling. - * Here we just return. - */ - return VM_FAULT_MINOR; - else - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ - } + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); /* * We need the page table lock to synchronize with kswapd From fda0fd6c5b722cc48e904e0daafedca275d332af Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 14 Oct 2005 16:38:49 +1000 Subject: [PATCH 09/54] [DCCP]: Use skb_set_owner_w in dccp_transmit_skb when skb->sk is NULL David S. Miller wrote: > One thing you can probably do for this bug is to mark data packets > explicitly somehow, perhaps in the SKB control block DCCP already > uses for other data. Put some boolean in there, set it true for > data packets. Then change the test in dccp_transmit_skb() as > appropriate to test the boolean flag instead of "skb_cloned(skb)". I agree. In fact we already have that flag, it's called skb->sk. So here is patch to test that instead of skb_cloned(). Signed-off-by: Herbert Xu Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4786bdcddcc9..946ec2db75de 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -62,10 +62,8 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); - /* - * Data packets are not cloned as they are never retransmitted - */ - if (skb_cloned(skb)) + + if (!skb->sk) skb_set_owner_w(skb, sk); /* Build DCCP header and checksum it. */ From ffa29347dfbc158d1f47f5925324a6f5713659c1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Oct 2005 21:08:46 +1000 Subject: [PATCH 10/54] [DCCP]: Make dccp_write_xmit always free the packet icmp_send doesn't use skb->sk at all so even if skb->sk has already been freed it can't cause crash there (it would've crashed somewhere else first, e.g., ip_queue_xmit). I found a double-free on an skb that could explain this though. dccp_sendmsg and dccp_write_xmit are a little confused as to what should free the packet when something goes wrong. Sometimes they both go for the ball and end up in each other's way. This patch makes dccp_write_xmit always free the packet no matter what. This makes sense since dccp_transmit_skb which in turn comes from the fact that ip_queue_xmit always frees the packet. Signed-off-by: Herbert Xu Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 3 ++- net/dccp/proto.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/dccp/output.c b/net/dccp/output.c index 946ec2db75de..7006549f7050 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -241,7 +241,8 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_transmit_skb(sk, skb); ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } + } else + kfree_skb(skb); return err; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a1cfd0e9e3bc..a021c3422f67 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -402,8 +402,6 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * This bug was _quickly_ found & fixed by just looking at an OSTRA * generated callgraph 8) -acme */ - if (rc != 0) - goto out_discard; out_release: release_sock(sk); return rc ? : len; From 49c5bfaffe8ae6e6440dc4bf78b03800960d93f5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 Oct 2005 12:03:28 +1000 Subject: [PATCH 11/54] [DCCP]: Clear the IPCB area Turns out the problem has nothing to do with use-after-free or double-free. It's just that we're not clearing the CB area and DCCP unlike TCP uses a CB format that's incompatible with IP. Signed-off-by: Herbert Xu Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 2 ++ net/dccp/output.c | 1 + 2 files changed, 3 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae088d1347af..6298cf58ff9e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -463,6 +463,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, if (skb != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); @@ -647,6 +648,7 @@ int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) if (skb != NULL) { const struct inet_sock *inet = inet_sk(sk); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_build_and_send_pkt(skb, sk, inet->saddr, inet->daddr, NULL); if (err == NET_XMIT_CN) diff --git a/net/dccp/output.c b/net/dccp/output.c index 7006549f7050..29250749f16f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -100,6 +100,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_queue_xmit(skb, 0); if (err <= 0) return err; From b2cc99f04c5a732c793519aca61a20f719b50db4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 20 Oct 2005 17:13:13 -0200 Subject: [PATCH 12/54] [TCP] Allow len == skb->len in tcp_fragment It is legitimate to call tcp_fragment with len == skb->len since that is done for FIN packets and the FIN flag counts as one byte. So we should only check for the len > skb->len case. Signed-off-by: Herbert Xu Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv4/tcp_output.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7114031fdc70..b907456a79f4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -435,17 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss int nsize, old_factor; u16 flags; - if (unlikely(len >= skb->len)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, " - "end_seq=%u, skb->len=%u.\n", len, mss_now, - TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - skb->len); - WARN_ON(1); - } - return 0; - } - + BUG_ON(len > skb->len); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; From b2640b420a806c91f6b8799314ca96bb88a246d2 Mon Sep 17 00:00:00 2001 From: Matt Reimer Date: Thu, 20 Oct 2005 23:21:18 +0100 Subject: [PATCH 13/54] [ARM] 3025/1: Add I2S platform device for PXA Patch from Matt Reimer Adds an I2S platform_device for PXA. I2S is used to interface with sound chips on systems like iPAQ h1910/h2200/hx4700 and Asus 716. Signed-off-by: mreimer@vpop.net Signed-off-by: Russell King --- arch/arm/mach-pxa/generic.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index d327c127eddb..1d7677669a76 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -250,6 +250,25 @@ void __init pxa_set_i2c_info(struct i2c_pxa_platform_data *info) i2c_device.dev.platform_data = info; } +static struct resource i2s_resources[] = { + { + .start = 0x40400000, + .end = 0x40400083, + .flags = IORESOURCE_MEM, + }, { + .start = IRQ_I2S, + .end = IRQ_I2S, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device i2s_device = { + .name = "pxa2xx-i2s", + .id = -1, + .resource = i2c_resources, + .num_resources = ARRAY_SIZE(i2s_resources), +}; + static struct platform_device *devices[] __initdata = { &pxamci_device, &udc_device, @@ -258,6 +277,7 @@ static struct platform_device *devices[] __initdata = { &btuart_device, &stuart_device, &i2c_device, + &i2s_device, }; static int __init pxa_init(void) From a7ce8edc8232da51dc3a804ec9c734019d115b40 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 20 Oct 2005 23:21:18 +0100 Subject: [PATCH 14/54] [ARM] 3026/1: S3C2410 - avoid possible overflow in pll calculations Patch from Ben Dooks Avoid the possiblity that if the board is using a 16.9334 or higher crystal with a high PLL multiplier, then the pll value could overflow the capability of an int. Also fix the value types of the intermediate variables to unsigned int. Rewrite of patch from Guillaume Gourat Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/regs-clock.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/include/asm-arm/arch-s3c2410/regs-clock.h b/include/asm-arm/arch-s3c2410/regs-clock.h index 16f4c3cc1388..66794b13e185 100644 --- a/include/asm-arm/arch-s3c2410/regs-clock.h +++ b/include/asm-arm/arch-s3c2410/regs-clock.h @@ -18,7 +18,8 @@ * 10-Feb-2005 Ben Dooks Fixed CAMDIVN address (Guillaume Gourat) * 10-Mar-2005 Lucas Villa Real Changed S3C2410_VA to S3C24XX_VA * 27-Aug-2005 Ben Dooks Add clock-slow info - */ + * 20-Oct-2005 Ben Dooks Fixed overflow in PLL (Guillaume Gourat) +*/ #ifndef __ASM_ARM_REGS_CLOCK #define __ASM_ARM_REGS_CLOCK "$Id: clock.h,v 1.4 2003/04/30 14:50:51 ben Exp $" @@ -83,10 +84,13 @@ #ifndef __ASSEMBLY__ +#include + static inline unsigned int -s3c2410_get_pll(int pllval, int baseclk) +s3c2410_get_pll(unsigned int pllval, unsigned int baseclk) { - int mdiv, pdiv, sdiv; + unsigned int mdiv, pdiv, sdiv; + uint64_t fvco; mdiv = pllval >> S3C2410_PLLCON_MDIVSHIFT; pdiv = pllval >> S3C2410_PLLCON_PDIVSHIFT; @@ -96,7 +100,10 @@ s3c2410_get_pll(int pllval, int baseclk) pdiv &= S3C2410_PLLCON_PDIVMASK; sdiv &= S3C2410_PLLCON_SDIVMASK; - return (baseclk * (mdiv + 8)) / ((pdiv + 2) << sdiv); + fvco = (uint64_t)baseclk * (mdiv + 8); + do_div(fvco, (pdiv + 2) << sdiv); + + return (unsigned int)fvco; } #endif /* __ASSEMBLY__ */ From b048dbf4d428c89f219efc2eddf2771f13500503 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 20 Oct 2005 23:21:19 +0100 Subject: [PATCH 15/54] [ARM] 3027/1: BAST - reduce NAND timings slightly Patch from Ben Dooks The current Simtec BAST nand area timings are a little too slow to be obtained by a 2410 running at 266MHz, so reduce the timings slightly to bring them into the acceptable range. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/mach-bast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 8ca955984645..7b51bfd0ba6d 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -307,9 +307,9 @@ static void bast_nand_select(struct s3c2410_nand_set *set, int slot) } static struct s3c2410_platform_nand bast_nand_info = { - .tacls = 40, - .twrph0 = 80, - .twrph1 = 80, + .tacls = 30, + .twrph0 = 60, + .twrph1 = 60, .nr_sets = ARRAY_SIZE(bast_nand_sets), .sets = bast_nand_sets, .select_chip = bast_nand_select, From 7fe8785e4198ad6b5dfd4a76c44c97e9b4463534 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 20 Oct 2005 23:21:20 +0100 Subject: [PATCH 16/54] [ARM] 3028/1: S3C2410 - add DCLK mask definitions Patch from Ben Dooks From: Guillaume Gourat Add MASK definitions for DCLK0 and DCLK1 Signed-off-by: Guillaume Gourat Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/regs-clock.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/asm-arm/arch-s3c2410/regs-clock.h b/include/asm-arm/arch-s3c2410/regs-clock.h index 66794b13e185..34360706e016 100644 --- a/include/asm-arm/arch-s3c2410/regs-clock.h +++ b/include/asm-arm/arch-s3c2410/regs-clock.h @@ -19,6 +19,7 @@ * 10-Mar-2005 Lucas Villa Real Changed S3C2410_VA to S3C24XX_VA * 27-Aug-2005 Ben Dooks Add clock-slow info * 20-Oct-2005 Ben Dooks Fixed overflow in PLL (Guillaume Gourat) + * 20-Oct-2005 Ben Dooks Add masks for DCLK (Guillaume Gourat) */ #ifndef __ASM_ARM_REGS_CLOCK @@ -67,11 +68,16 @@ #define S3C2410_DCLKCON_DCLK0_UCLK (1<<1) #define S3C2410_DCLKCON_DCLK0_DIV(x) (((x) - 1 )<<4) #define S3C2410_DCLKCON_DCLK0_CMP(x) (((x) - 1 )<<8) +#define S3C2410_DCLKCON_DCLK0_DIV_MASK ((0xf)<<4) +#define S3C2410_DCLKCON_DCLK0_CMP_MASK ((0xf)<<8) #define S3C2410_DCLKCON_DCLK1EN (1<<16) #define S3C2410_DCLKCON_DCLK1_PCLK (0<<17) #define S3C2410_DCLKCON_DCLK1_UCLK (1<<17) #define S3C2410_DCLKCON_DCLK1_DIV(x) (((x) - 1) <<20) +#define S3C2410_DCLKCON_DCLK1_CMP(x) (((x) - 1) <<24) +#define S3C2410_DCLKCON_DCLK1_DIV_MASK ((0xf) <<20) +#define S3C2410_DCLKCON_DCLK1_CMP_MASK ((0xf) <<24) #define S3C2410_CLKDIVN_PDIVN (1<<0) #define S3C2410_CLKDIVN_HDIVN (1<<1) From d18566376055046fca0b51ad536f1778ef34966a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Oct 2005 10:17:37 +0100 Subject: [PATCH 17/54] [ARM] Fix Integrator IM/PD-1 support Signed-off-by: Russell King --- arch/arm/mach-integrator/impd1.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index c3c2f17d030e..a1b153d1626c 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -67,7 +67,7 @@ static void impd1_setvco(struct clk *clk, struct icst525_vco vco) } writel(0, impd1->base + IMPD1_LOCK); -#if DEBUG +#ifdef DEBUG vco.v = val & 0x1ff; vco.r = (val >> 9) & 0x7f; vco.s = (val >> 16) & 7; @@ -427,17 +427,18 @@ static int impd1_probe(struct lm_device *dev) return ret; } +static int impd1_remove_one(struct device *dev, void *data) +{ + device_unregister(dev); + return 0; +} + static void impd1_remove(struct lm_device *dev) { struct impd1_module *impd1 = lm_get_drvdata(dev); - struct list_head *l, *n; int i; - list_for_each_safe(l, n, &dev->dev.children) { - struct device *d = list_to_dev(l); - - device_unregister(d); - } + device_for_each_child(&dev->dev, NULL, impd1_remove_one); for (i = 0; i < ARRAY_SIZE(impd1->vcos); i++) clk_unregister(&impd1->vcos[i]); From 024358eeafd44ecffd0e7a1002ef3ccc1d0acd4d Mon Sep 17 00:00:00 2001 From: Eric Moore Date: Fri, 21 Oct 2005 20:56:36 +0200 Subject: [PATCH 18/54] [PATCH] mptsas: fix phy identifiers This fixes handling of the phy identifiers in mptsas. Signed-off-by: Eric Moore [ split it a pre-2.6.14 portion from Eric's bigger patch ] Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- drivers/message/fusion/mptsas.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 429820e48c69..7de19a84dc74 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -257,8 +257,8 @@ static void mptsas_print_device_pg0(SasDevicePage0_t *pg0) printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address)); printk("Target ID=0x%X\n", pg0->TargetID); printk("Bus=0x%X\n", pg0->Bus); - printk("PhyNum=0x%X\n", pg0->PhyNum); - printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus)); + printk("Parent Phy Num=0x%X\n", pg0->PhyNum); + printk("Access Status=0x%X\n", le16_to_cpu(pg0->AccessStatus)); printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo)); printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags)); printk("Physical Port=0x%X\n", pg0->PhysicalPort); @@ -270,7 +270,7 @@ static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1) printk("---- SAS EXPANDER PAGE 1 ------------\n"); printk("Physical Port=0x%X\n", pg1->PhysicalPort); - printk("PHY Identifier=0x%X\n", pg1->Phy); + printk("PHY Identifier=0x%X\n", pg1->PhyIdentifier); printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate); printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate); printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate); @@ -604,7 +604,7 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, mptsas_print_expander_pg1(buffer); /* save config data */ - phy_info->phy_id = buffer->Phy; + phy_info->phy_id = buffer->PhyIdentifier; phy_info->port_id = buffer->PhysicalPort; phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate; phy_info->programmed_link_rate = buffer->ProgrammedLinkRate; @@ -825,6 +825,8 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index) mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify, (MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE << MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle); + port_info->phy_info[i].identify.phy_id = + port_info->phy_info[i].phy_id; handle = port_info->phy_info[i].identify.handle; if (port_info->phy_info[i].attached.handle) { @@ -881,6 +883,8 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index) (MPI_SAS_DEVICE_PGAD_FORM_HANDLE << MPI_SAS_DEVICE_PGAD_FORM_SHIFT), port_info->phy_info[i].identify.handle); + port_info->phy_info[i].identify.phy_id = + port_info->phy_info[i].phy_id; } if (port_info->phy_info[i].attached.handle) { From a1c7e111934b6375baf07a970d6c890d18d7e34f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2005 22:39:36 +1000 Subject: [PATCH 19/54] [PATCH] ppc64: Fix typo in time calculations This fixes a typo in the div128_by_32 function used in the timekeeping calculations on ppc64. If you look at the code it's quite obvious that we need (rb + c) rather than (rb + b). The "b" is clearly just a typo. Signed-off-by: Paul Mackerras Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 9939c206afa4..b56c6a324e17 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -870,7 +870,7 @@ void div128_by_32( unsigned long dividend_high, unsigned long dividend_low, rb = ((ra + b) - (x * divisor)) << 32; y = (rb + c)/divisor; - rc = ((rb + b) - (y * divisor)) << 32; + rc = ((rb + c) - (y * divisor)) << 32; z = (rc + d)/divisor; From 5d96551541a8f5521dcc8c634a18d42a3d349ec9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 21 Oct 2005 14:12:51 +1000 Subject: [PATCH 20/54] [PATCH] ppc64: Fix pages marked dirty abusively While working on 64K pages, I found this little buglet in our update_mmu_cache() implementation. The code calls __hash_page() passing it an "access" parameter (the type of access that triggers the hash) containing the bits _PAGE_RW and _PAGE_USER of the linux PTE. The latter is useless in this case and the former is wrong. In fact, if we have a writeable PTE and we pass _PAGE_RW to hash_page(), it will set _PAGE_DIRTY (since we track dirty that way, by hash faulting !dirty) which is not what we want. In fact, the correct fix is to always pass 0. That means that only read-only or already dirty read write PTEs will be preloaded. The (hopefully rare) case of a non dirty read write PTE can't be preloaded this way, it will have to fault in hash_page on the actual access. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc64/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index c2157c9c3acb..be64b157afce 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -799,8 +799,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) local = 1; - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); + __hash_page(ea, 0, vsid, ptep, 0x300, local); local_irq_restore(flags); } From e29971f9a4ca08c3c31b98be96c293ef9f7dcc32 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Oct 2005 23:49:00 +0100 Subject: [PATCH 21/54] [PATCH] drm: another mga bug The wrong state emission routines were being called for G550, and consistent maps weren't correctly mapped... Signed-off-by: Dave Airlie Signed-off-by: Linus Torvalds --- drivers/char/drm/drm_vm.c | 3 ++- drivers/char/drm/mga_drv.h | 2 +- drivers/char/drm/mga_state.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index ced4215e2275..39ea96e42c5b 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -148,7 +148,8 @@ static __inline__ struct page *drm_do_vm_shm_nopage(struct vm_area_struct *vma, offset = address - vma->vm_start; i = (unsigned long)map->handle + offset; - page = vmalloc_to_page((void *)i); + page = (map->type == _DRM_CONSISTENT) ? + virt_to_page((void *)i) : vmalloc_to_page((void *)i); if (!page) return NOPAGE_OOM; get_page(page); diff --git a/drivers/char/drm/mga_drv.h b/drivers/char/drm/mga_drv.h index b22fdbd4f830..6059c5a5b105 100644 --- a/drivers/char/drm/mga_drv.h +++ b/drivers/char/drm/mga_drv.h @@ -227,7 +227,7 @@ static inline u32 _MGA_READ(u32 *addr) #define MGA_EMIT_STATE( dev_priv, dirty ) \ do { \ if ( (dirty) & ~MGA_UPLOAD_CLIPRECTS ) { \ - if ( dev_priv->chipset == MGA_CARD_TYPE_G400 ) { \ + if ( dev_priv->chipset >= MGA_CARD_TYPE_G400 ) { \ mga_g400_emit_state( dev_priv ); \ } else { \ mga_g200_emit_state( dev_priv ); \ diff --git a/drivers/char/drm/mga_state.c b/drivers/char/drm/mga_state.c index 05bbb4719376..6ac5e006226f 100644 --- a/drivers/char/drm/mga_state.c +++ b/drivers/char/drm/mga_state.c @@ -53,7 +53,7 @@ static void mga_emit_clip_rect( drm_mga_private_t *dev_priv, /* Force reset of DWGCTL on G400 (eliminates clip disable bit). */ - if (dev_priv->chipset == MGA_CARD_TYPE_G400) { + if (dev_priv->chipset >= MGA_CARD_TYPE_G400) { DMA_BLOCK(MGA_DWGCTL, ctx->dwgctl, MGA_LEN + MGA_EXEC, 0x80000000, MGA_DWGCTL, ctx->dwgctl, From 3078fcc1d18c7235b034dc889642c5300959fa20 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 21 Oct 2005 13:41:19 +1000 Subject: [PATCH 22/54] [PATCH] ppc64: Fix typo bug in iSeries hash code This fixes a stupid typo bug in the iSeries hash table code. When we place a hash PTE in the secondary bucket, instead of setting the SECONDARY flag bit, as we should, we (redundantly) set the VALID flag. This was introduced with the patch abolishing bitfields from the hash table code. Mea culpa, oops. It hasn't been noticed until now because in practice we don't hit the secondary bucket terribly often. Signed-off-by: David Gibson Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_htab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c index 2192055a90a0..073b76661747 100644 --- a/arch/ppc64/kernel/iSeries_htab.c +++ b/arch/ppc64/kernel/iSeries_htab.c @@ -66,7 +66,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, } if (slot < 0) { /* MSB set means secondary group */ - vflags |= HPTE_V_VALID; + vflags |= HPTE_V_SECONDARY; secondary = 1; slot &= 0x7fffffffffffffff; } From 0213df74315bbab9ccaa73146f3e11972ea6de46 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 21 Oct 2005 17:21:03 -0400 Subject: [PATCH 23/54] [PATCH] cpufreq: fix pending powernow timer stuck condition AMD recently discovered that on some hardware, there is a race condition possible when a C-state change request goes onto the bus at the same time as a P-state change request. Both requests happen, but the southbridge hardware only acknowledges the C-state change. The PowerNow! driver is then stuck in a loop, waiting for the P-state change acknowledgement. The driver eventually times out, but can no longer perform P-state changes. It turns out the solution is to resend the P-state change, which the southbridge will acknowledge normally. Thanks to Johannes Winkelmann for reporting this and testing the fix. Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ab6e0611303d..ec3a9e335aa3 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -44,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.3" +#define VERSION "version 1.50.4" #include "powernow-k8.h" /* serialize freq changes */ @@ -111,8 +111,8 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; do { - if (i++ > 0x1000000) { - printk(KERN_ERR PFX "detected change pending stuck\n"); + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -159,6 +159,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { u32 lo; u32 savevid = data->currvid; + u32 i = 0; if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on fid write\n"); @@ -170,10 +171,13 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + retrun 1; + } + } while (query_current_values_with_pending_wait(data)); count_off_irt(data); @@ -197,6 +201,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) { u32 lo; u32 savefid = data->currfid; + int i = 0; if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on vid write\n"); @@ -208,10 +213,13 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", From 9465bee863bc4c6cf1566c12d6f92a8133e3da5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Oct 2005 15:36:00 -0700 Subject: [PATCH 24/54] Revert "Fix cpu timers exit deadlock and races" Revert commit e03d13e985d48ac4885382c9e3b1510c78bd047f, to be replaced by a much nicer fix from Roland. --- kernel/posix-cpu-timers.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index b3f3edc475de..7a51a5597c33 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -387,19 +387,25 @@ int posix_cpu_timer_del(struct k_itimer *timer) if (unlikely(p == NULL)) return 0; - spin_lock(&p->sighand->siglock); if (!list_empty(&timer->it.cpu.entry)) { - /* - * Take us off the task's timer list. We don't need to - * take tasklist_lock and check for the task being reaped. - * If it was reaped, it already called posix_cpu_timers_exit - * and posix_cpu_timers_exit_group to clear all the timers - * that pointed to it. - */ - list_del(&timer->it.cpu.entry); - put_task_struct(p); + read_lock(&tasklist_lock); + if (unlikely(p->signal == NULL)) { + /* + * We raced with the reaping of the task. + * The deletion should have cleared us off the list. + */ + BUG_ON(!list_empty(&timer->it.cpu.entry)); + } else { + /* + * Take us off the task's timer list. + */ + spin_lock(&p->sighand->siglock); + list_del(&timer->it.cpu.entry); + spin_unlock(&p->sighand->siglock); + } + read_unlock(&tasklist_lock); } - spin_unlock(&p->sighand->siglock); + put_task_struct(p); return 0; } From 25f407f0b668f5e4ebd5d13e1fb4306ba6427ead Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 21 Oct 2005 15:03:29 -0700 Subject: [PATCH 25/54] [PATCH] Call exit_itimers from do_exit, not __exit_signal When I originally moved exit_itimers into __exit_signal, that was the only place where we could reliably know it was the last thread in the group dying, without races. Since then we've gotten the signal_struct.live counter, and do_exit can reliably do group-wide cleanup work. This patch moves the call to do_exit, where it's made without locks. This avoids the deadlock issues that the old __exit_signal code's comment talks about, and the one that Oleg found recently with process CPU timers. [ This replaces e03d13e985d48ac4885382c9e3b1510c78bd047f, which is why it was just reverted. ] Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/exit.c | 1 + kernel/posix-timers.c | 2 +- kernel/signal.c | 14 +------------- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 43077732619b..3b25b182d2be 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -843,6 +843,7 @@ fastcall NORET_TYPE void do_exit(long code) group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { del_timer_sync(&tsk->signal->real_timer); + exit_itimers(tsk->signal); acct_process(code); } exit_mm(tsk); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index b7b532acd9fc..dda3cda73c77 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1157,7 +1157,7 @@ retry_delete: } /* - * This is called by __exit_signal, only when there are no more + * This is called by do_exit or de_thread, only when there are no more * references to the shared signal_struct. */ void exit_itimers(struct signal_struct *sig) diff --git a/kernel/signal.c b/kernel/signal.c index 50c992643771..f2b96b08fb44 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -397,20 +397,8 @@ void __exit_signal(struct task_struct *tsk) flush_sigqueue(&tsk->pending); if (sig) { /* - * We are cleaning up the signal_struct here. We delayed - * calling exit_itimers until after flush_sigqueue, just in - * case our thread-local pending queue contained a queued - * timer signal that would have been cleared in - * exit_itimers. When that called sigqueue_free, it would - * attempt to re-take the tasklist_lock and deadlock. This - * can never happen if we ensure that all queues the - * timer's signal might be queued on have been flushed - * first. The shared_pending queue, and our own pending - * queue are the only queues the timer could be on, since - * there are no other threads left in the group and timer - * signals are constrained to threads inside the group. + * We are cleaning up the signal_struct here. */ - exit_itimers(sig); exit_thread_group_keys(sig); kmem_cache_free(signal_cachep, sig); } From 63172cb3d5ef762dcb60a292bc7f016b85cf6e1f Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 21 Oct 2005 16:56:08 -0700 Subject: [PATCH 26/54] [PATCH] typo fix in last cpufreq powernow patch Not sure how it slipped by, but here's a trivial typo fix for powernow. Signed-off-by: Chris Wright [ It's "nurter" backwards.. Maybe we have a hillbilly The Shining fan? ] Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ec3a9e335aa3..58ca98fdc2ca 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -175,7 +175,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); if (i++ > 100) { printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - retrun 1; + return 1; } } while (query_current_values_with_pending_wait(data)); From c98d80edc827277c28f88d662a7d6e9affa7e12f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 22 Oct 2005 13:39:21 +0300 Subject: [PATCH 27/54] [SK_BUFF]: ipvs_property field must be copied IPVS used flag NFC_IPVS_PROPERTY in nfcache but as now nfcache was removed the new flag 'ipvs_property' still needs to be copied. This patch should be included in 2.6.14. Further comments from Harald Welte: Sorry, seems like the bug was introduced by me. Signed-off-by: Julian Anastasov Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo --- net/core/skbuff.c | 6 ++++++ net/ipv4/ip_output.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index af9b1516e21f..02cd4cde2112 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -410,6 +410,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + C(ipvs_property); +#endif #ifdef CONFIG_BRIDGE_NETFILTER C(nf_bridge); nf_bridge_get(skb->nf_bridge); @@ -467,6 +470,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif #ifdef CONFIG_BRIDGE_NETFILTER new->nf_bridge = old->nf_bridge; nf_bridge_get(old->nf_bridge); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3f1a263e1249..1ad5202e556b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -391,6 +391,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->nfct = from->nfct; nf_conntrack_get(to->nfct); to->nfctinfo = from->nfctinfo; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + to->ipvs_property = from->ipvs_property; +#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; From 4595f251058609d97a5d792de08c34a7956af816 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 14 Oct 2005 21:29:56 +0100 Subject: [PATCH 28/54] [AX.25]: Fix signed char bug On architectures where the char type defaults to unsigned some of the arithmetic in the AX.25 stack to fail, resulting in some packets being dropped on receive. Credits for tracking this down and the original patch to Bob Brose N0QBJ . Signed-off-by: Ralf Baechle DL5RB Signed-off-by: Arnaldo Carvalho de Melo --- include/net/ax25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 9dbcd9e51c00..30bb4a893237 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -171,7 +171,7 @@ typedef struct { ax25_address calls[AX25_MAX_DIGIS]; unsigned char repeated[AX25_MAX_DIGIS]; unsigned char ndigi; - char lastrepeat; + signed char lastrepeat; } ax25_digi; typedef struct ax25_route { From d475f3f47a0427dfee483cecf9a7e9109e991423 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Fri, 21 Oct 2005 22:06:15 +0400 Subject: [PATCH 29/54] [PATCH] alpha: additional smp barriers As stated in Documentation/atomic_ops.txt, atomic functions returning values must have the memory barriers both before and after the operation. Thanks to DaveM for pointing that out. Signed-off-by: Ivan Kokshaysky Signed-off-by: Linus Torvalds --- include/asm-alpha/atomic.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h index 1b383e3cb68c..0b40bad00289 100644 --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -100,18 +100,19 @@ static __inline__ void atomic64_sub(long i, atomic64_t * v) static __inline__ long atomic_add_return(int i, atomic_t * v) { long temp, result; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%1\n" " addl %0,%3,%2\n" " addl %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,2f\n" - " mb\n" ".subsection 2\n" "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (v->counter), "=&r" (result) :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); return result; } @@ -120,54 +121,57 @@ static __inline__ long atomic_add_return(int i, atomic_t * v) static __inline__ long atomic64_add_return(long i, atomic64_t * v) { long temp, result; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%1\n" " addq %0,%3,%2\n" " addq %0,%3,%0\n" " stq_c %0,%1\n" " beq %0,2f\n" - " mb\n" ".subsection 2\n" "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (v->counter), "=&r" (result) :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); return result; } static __inline__ long atomic_sub_return(int i, atomic_t * v) { long temp, result; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%1\n" " subl %0,%3,%2\n" " subl %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,2f\n" - " mb\n" ".subsection 2\n" "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (v->counter), "=&r" (result) :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); return result; } static __inline__ long atomic64_sub_return(long i, atomic64_t * v) { long temp, result; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%1\n" " subq %0,%3,%2\n" " subq %0,%3,%0\n" " stq_c %0,%1\n" " beq %0,2f\n" - " mb\n" ".subsection 2\n" "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (v->counter), "=&r" (result) :"Ir" (i), "m" (v->counter) : "memory"); + smp_mb(); return result; } From 203755029e063066ecc4cf5eee1110ab946c2d88 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 23 Oct 2005 16:11:39 +1000 Subject: [PATCH 30/54] [NEIGH] Print stack trace in neigh_add_timer Stack traces are very helpful in determining the exact nature of a bug. So let's print a stack trace when the timer is added twice. Signed-off-by: Herbert Xu --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4128fc76ac3a..766caa0dd930 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -732,6 +732,7 @@ static inline void neigh_add_timer(struct neighbour *n, unsigned long when) if (unlikely(mod_timer(&n->timer, when))) { printk("NEIGH: BUG, double timer add, state is %x\n", n->nud_state); + dump_stack(); } } From 6fb9974f49f7a6032118c5b6caa6e08e7097913e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 23 Oct 2005 16:37:48 +1000 Subject: [PATCH 31/54] [NEIGH] Fix add_timer race in neigh_add_timer neigh_add_timer cannot use add_timer unconditionally. The reason is that by the time it has obtained the write lock someone else (e.g., neigh_update) could have already added a new timer. So it should only use mod_timer and deal with its return value accordingly. This bug would have led to rare neighbour cache entry leaks. Signed-off-by: Herbert Xu --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 766caa0dd930..37d8d8c29522 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -816,10 +816,10 @@ static void neigh_timer_handler(unsigned long arg) } if (neigh->nud_state & NUD_IN_TIMER) { - neigh_hold(neigh); if (time_before(next, jiffies + HZ/2)) next = jiffies + HZ/2; - neigh_add_timer(neigh, next); + if (!mod_timer(&neigh->timer, next)) + neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { struct sk_buff *skb = skb_peek(&neigh->arp_queue); From 49636bb12892786e4a7b207b37ca7b0c5ca1cae0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 23 Oct 2005 17:18:00 +1000 Subject: [PATCH 32/54] [NEIGH] Fix timer leak in neigh_changeaddr neigh_changeaddr attempts to delete neighbour timers without setting nud_state. This doesn't work because the timer may have already fired when we acquire the write lock in neigh_changeaddr. The result is that the timer may keep firing for quite a while until the entry reaches NEIGH_FAILED. It should be setting the nud_state straight away so that if the timer has already fired it can simply exit once we relinquish the lock. In fact, this whole function is simply duplicating the logic in neigh_ifdown which in turn is already doing the right thing when it comes to deleting timers and setting nud_state. So all we have to do is take that code out and put it into a common function and make both neigh_changeaddr and neigh_ifdown call it. Signed-off-by: Herbert Xu --- net/core/neighbour.c | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 37d8d8c29522..1dcf7fa1f0fe 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -175,39 +175,10 @@ static void pneigh_queue_purge(struct sk_buff_head *list) } } -void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) { int i; - write_lock_bh(&tbl->lock); - - for (i=0; i <= tbl->hash_mask; i++) { - struct neighbour *n, **np; - - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - if (dev && n->dev != dev) { - np = &n->next; - continue; - } - *np = n->next; - write_lock_bh(&n->lock); - n->dead = 1; - neigh_del_timer(n); - write_unlock_bh(&n->lock); - neigh_release(n); - } - } - - write_unlock_bh(&tbl->lock); -} - -int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) -{ - int i; - - write_lock_bh(&tbl->lock); - for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np = &tbl->hash_buckets[i]; @@ -243,7 +214,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) neigh_release(n); } } +} +void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); + write_unlock_bh(&tbl->lock); +} + +int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); From e80eda94d3eaf1d12cfc97878eff77cd679dabc9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Oct 2005 10:02:50 -0700 Subject: [PATCH 33/54] Posix timers: limit number of timers firing at once Bursty timers aren't good for anybody, very much including latency for other programs when we trigger lots of timers in interrupt context. So set a random limit, after which we'll handle the rest on the next timer tick. Noted by Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 7a51a5597c33..d30b304a3384 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -961,14 +961,16 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { + int maxfire; struct list_head *timers = tsk->cpu_timers; + maxfire = 20; tsk->it_prof_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(prof_ticks(tsk), t->expires.cpu)) { + if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { tsk->it_prof_expires = t->expires.cpu; break; } @@ -977,12 +979,13 @@ static void check_thread_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; tsk->it_virt_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(virt_ticks(tsk), t->expires.cpu)) { + if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { tsk->it_virt_expires = t->expires.cpu; break; } @@ -991,12 +994,13 @@ static void check_thread_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; tsk->it_sched_expires = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (tsk->sched_time < t->expires.sched) { + if (!--maxfire || tsk->sched_time < t->expires.sched) { tsk->it_sched_expires = t->expires.sched; break; } @@ -1013,6 +1017,7 @@ static void check_thread_timers(struct task_struct *tsk, static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { + int maxfire; struct signal_struct *const sig = tsk->signal; cputime_t utime, stime, ptime, virt_expires, prof_expires; unsigned long long sched_time, sched_expires; @@ -1045,12 +1050,13 @@ static void check_process_timers(struct task_struct *tsk, } while (t != tsk); ptime = cputime_add(utime, stime); + maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(ptime, t->expires.cpu)) { + if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) { prof_expires = t->expires.cpu; break; } @@ -1059,12 +1065,13 @@ static void check_process_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; virt_expires = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (cputime_lt(utime, t->expires.cpu)) { + if (!--maxfire || cputime_lt(utime, t->expires.cpu)) { virt_expires = t->expires.cpu; break; } @@ -1073,12 +1080,13 @@ static void check_process_timers(struct task_struct *tsk, } ++timers; + maxfire = 20; sched_expires = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_entry(timers->next, struct cpu_timer_list, entry); - if (sched_time < t->expires.sched) { + if (!--maxfire || sched_time < t->expires.sched) { sched_expires = t->expires.sched; break; } From 4196c3af25d98204216a5d6c37ad2cb303a1f2bf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Oct 2005 16:31:16 -0700 Subject: [PATCH 34/54] cardbus: limit IO windows to 256 bytes That's what we've always historically done, and bigger windows seem to confuse some cardbus bridges. Or something. Alan reports that this makes the ThinkPad 600x series work properly again: the 4kB IO window for some reason made IDE DMA not work, which makes IDE painfully slow even if it works after DMA timeouts. Signed-off-by: Linus Torvalds --- drivers/pci/setup-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 657be948baf7..28ce3a7ee434 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -40,7 +40,7 @@ * FIXME: IO should be max 256 bytes. However, since we may * have a P2P bridge below a cardbus bridge, we need 4K. */ -#define CARDBUS_IO_SIZE (4*1024) +#define CARDBUS_IO_SIZE (256) #define CARDBUS_MEM_SIZE (32*1024*1024) static void __devinit From a991304496bdaec09f497d1eb5d9dcf2f94b7d5d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Oct 2005 12:57:11 -0700 Subject: [PATCH 35/54] [PATCH] kernel-parameters cleanup Fix typos & trailing whitespace. Add blank lines in a few places. Remove "AM53C974=" option: driver does not exist. Restrict to < 80 columns in most places (but don't split formatted command-line arguments). Add a few option arguments for completeness. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 492 +++++++++++++++------------- 1 file changed, 258 insertions(+), 234 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7086f0a90d14..971589a9752d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -17,7 +17,7 @@ are specified on the kernel command line with the module name plus usbcore.blinkenlights=1 -The text in square brackets at the beginning of the description state the +The text in square brackets at the beginning of the description states the restrictions on the kernel for the said kernel parameter to be valid. The restrictions referred to are that the relevant option is valid if: @@ -27,8 +27,8 @@ restrictions referred to are that the relevant option is valid if: APM Advanced Power Management support is enabled. AX25 Appropriate AX.25 support is enabled. CD Appropriate CD support is enabled. - DEVFS devfs support is enabled. - DRM Direct Rendering Management support is enabled. + DEVFS devfs support is enabled. + DRM Direct Rendering Management support is enabled. EDD BIOS Enhanced Disk Drive Services (EDD) is enabled EFI EFI Partitioning (GPT) is enabled EIDE EIDE/ATAPI support is enabled. @@ -71,7 +71,7 @@ restrictions referred to are that the relevant option is valid if: SERIAL Serial support is enabled. SMP The kernel is an SMP kernel. SPARC Sparc architecture is enabled. - SWSUSP Software suspension is enabled. + SWSUSP Software suspend is enabled. TS Appropriate touchscreen support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. @@ -105,13 +105,13 @@ running once the system is up. See header of drivers/scsi/53c7xx.c. See also Documentation/scsi/ncr53c7xx.txt. - acpi= [HW,ACPI] Advanced Configuration and Power Interface - Format: { force | off | ht | strict } + acpi= [HW,ACPI] Advanced Configuration and Power Interface + Format: { force | off | ht | strict | noirq } force -- enable ACPI if default was off off -- disable ACPI if default was on noirq -- do not use ACPI for IRQ routing ht -- run only enough ACPI to enable Hyper Threading - strict -- Be less tolerant of platforms that are not + strict -- Be less tolerant of platforms that are not strictly ACPI specification compliant. See also Documentation/pm.txt, pci=noacpi @@ -119,20 +119,23 @@ running once the system is up. acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode } See Documentation/power/video.txt - + acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode - Format: { level | edge | high | low } + Format: { level | edge | high | low } - acpi_irq_balance [HW,ACPI] ACPI will balance active IRQs - default in APIC mode + acpi_irq_balance [HW,ACPI] + ACPI will balance active IRQs + default in APIC mode - acpi_irq_nobalance [HW,ACPI] ACPI will not move active IRQs (default) - default in PIC mode + acpi_irq_nobalance [HW,ACPI] + ACPI will not move active IRQs (default) + default in PIC mode - acpi_irq_pci= [HW,ACPI] If irq_balance, Clear listed IRQs for use by PCI + acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for + use by PCI Format: ,... - acpi_irq_isa= [HW,ACPI] If irq_balance, Mark listed IRQs used by ISA + acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA Format: ,... acpi_osi= [HW,ACPI] empty param disables _OSI @@ -145,14 +148,14 @@ running once the system is up. acpi_dbg_layer= [HW,ACPI] Format: - Each bit of the indicates an acpi debug layer, + Each bit of the indicates an ACPI debug layer, 1: enable, 0: disable. It is useful for boot time debugging. After system has booted up, it can be set via /proc/acpi/debug_layer. acpi_dbg_level= [HW,ACPI] Format: - Each bit of the indicates an acpi debug level, + Each bit of the indicates an ACPI debug level, 1: enable, 0: disable. It is useful for boot time debugging. After system has booted up, it can be set via /proc/acpi/debug_level. @@ -161,12 +164,13 @@ running once the system is up. acpi_generic_hotkey [HW,ACPI] Allow consolidated generic hotkey driver to - over-ride platform specific driver. + override platform specific driver. See also Documentation/acpi-hotkey.txt. enable_timer_pin_1 [i386,x86-64] Enable PIN 1 of APIC timer - Can be useful to work around chipset bugs (in particular on some ATI chipsets) + Can be useful to work around chipset bugs + (in particular on some ATI chipsets). The kernel tries to set a reasonable default. disable_timer_pin_1 [i386,x86-64] @@ -182,7 +186,7 @@ running once the system is up. adlib= [HW,OSS] Format: - + advansys= [HW,SCSI] See header of drivers/scsi/advansys.c. @@ -192,7 +196,7 @@ running once the system is up. aedsp16= [HW,OSS] Audio Excel DSP 16 Format: ,,,,, See also header of sound/oss/aedsp16.c. - + aha152x= [HW,SCSI] See Documentation/scsi/aha152x.txt. @@ -205,10 +209,6 @@ running once the system is up. aic79xx= [HW,SCSI] See Documentation/scsi/aic79xx.txt. - AM53C974= [HW,SCSI] - Format: ,,, - See also header of drivers/scsi/AM53C974.c. - amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , @@ -219,23 +219,24 @@ running once the system is up. connected to one of 16 gameports Format: ,,.. - apc= [HW,SPARC] Power management functions (SPARCstation-4/5 + deriv.) + apc= [HW,SPARC] + Power management functions (SPARCstation-4/5 + deriv.) Format: noidle Disable APC CPU standby support. SPARCstation-Fox does not play well with APC CPU idle - disable it if you have APC and your system crashes randomly. - apic= [APIC,i386] Change the output verbosity whilst booting + apic= [APIC,i386] Change the output verbosity whilst booting Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. - + apm= [APM] Advanced Power Management See header of arch/i386/kernel/apm.c. applicom= [HW] Format: , - + arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards Format: ,, @@ -250,38 +251,40 @@ running once the system is up. atkbd.reset= [HW] Reset keyboard during initialization - atkbd.set= [HW] Select keyboard code set - Format: (2 = AT (default) 3 = PS/2) + atkbd.set= [HW] Select keyboard code set + Format: (2 = AT (default), 3 = PS/2) atkbd.scroll= [HW] Enable scroll wheel on MS Office and similar keyboards atkbd.softraw= [HW] Choose between synthetic and real raw mode Format: (0 = real, 1 = synthetic (default)) - - atkbd.softrepeat= - [HW] Use software keyboard repeat + + atkbd.softrepeat= [HW] + Use software keyboard repeat autotest [IA64] awe= [HW,OSS] AWE32/SB32/AWE64 wave table synth Format: ,, - + aztcd= [HW,CD] Aztech CD268 CDROM driver Format: ,0x79 (?) baycom_epp= [HW,AX25] Format: , - + baycom_par= [HW,AX25] BayCom Parallel Port AX.25 Modem Format: , See header of drivers/net/hamradio/baycom_par.c. - baycom_ser_fdx= [HW,AX25] BayCom Serial Port AX.25 Modem (Full Duplex Mode) + baycom_ser_fdx= [HW,AX25] + BayCom Serial Port AX.25 Modem (Full Duplex Mode) Format: ,,[,] See header of drivers/net/hamradio/baycom_ser_fdx.c. - baycom_ser_hdx= [HW,AX25] BayCom Serial Port AX.25 Modem (Half Duplex Mode) + baycom_ser_hdx= [HW,AX25] + BayCom Serial Port AX.25 Modem (Half Duplex Mode) Format: ,, See header of drivers/net/hamradio/baycom_ser_hdx.c. @@ -292,7 +295,8 @@ running once the system is up. blkmtd_count= bttv.card= [HW,V4L] bttv (bt848 + bt878 based grabber cards) - bttv.radio= Most important insmod options are available as kernel args too. + bttv.radio= Most important insmod options are available as + kernel args too. bttv.pll= See Documentation/video4linux/bttv/Insmod-options bttv.tuner= and Documentation/video4linux/bttv/CARDLIST @@ -318,15 +322,17 @@ running once the system is up. checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } See security/selinux/Kconfig help text. - 0 -- check protection applied by kernel (includes any implied execute protection). + 0 -- check protection applied by kernel (includes + any implied execute protection). 1 -- check protection requested by application. Default value is set via a kernel config option. - Value can be changed at runtime via /selinux/checkreqprot. - - clock= [BUGS=IA-32, HW] gettimeofday timesource override. + Value can be changed at runtime via + /selinux/checkreqprot. + + clock= [BUGS=IA-32,HW] gettimeofday timesource override. Forces specified timesource (if avaliable) to be used - when calculating gettimeofday(). If specicified timesource - is not avalible, it defaults to PIT. + when calculating gettimeofday(). If specicified + timesource is not avalible, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. @@ -336,17 +342,19 @@ running once the system is up. Format: { auto | [,][] } com20020= [HW,NET] ARCnet - COM20020 chipset - Format: [,[,[,[,[,]]]]] + Format: + [,[,[,[,[,]]]]] com90io= [HW,NET] ARCnet - COM90xx chipset (IO-mapped buffers) Format: [,] - com90xx= [HW,NET] ARCnet - COM90xx chipset (memory-mapped buffers) + com90xx= [HW,NET] + ARCnet - COM90xx chipset (memory-mapped buffers) Format: [,[,]] condev= [HW,S390] console device conmode= - + console= [KNL] Output console device and options. tty Use the virtual console device . @@ -367,7 +375,8 @@ running once the system is up. options are the same as for ttyS, above. cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver - Format: ,,,[,] + Format: + ,,,[,] cpia_pp= [HW,PPT] Format: { parport | auto | none } @@ -384,10 +393,10 @@ running once the system is up. cs89x0_media= [HW,NET] Format: { rj45 | aui | bnc } - + cyclades= [HW,SERIAL] Cyclades multi-serial port adapter. - - dasd= [HW,NET] + + dasd= [HW,NET] See header of drivers/s390/block/dasd_devmap.c. db9.dev[2|3]= [HW,JOY] Multisystem joystick support via parallel port @@ -406,7 +415,7 @@ running once the system is up. dhash_entries= [KNL] Set number of hash buckets for dentry cache. - + digi= [HW,SERIAL] IO parameters + enable/disable command. @@ -424,11 +433,11 @@ running once the system is up. dtc3181e= [HW,SCSI] - earlyprintk= [IA-32, X86-64] + earlyprintk= [IA-32,X86-64] earlyprintk=vga earlyprintk=serial[,ttySn[,baudrate]] - Append ,keep to not disable it when the real console + Append ",keep" to not disable it when the real console takes over. Only vga or serial at a time, not both. @@ -451,7 +460,7 @@ running once the system is up. Format: {"of[f]" | "sk[ipmbr]"} See comment in arch/i386/boot/edd.S - eicon= [HW,ISDN] + eicon= [HW,ISDN] Format: ,, eisa_irq_edge= [PARISC,HW] @@ -462,12 +471,13 @@ running once the system is up. arch/i386/kernel/cpu/cpufreq/elanfreq.c. elevator= [IOSCHED] - Format: {"as"|"cfq"|"deadline"|"noop"} - See Documentation/block/as-iosched.txt - and Documentation/block/deadline-iosched.txt for details. + Format: {"as" | "cfq" | "deadline" | "noop"} + See Documentation/block/as-iosched.txt and + Documentation/block/deadline-iosched.txt for details. + elfcorehdr= [IA-32] - Specifies physical address of start of kernel core image - elf header. + Specifies physical address of start of kernel core + image elf header. See Documentation/kdump.txt for details. enforcing [SELINUX] Set initial enforcing status. @@ -485,7 +495,7 @@ running once the system is up. es1371= [HW,OSS] Format: ,[,[]] See also header of sound/oss/es1371.c. - + ether= [HW,NET] Ethernet cards parameters This option is obsoleted by the "netdev=" option, which has equivalent usage. See its documentation for details. @@ -526,12 +536,13 @@ running once the system is up. gus= [HW,OSS] Format: ,,, - + gvp11= [HW,SCSI] hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for IA-64, off otherwise. + Format: 0 | 1 (for off | on) hcl= [IA-64] SGI's Hardware Graph compatibility layer @@ -595,13 +606,13 @@ running once the system is up. ide?= [HW] (E)IDE subsystem Format: ide?=noprobe or chipset specific parameters. See Documentation/ide.txt. - + idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed See Documentation/ide.txt. idle= [HW] Format: idle=poll or idle=halt - + ihash_entries= [KNL] Set number of hash buckets for inode cache. @@ -649,7 +660,7 @@ running once the system is up. firmware running. isapnp= [ISAPNP] - Format: , , , + Format: ,,, isolcpus= [KNL,SMP] Isolate CPUs from the general scheduler. Format: ,..., @@ -661,32 +672,33 @@ running once the system is up. "number of CPUs in system - 1". This option is the preferred way to isolate CPUs. The - alternative - manually setting the CPU mask of all tasks - in the system can cause problems and suboptimal load - balancer performance. + alternative -- manually setting the CPU mask of all + tasks in the system -- can cause problems and + suboptimal load balancer performance. isp16= [HW,CD] Format: ,,, - iucv= [HW,NET] + iucv= [HW,NET] js= [HW,JOY] Analog joystick See Documentation/input/joystick.txt. keepinitrd [HW,ARM] - kstack=N [IA-32, X86-64] Print N words from the kernel stack + kstack=N [IA-32,X86-64] Print N words from the kernel stack in oops dumps. l2cr= [PPC] - lapic [IA-32,APIC] Enable the local APIC even if BIOS disabled it. + lapic [IA-32,APIC] Enable the local APIC even if BIOS + disabled it. lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip Format: addr:,irq: - llsc*= [IA64] - See function print_params() in arch/ia64/sn/kernel/llsc4.c. + llsc*= [IA64] See function print_params() in + arch/ia64/sn/kernel/llsc4.c. load_ramdisk= [RAM] List of ramdisks to load from floppy See Documentation/ramdisk.txt. @@ -713,8 +725,9 @@ running once the system is up. 7 (KERN_DEBUG) debug-level messages log_buf_len=n Sets the size of the printk ring buffer, in bytes. - Format is n, nk, nM. n must be a power of two. The - default is set in kernel config. + Format: { n | nk | nM } + n must be a power of two. The default size + is set in the kernel config file. lp=0 [LP] Specify parallel ports to use, e.g, lp=port[,port...] lp=none,parport0 (lp0 not configured, lp1 uses @@ -750,23 +763,23 @@ running once the system is up. ltpc= [NET] Format: ,, - mac5380= [HW,SCSI] - Format: ,,,, + mac5380= [HW,SCSI] Format: + ,,,, - mac53c9x= [HW,SCSI] - Format: ,,,,,,, + mac53c9x= [HW,SCSI] Format: + ,,,,,,, - machvec= [IA64] - Force the use of a particular machine-vector (machvec) in a generic - kernel. Example: machvec=hpzx1_swiotlb + machvec= [IA64] Force the use of a particular machine-vector + (machvec) in a generic kernel. + Example: machvec=hpzx1_swiotlb - mad16= [HW,OSS] - Format: ,,,,,, + mad16= [HW,OSS] Format: + ,,,,,, maui= [HW,OSS] Format: , - - max_loop= [LOOP] Maximum number of loopback devices that can + + max_loop= [LOOP] Maximum number of loopback devices that can be mounted Format: <1-256> @@ -776,11 +789,11 @@ running once the system is up. max_addr=[KMG] [KNL,BOOT,ia64] All physical memory greater than or equal to this physical address is ignored. - max_luns= [SCSI] Maximum number of LUNs to probe + max_luns= [SCSI] Maximum number of LUNs to probe. Should be between 1 and 2^32-1. max_report_luns= - [SCSI] Maximum number of LUNs received + [SCSI] Maximum number of LUNs received. Should be between 1 and 16384. mca-pentium [BUGS=IA-32] @@ -796,11 +809,11 @@ running once the system is up. md= [HW] RAID subsystems devices and level See Documentation/md.txt. - + mdacon= [MDA] Format: , Specifies range of consoles to be captured by the MDA. - + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. @@ -851,15 +864,15 @@ running once the system is up. MTD_Partition= [MTD] Format: ,,, - MTD_Region= [MTD] - Format: ,[,,,,] + MTD_Region= [MTD] Format: + ,[,,,,] mtdparts= [MTD] See drivers/mtd/cmdline.c. mtouchusb.raw_coordinates= - [HW] Make the MicroTouch USB driver use raw coordinates ('y', default) - or cooked coordinates ('n') + [HW] Make the MicroTouch USB driver use raw coordinates + ('y', default) or cooked coordinates ('n') n2= [NET] SDL Inc. RISCom/N2 synchronous serial card @@ -880,7 +893,9 @@ running once the system is up. Format: ,,,, Note that mem_start is often overloaded to mean something different and driver-specific. - + This usage is only documented in each driver source + file if at all. + nfsaddrs= [NFS] See Documentation/nfsroot.txt. @@ -893,8 +908,8 @@ running once the system is up. emulation library even if a 387 maths coprocessor is present. - noalign [KNL,ARM] - + noalign [KNL,ARM] + noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. @@ -905,19 +920,19 @@ running once the system is up. on "Classic" PPC cores. nocache [ARM] - + nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. noexec [IA-64] - noexec [IA-32, X86-64] + noexec [IA-32,X86-64] noexec=on: enable non-executable mappings (default) noexec=off: disable nn-executable mappings nofxsr [BUGS=IA-32] nohlt [BUGS=ARM] - + no-hlt [BUGS=IA-32] Tells the kernel that the hlt instruction doesn't work correctly and not to use it. @@ -948,8 +963,9 @@ running once the system is up. noresidual [PPC] Don't use residual data on PReP machines. - noresume [SWSUSP] Disables resume and restore original swap space. - + noresume [SWSUSP] Disables resume and restores original swap + space. + no-scroll [VGA] Disables scrollback. This is required for the Braillex ib80-piezo Braille reader made by F.H. Papenmeier (Germany). @@ -965,16 +981,16 @@ running once the system is up. nousb [USB] Disable the USB subsystem nowb [ARM] - + opl3= [HW,OSS] Format: opl3sa= [HW,OSS] Format: ,,,,, - opl3sa2= [HW,OSS] - Format: ,,,,,,,[,,,,,,,,,[,, parkbd.mode= [HW] Parallel port keyboard adapter mode of operation, 0 for XT, 1 for AT (default is AT). - Format: + Format: - parport=0 [HW,PPT] Specify parallel ports. 0 disables. - parport=auto Use 'auto' to force the driver to use - parport=0xBBB[,IRQ[,DMA]] any IRQ/DMA settings detected (the - default is to ignore detected IRQ/DMA - settings because of possible - conflicts). You can specify the base - address, IRQ, and DMA settings; IRQ and - DMA should be numbers, or 'auto' (for - using detected settings on that - particular port), or 'nofifo' (to avoid - using a FIFO even if it is detected). - Parallel ports are assigned in the - order they are specified on the command - line, starting with parport0. + parport= [HW,PPT] Specify parallel ports. 0 disables. + Format: { 0 | auto | 0xBBB[,IRQ[,DMA]] } + Use 'auto' to force the driver to use any + IRQ/DMA settings detected (the default is to + ignore detected IRQ/DMA settings because of + possible conflicts). You can specify the base + address, IRQ, and DMA settings; IRQ and DMA + should be numbers, or 'auto' (for using detected + settings on that particular port), or 'nofifo' + (to avoid using a FIFO even if it is detected). + Parallel ports are assigned in the order they + are specified on the command line, starting + with parport0. - parport_init_mode= - [HW,PPT] Configure VIA parallel port to - operate in specific mode. This is - necessary on Pegasos computer where - firmware has no options for setting up - parallel port mode and sets it to - spp. Currently this function knows - 686a and 8231 chips. + parport_init_mode= [HW,PPT] + Configure VIA parallel port to operate in + a specific mode. This is necessary on Pegasos + computer where firmware has no options for setting + up parallel port mode and sets it to spp. + Currently this function knows 686a and 8231 chips. Format: [spp|ps2|epp|ecp|ecpepp] - pas2= [HW,OSS] - Format: ,,,,,,, - + pas2= [HW,OSS] Format: + ,,,,,,, + pas16= [HW,SCSI] See header of drivers/scsi/pas16.c. @@ -1032,64 +1045,67 @@ running once the system is up. See header of drivers/block/paride/pcd.c. See also Documentation/paride.txt. - pci=option[,option...] [PCI] various PCI subsystem options: - off [IA-32] don't probe for the PCI bus - bios [IA-32] force use of PCI BIOS, don't access - the hardware directly. Use this if your machine - has a non-standard PCI host bridge. - nobios [IA-32] disallow use of PCI BIOS, only direct - hardware access methods are allowed. Use this - if you experience crashes upon bootup and you - suspect they are caused by the BIOS. - conf1 [IA-32] Force use of PCI Configuration Mechanism 1. - conf2 [IA-32] Force use of PCI Configuration Mechanism 2. - nosort [IA-32] Don't sort PCI devices according to - order given by the PCI BIOS. This sorting is done - to get a device order compatible with older kernels. - biosirq [IA-32] Use PCI BIOS calls to get the interrupt - routing table. These calls are known to be buggy - on several machines and they hang the machine when used, - but on other computers it's the only way to get the - interrupt routing table. Try this option if the kernel - is unable to allocate IRQs or discover secondary PCI - buses on your motherboard. - rom [IA-32] Assign address space to expansion ROMs. - Use with caution as certain devices share address - decoders between ROMs and other resources. - irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be assigned - automatically to PCI devices. You can make the kernel - exclude IRQs of your ISA cards this way. + pci=option[,option...] [PCI] various PCI subsystem options: + off [IA-32] don't probe for the PCI bus + bios [IA-32] force use of PCI BIOS, don't access + the hardware directly. Use this if your machine + has a non-standard PCI host bridge. + nobios [IA-32] disallow use of PCI BIOS, only direct + hardware access methods are allowed. Use this + if you experience crashes upon bootup and you + suspect they are caused by the BIOS. + conf1 [IA-32] Force use of PCI Configuration + Mechanism 1. + conf2 [IA-32] Force use of PCI Configuration + Mechanism 2. + nosort [IA-32] Don't sort PCI devices according to + order given by the PCI BIOS. This sorting is + done to get a device order compatible with + older kernels. + biosirq [IA-32] Use PCI BIOS calls to get the interrupt + routing table. These calls are known to be buggy + on several machines and they hang the machine + when used, but on other computers it's the only + way to get the interrupt routing table. Try + this option if the kernel is unable to allocate + IRQs or discover secondary PCI buses on your + motherboard. + rom [IA-32] Assign address space to expansion ROMs. + Use with caution as certain devices share + address decoders between ROMs and other + resources. + irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be + assigned automatically to PCI devices. You can + make the kernel exclude IRQs of your ISA cards + this way. pirqaddr=0xAAAAA [IA-32] Specify the physical address - of the PIRQ table (normally generated - by the BIOS) if it is outside the - F0000h-100000h range. - lastbus=N [IA-32] Scan all buses till bus #N. Can be useful - if the kernel is unable to find your secondary buses - and you want to tell it explicitly which ones they are. - assign-busses [IA-32] Always assign all PCI bus - numbers ourselves, overriding - whatever the firmware may have - done. - usepirqmask [IA-32] Honor the possible IRQ mask - stored in the BIOS $PIR table. This is - needed on some systems with broken - BIOSes, notably some HP Pavilion N5400 - and Omnibook XE3 notebooks. This will - have no effect if ACPI IRQ routing is - enabled. - noacpi [IA-32] Do not use ACPI for IRQ routing - or for PCI scanning. - routeirq Do IRQ routing for all PCI devices. - This is normally done in pci_enable_device(), - so this option is a temporary workaround - for broken drivers that don't call it. - - firmware [ARM] Do not re-enumerate the bus but - instead just use the configuration - from the bootloader. This is currently - used on IXP2000 systems where the - bus has to be configured a certain way - for adjunct CPUs. + of the PIRQ table (normally generated + by the BIOS) if it is outside the + F0000h-100000h range. + lastbus=N [IA-32] Scan all buses thru bus #N. Can be + useful if the kernel is unable to find your + secondary buses and you want to tell it + explicitly which ones they are. + assign-busses [IA-32] Always assign all PCI bus + numbers ourselves, overriding + whatever the firmware may have done. + usepirqmask [IA-32] Honor the possible IRQ mask stored + in the BIOS $PIR table. This is needed on + some systems with broken BIOSes, notably + some HP Pavilion N5400 and Omnibook XE3 + notebooks. This will have no effect if ACPI + IRQ routing is enabled. + noacpi [IA-32] Do not use ACPI for IRQ routing + or for PCI scanning. + routeirq Do IRQ routing for all PCI devices. + This is normally done in pci_enable_device(), + so this option is a temporary workaround + for broken drivers that don't call it. + firmware [ARM] Do not re-enumerate the bus but instead + just use the configuration from the + bootloader. This is currently used on + IXP2000 systems where the bus has to be + configured a certain way for adjunct CPUs. pcmv= [HW,PCMCIA] BadgePAD 4 @@ -1127,19 +1143,20 @@ running once the system is up. [ISAPNP] Exclude DMAs for the autoconfiguration pnp_reserve_io= [ISAPNP] Exclude I/O ports for the autoconfiguration - Ranges are in pairs (I/O port base and size). + Ranges are in pairs (I/O port base and size). pnp_reserve_mem= - [ISAPNP] Exclude memory regions for the autoconfiguration + [ISAPNP] Exclude memory regions for the + autoconfiguration. Ranges are in pairs (memory base and size). profile= [KNL] Enable kernel profiling via /proc/profile - { schedule | } - (param: schedule - profile schedule points} - (param: profile step/bucket size as a power of 2 for - statistical time based profiling) + Format: [schedule,] + Param: "schedule" - profile schedule points. + Param: - step/bucket size as a power of 2 for + statistical time based profiling. - processor.max_cstate= [HW, ACPI] + processor.max_cstate= [HW,ACPI] Limit processor to maximum C-state max_cstate=9 overrides any DMI blacklist limit. @@ -1147,27 +1164,28 @@ running once the system is up. before loading. See Documentation/ramdisk.txt. - psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to - probe for (bare|imps|exps|lifebook|any). + psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to + probe for; one of (bare|imps|exps|lifebook|any). psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports per second. - psmouse.resetafter= - [HW,MOUSE] Try to reset the device after so many bad packets + psmouse.resetafter= [HW,MOUSE] + Try to reset the device after so many bad packets (0 = never). psmouse.resolution= [HW,MOUSE] Set desired mouse resolution, in dpi. psmouse.smartscroll= - [HW,MOUSE] Controls Logitech smartscroll autorepeat, + [HW,MOUSE] Controls Logitech smartscroll autorepeat. 0 = disabled, 1 = enabled (default). pss= [HW,OSS] Personal Sound System (ECHO ESC614) - Format: ,,,,, + Format: + ,,,,, pt. [PARIDE] See Documentation/paride.txt. quiet= [KNL] Disable log messages - + r128= [HW,DRM] raid= [HW,RAID] @@ -1176,10 +1194,9 @@ running once the system is up. ramdisk= [RAM] Sizes of RAM disks in kilobytes [deprecated] See Documentation/ramdisk.txt. - ramdisk_blocksize= - [RAM] + ramdisk_blocksize= [RAM] See Documentation/ramdisk.txt. - + ramdisk_size= [RAM] Sizes of RAM disks in kilobytes New name for the ramdisk parameter. See Documentation/ramdisk.txt. @@ -1195,7 +1212,8 @@ running once the system is up. reserve= [KNL,BUGS] Force the kernel to ignore some iomem area - resume= [SWSUSP] Specify the partition device for software suspension + resume= [SWSUSP] + Specify the partition device for software suspend rhash_entries= [KNL,NET] Set number of hash buckets for route cache @@ -1225,7 +1243,7 @@ running once the system is up. Format: ,,, sbni= [NET] Granch SBNI12 leased line adapter - + sbpcd= [HW,CD] Soundblaster CD adapter Format: , See a comment before function sbpcd_setup() in @@ -1258,21 +1276,20 @@ running once the system is up. serialnumber [BUGS=IA-32] - sg_def_reserved_size= - [SCSI] - + sg_def_reserved_size= [SCSI] + sgalaxy= [HW,OSS] Format: ,,,, shapers= [NET] Maximal number of shapers. - + sim710= [SCSI,HW] See header of drivers/scsi/sim710.c. simeth= [IA-64] simscsi= - + sjcd= [HW,CD] Format: ,, See header of drivers/cdrom/sjcd.c. @@ -1403,10 +1420,10 @@ running once the system is up. snd-wavefront= [HW,ALSA] snd-ymfpci= [HW,ALSA] - + sonicvibes= [HW,OSS] Format: - + sonycd535= [HW,CD] Format: [,] @@ -1423,7 +1440,7 @@ running once the system is up. sscape= [HW,OSS] Format: ,,,, - + st= [HW,SCSI] SCSI tape parameters (buffers, etc.) See Documentation/scsi/st.txt. @@ -1446,7 +1463,7 @@ running once the system is up. stram_swap= [HW,M68k] swiotlb= [IA-64] Number of I/O TLB slabs - + switches= [HW,M68k] sym53c416= [HW,SCSI] @@ -1479,14 +1496,16 @@ running once the system is up. tp720= [HW,PS2] trix= [HW,OSS] MediaTrix AudioTrix Pro - Format: ,,,,,,,, - + Format: + ,,,,,,,, + tsdev.xres= [TS] Horizontal screen resolution. tsdev.yres= [TS] Vertical screen resolution. - turbografx.map[2|3]= - [HW,JOY] TurboGraFX parallel port interface - Format: ,,,,,,, + turbografx.map[2|3]= [HW,JOY] + TurboGraFX parallel port interface + Format: + ,,,,,,, See also Documentation/input/joystick-parport.txt u14-34f= [HW,SCSI] UltraStor 14F/34F SCSI host adapter @@ -1502,17 +1521,18 @@ running once the system is up. usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. - + video= [FB] Frame buffer configuration See Documentation/fb/modedb.txt. vga= [BOOT,IA-32] Select a particular video mode - See Documentation/i386/boot.txt and Documentation/svga.txt. + See Documentation/i386/boot.txt and + Documentation/svga.txt. Use vga=ask for menu. This is actually a boot loader parameter; the value is passed to the kernel using a special protocol. - vmalloc=nn[KMG] [KNL,BOOT] forces the vmalloc area to have an exact + vmalloc=nn[KMG] [KNL,BOOT] Forces the vmalloc area to have an exact size of . This can be used to increase the minimum size (128MB on x86). It can also be used to decrease the size and leave more room for directly @@ -1520,11 +1540,11 @@ running once the system is up. vmhalt= [KNL,S390] - vmpoff= [KNL,S390] - + vmpoff= [KNL,S390] + waveartist= [HW,OSS] Format: ,,, - + wd33c93= [HW,SCSI] See header of drivers/scsi/wd33c93.c. @@ -1538,21 +1558,25 @@ running once the system is up. xd_geo= See header of drivers/block/xd.c. xirc2ps_cs= [NET,PCMCIA] - Format: ,,,,,[,[,[,]]] - + Format: + ,,,,,[,[,[,]]] +______________________________________________________________________ Changelog: +2000-06-?? Mr. Unknown The last known update (for 2.4.0) - the changelog was not kept before. - 2000-06-?? Mr. Unknown +2002-11-24 Petr Baudis + Randy Dunlap Update for 2.5.49, description for most of the options introduced, references to other documentation (C files, READMEs, ..), added S390, PPC, SPARC, MTD, ALSA and OSS category. Minor corrections and reformatting. - 2002-11-24 Petr Baudis - Randy Dunlap + +2005-10-19 Randy Dunlap + Lots of typos, whitespace, some reformatting. TODO: From 8766ce41018a0cb80fbe0ce7dbf747f357c752da Mon Sep 17 00:00:00 2001 From: Kostik Belousov Date: Sun, 23 Oct 2005 12:57:13 -0700 Subject: [PATCH 36/54] [PATCH] aio syscalls are not checked by lsm Another case of missing call to security_file_permission: aio functions (namely, io_submit) does not check credentials with security modules. Below is the simple patch to the problem. It seems that it is enough to check for rights at the request submission time. Signed-off-by: Kostik Belousov Signed-off-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 9fe7216457d8..edfca5b75535 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1397,6 +1397,9 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf, kiocb->ki_left))) break; + ret = security_file_permission(file, MAY_READ); + if (unlikely(ret)) + break; ret = -EINVAL; if (file->f_op->aio_read) kiocb->ki_retry = aio_pread; @@ -1409,6 +1412,9 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf, kiocb->ki_left))) break; + ret = security_file_permission(file, MAY_WRITE); + if (unlikely(ret)) + break; ret = -EINVAL; if (file->f_op->aio_write) kiocb->ki_retry = aio_pwrite; From 282c1f5eba150d0b156ffa9e6b064f1d92f8315f Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Sun, 23 Oct 2005 12:57:15 -0700 Subject: [PATCH 37/54] [PATCH] selinux: Fix NULL deref in policydb_destroy This patch fixes a possible NULL dereference in policydb_destroy, where p->type_attr_map can be NULL if policydb_destroy is called to clean up a partially loaded policy upon an error during policy load. Please apply. Signed-off-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/selinux/ss/policydb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 0a758323a9cf..8e6262d12aa9 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -650,8 +650,10 @@ void policydb_destroy(struct policydb *p) } if (lrt) kfree(lrt); - for (i = 0; i < p->p_types.nprim; i++) - ebitmap_destroy(&p->type_attr_map[i]); + if (p->type_attr_map) { + for (i = 0; i < p->p_types.nprim; i++) + ebitmap_destroy(&p->type_attr_map[i]); + } kfree(p->type_attr_map); return; From 20c19e4179884d7e843314071e2dfb1ea7b0afcd Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Sun, 23 Oct 2005 12:57:16 -0700 Subject: [PATCH 38/54] [PATCH] SELinux: handle sel_make_bools() failure in selinuxfs This patch fixes error handling in sel_make_bools(), where currently we'd get a memory leak via security_get_bools() and try to kfree() the wrong pointer if called again. Signed-off-by: James Morris Acked-by: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/selinux/selinuxfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 8eb140dd2e4b..a45cc971e735 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -879,7 +879,7 @@ static ssize_t sel_commit_bools_write(struct file *filep, if (sscanf(page, "%d", &new_value) != 1) goto out; - if (new_value) { + if (new_value && bool_pending_values) { security_set_bools(bool_num, bool_pending_values); } @@ -952,6 +952,7 @@ static int sel_make_bools(void) /* remove any existing files */ kfree(bool_pending_values); + bool_pending_values = NULL; sel_remove_bools(dir); @@ -1002,6 +1003,7 @@ out: } return ret; err: + kfree(values); d_genocide(dir); ret = -ENOMEM; goto out; From c0fef676bbd75e711711ed3ff5bebb7bfd1bdb00 Mon Sep 17 00:00:00 2001 From: Mike Krufky Date: Sun, 23 Oct 2005 12:57:17 -0700 Subject: [PATCH 39/54] [PATCH] Kconfig: saa7134-dvb should not select cx22702 On 2005-05-01, Gerd Knorr sent in a patch to add cx22702 to cx88-dvb: [PATCH] dvb: cx22702 frontend driver update http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=9990d744bea7d28e83c420e2c9d524c7a8a2d136 ...but as we can see, the Kconfig portion of his patch was incorrectly applied to saa7134-dvb instead of cx88-dvb. On 2005-06-24, Adrian bunk fixed cx88-dvb: [PATCH] VIDEO_CX88_DVB must select DVB_CX22702 http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=d6988588e13616587aa879c2e0bd7cd811705e5d ...but we never removed the original patch from Gerd. This patch sets things straight: saa7134-dvb should not select cx22702 Signed-off-by: Michael Krufky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 93570355819a..bbb989df4cf0 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -262,7 +262,6 @@ config VIDEO_SAA7134_DVB depends on VIDEO_SAA7134 && DVB_CORE select VIDEO_BUF_DVB select DVB_MT352 - select DVB_CX22702 select DVB_TDA1004X ---help--- This adds support for DVB cards based on the From 8d3b35914aa54232b27e6a2b57d84092aadc5e86 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 23 Oct 2005 12:57:18 -0700 Subject: [PATCH 40/54] [PATCH] inotify/idr leak fix Fix a bug which was reported and diagnosed by Stefan Jones IDR trees include a cache of idr_layer objects. There's no way to destroy this cache, so when we discard an overall idr tree we end up leaking some memory. Add and use idr_destroy() for this. v9fs and infiniband also need to use idr_destroy() to avoid leaks. Or, we make the cache global, like radix_tree_preload(). Which is probably better. Later. Cc: Eric Van Hensbergen Cc: Roland Dreier Cc: Robert Love Cc: John McCutchan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 1 + include/linux/idr.h | 1 + lib/idr.c | 13 +++++++++++++ 3 files changed, 15 insertions(+) diff --git a/fs/inotify.c b/fs/inotify.c index a37e9fb1da58..9fbaebfdf40b 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -176,6 +176,7 @@ static inline void put_inotify_dev(struct inotify_device *dev) if (atomic_dec_and_test(&dev->count)) { atomic_dec(&dev->user->inotify_devs); free_uid(dev->user); + idr_destroy(&dev->idr); kfree(dev); } } diff --git a/include/linux/idr.h b/include/linux/idr.h index ca3b7e462576..3d5de45f961b 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -75,4 +75,5 @@ int idr_pre_get(struct idr *idp, unsigned gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_remove(struct idr *idp, int id); +void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); diff --git a/lib/idr.c b/lib/idr.c index 6415d053e2bf..d4df21debc4d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -345,6 +345,19 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); +/** + * idr_destroy - release all cached layers within an idr tree + * idp: idr handle + */ +void idr_destroy(struct idr *idp) +{ + while (idp->id_free_cnt) { + struct idr_layer *p = alloc_layer(idp); + kmem_cache_free(idr_layer_cache, p); + } +} +EXPORT_SYMBOL(idr_destroy); + /** * idr_find - return pointer for given id * @idp: idr handle From 75eeec2f3fd9e8a16777219ebf1bf8395845faa7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 23 Oct 2005 12:57:19 -0700 Subject: [PATCH 41/54] [PATCH] ib: mthca: Always re-arm EQs in mthca_tavor_interrupt() We should always re-arm an event queue's interrupt in mthca_tavor_interrupt() if the corresponding bit is set in the event cause register (ECR), even if we didn't find any entries in the EQ. If we don't, then there's a window where we miss an EQ entry and then get stuck because we don't get another EQ event. Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/hw/mthca/mthca_eq.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index c81fa8e975ef..8dfafda5ed24 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -396,20 +396,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); ecr = readl(dev->eq_regs.tavor.ecr_base + 4); - if (ecr) { - writel(ecr, dev->eq_regs.tavor.ecr_base + - MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + if (!ecr) + return IRQ_NONE; - for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (ecr & dev->eq_table.eq[i].eqn_mask && - mthca_eq_int(dev, &dev->eq_table.eq[i])) { + writel(ecr, dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (ecr & dev->eq_table.eq[i].eqn_mask) { + if (mthca_eq_int(dev, &dev->eq_table.eq[i])) tavor_set_eq_ci(dev, &dev->eq_table.eq[i], dev->eq_table.eq[i].cons_index); - tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); - } - } + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + } - return IRQ_RETVAL(ecr); + return IRQ_HANDLED; } static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, From 108150ea78003044e41150c75259447b2c0953b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Oct 2005 20:25:39 +0400 Subject: [PATCH 42/54] [PATCH] posix-timers: fix cleanup_timers() and run_posix_cpu_timers() races 1. cleanup_timers() sets timer->task = NULL under tasklist + ->sighand locks. That means that this code in posix_cpu_timer_del() and posix_cpu_timer_set() lock_timer(timer); if (timer->task == NULL) return; read_lock(tasklist); put_task_struct(timer->task) is racy. With this patch timer->task modified and accounted only under timer->it_lock. Sadly, this means that dead task_struct won't be freed until timer deleted or armed. 2. run_posix_cpu_timers() collects expired timers into local list under tasklist + ->sighand again. That means that posix_cpu_timer_del() should check timer->it.cpu.firing under these locks too. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d30b304a3384..30ab39a27736 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -380,14 +380,9 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) int posix_cpu_timer_del(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; + int ret = 0; - if (timer->it.cpu.firing) - return TIMER_RETRY; - - if (unlikely(p == NULL)) - return 0; - - if (!list_empty(&timer->it.cpu.entry)) { + if (likely(p != NULL)) { read_lock(&tasklist_lock); if (unlikely(p->signal == NULL)) { /* @@ -396,18 +391,20 @@ int posix_cpu_timer_del(struct k_itimer *timer) */ BUG_ON(!list_empty(&timer->it.cpu.entry)); } else { - /* - * Take us off the task's timer list. - */ spin_lock(&p->sighand->siglock); - list_del(&timer->it.cpu.entry); + if (timer->it.cpu.firing) + ret = TIMER_RETRY; + else + list_del(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); } read_unlock(&tasklist_lock); - } - put_task_struct(p); - return 0; + if (!ret) + put_task_struct(p); + } + + return ret; } /* @@ -424,8 +421,6 @@ static void cleanup_timers(struct list_head *head, cputime_t ptime = cputime_add(utime, stime); list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, ptime)) { timer->expires.cpu = cputime_zero; @@ -437,8 +432,6 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, utime)) { timer->expires.cpu = cputime_zero; @@ -450,8 +443,6 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (timer->expires.sched < sched_time) { timer->expires.sched = 0; From 3de463c7d9d58f8cf3395268230cb20a4c15bffa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 14:34:03 +0400 Subject: [PATCH 43/54] [PATCH] posix-timers: remove false BUG_ON() from run_posix_cpu_timers() do_exit() clears ->it_##clock##_expires, but nothing prevents another cpu to attach the timer to exiting process after that. After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and before do_exit() calls 'schedule() local timer interrupt can find tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu does sys_wait4) interrupted task has ->signal == NULL. At this moment exiting task has no pending cpu timers, they were cleaned up in __exit_signal()->posix_cpu_timers_exit{,_group}(), so we can just return from irq. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 -------- kernel/posix-cpu-timers.c | 36 ++++++++++++++++++------------------ 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 3b25b182d2be..4897977a1f4b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -825,14 +825,6 @@ fastcall NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITING; - /* - * Make sure we don't try to process any timer firings - * while we are already exiting. - */ - tsk->it_virt_expires = cputime_zero; - tsk->it_prof_expires = cputime_zero; - tsk->it_sched_expires = 0; - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 30ab39a27736..ccb04683bf18 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1285,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk) #undef UNEXPIRED - BUG_ON(tsk->exit_state); - /* * Double-check with locks held. */ read_lock(&tasklist_lock); - spin_lock(&tsk->sighand->siglock); + if (likely(tsk->signal != NULL)) { + spin_lock(&tsk->sighand->siglock); - /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. - */ - check_thread_timers(tsk, &firing); - check_process_timers(tsk, &firing); + /* + * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] + * all the timers that are firing, and put them on the firing list. + */ + check_thread_timers(tsk, &firing); + check_process_timers(tsk, &firing); - /* - * We must release these locks before taking any timer's lock. - * There is a potential race with timer deletion here, as the - * siglock now protects our private firing list. We have set - * the firing flag in each timer, so that a deletion attempt - * that gets the timer lock before we do will give it up and - * spin until we've taken care of that timer below. - */ - spin_unlock(&tsk->sighand->siglock); + /* + * We must release these locks before taking any timer's lock. + * There is a potential race with timer deletion here, as the + * siglock now protects our private firing list. We have set + * the firing flag in each timer, so that a deletion attempt + * that gets the timer lock before we do will give it up and + * spin until we've taken care of that timer below. + */ + spin_unlock(&tsk->sighand->siglock); + } read_unlock(&tasklist_lock); /* From ca531a0a5e01e5122f67cb6aca8fcbfc70e18e0b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 14:36:28 +0400 Subject: [PATCH 44/54] [PATCH] posix-timers: exit path cleanup No need to rebalance when task exited Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ccb04683bf18..92a038064628 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -486,6 +486,9 @@ static void process_timer_rebalance(struct task_struct *p, struct task_struct *t = p; unsigned int nthreads = atomic_read(&p->signal->live); + if (!nthreads) + return; + switch (clock_idx) { default: BUG(); @@ -1160,6 +1163,9 @@ static void check_process_timers(struct task_struct *tsk, unsigned long long sched_left, sched; const unsigned int nthreads = atomic_read(&sig->live); + if (!nthreads) + return; + prof_left = cputime_sub(prof_expires, utime); prof_left = cputime_sub(prof_left, stime); prof_left = cputime_div(prof_left, nthreads); From a69ac4a78d8bd9e1ec478bd7297d4f047fcd44a8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Oct 2005 18:29:58 +0400 Subject: [PATCH 45/54] [PATCH] posix-timers: fix posix_cpu_timer_set() vs run_posix_cpu_timers() race This might be harmless, but looks like a race from code inspection (I was unable to trigger it). I must admit, I don't understand why we can't return TIMER_RETRY after 'spin_unlock(&p->sighand->siglock)' without doing bump_cpu_timer(), but this is what original code does. posix_cpu_timer_set: read_lock(&tasklist_lock); spin_lock(&p->sighand->siglock); list_del_init(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); We are probaly deleting the timer from run_posix_cpu_timers's 'firing' local list_head while run_posix_cpu_timers() does list_for_each_safe. Various bad things can happen, for example we can just delete this timer so that list_for_each() will not notice it and run_posix_cpu_timers() will not reset '->firing' flag. In that case, .... if (timer->it.cpu.firing) { read_unlock(&tasklist_lock); timer->it.cpu.firing = -1; return TIMER_RETRY; } sys_timer_settime() goes to 'retry:', calls posix_cpu_timer_set() again, it returns TIMER_RETRY ... Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 92a038064628..b15462b17a58 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -730,9 +730,15 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * Disarm any old timer after extracting its expiry time. */ BUG_ON(!irqs_disabled()); + + ret = 0; spin_lock(&p->sighand->siglock); old_expires = timer->it.cpu.expires; - list_del_init(&timer->it.cpu.entry); + if (unlikely(timer->it.cpu.firing)) { + timer->it.cpu.firing = -1; + ret = TIMER_RETRY; + } else + list_del_init(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); /* @@ -780,7 +786,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, } } - if (unlikely(timer->it.cpu.firing)) { + if (unlikely(ret)) { /* * We are colliding with the timer actually firing. * Punt after filling in the timer's old value, and @@ -788,8 +794,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * it as an overrun (thanks to bump_cpu_timer above). */ read_unlock(&tasklist_lock); - timer->it.cpu.firing = -1; - ret = TIMER_RETRY; goto out; } From 10ded9493ec4a566977ed68b65158eba280e61e5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 24 Oct 2005 17:49:34 +0200 Subject: [PATCH 46/54] [PATCH] uml: fix compile failure for TT mode Without this patch, uml compile fails with: LD .tmp_vmlinux1 arch/um/kernel/built-in.o: In function `config_gdb_cb': arch/um/kernel/tt/gdb.c:129: undefined reference to `TASK_EXTERN_PID' Tested on i386, but fix needed on x86_64 too AFAICS. Signed-off-by: Miklos Szeredi Signed-off-by: Linus Torvalds --- arch/um/include/sysdep-i386/thread.h | 2 +- arch/um/include/sysdep-x86_64/thread.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/include/sysdep-i386/thread.h b/arch/um/include/sysdep-i386/thread.h index e2bd6bae8b8a..243fed44d780 100644 --- a/arch/um/include/sysdep-i386/thread.h +++ b/arch/um/include/sysdep-i386/thread.h @@ -4,7 +4,7 @@ #include #define TASK_DEBUGREGS(task) ((unsigned long *) &(((char *) (task))[HOST_TASK_DEBUGREGS])) -#ifdef CONFIG_MODE_TT +#ifdef UML_CONFIG_MODE_TT #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID])) #endif diff --git a/arch/um/include/sysdep-x86_64/thread.h b/arch/um/include/sysdep-x86_64/thread.h index 6a76a7f3683f..cbef3e1697f4 100644 --- a/arch/um/include/sysdep-x86_64/thread.h +++ b/arch/um/include/sysdep-x86_64/thread.h @@ -3,7 +3,7 @@ #include -#ifdef CONFIG_MODE_TT +#ifdef UML_CONFIG_MODE_TT #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID])) #endif From d5c5d8fe32a4b9b14363c6031061e98e26da59a2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Oct 2005 18:16:50 +0200 Subject: [PATCH 47/54] [PATCH] ALSA: Fix Oops of suspend/resume with generic drivers The patch fixes Oops from sound drivers using generic platform device but have no suspend/resume callbacks. Signed-off-by: Takashi Iwai Signed-off-by: Linus Torvalds --- sound/core/init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index a5702014a704..c72a79115cca 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -828,7 +828,8 @@ static int snd_generic_suspend(struct device *dev, pm_message_t state, u32 level card = get_snd_generic_card(dev); if (card->power_state == SNDRV_CTL_POWER_D3hot) return 0; - card->pm_suspend(card, PMSG_SUSPEND); + if (card->pm_suspend) + card->pm_suspend(card, PMSG_SUSPEND); snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); return 0; } @@ -843,7 +844,8 @@ static int snd_generic_resume(struct device *dev, u32 level) card = get_snd_generic_card(dev); if (card->power_state == SNDRV_CTL_POWER_D0) return 0; - card->pm_resume(card); + if (card->pm_suspend) + card->pm_resume(card); snd_power_change_state(card, SNDRV_CTL_POWER_D0); return 0; } From c14e2cfc18659c6ca67c2e10417c432eb978d976 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Mon, 24 Oct 2005 21:46:21 +0100 Subject: [PATCH 48/54] [PATCH] Return the line length via sysfs for fbdev This small patch returns the stride/line length of the framebuffer via sysfs. Signed-off-by: James Simmons Signed-off-by: Linus Torvalds --- drivers/video/fbsysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c index 1147b899f007..007c8e9b2b39 100644 --- a/drivers/video/fbsysfs.c +++ b/drivers/video/fbsysfs.c @@ -242,6 +242,13 @@ static ssize_t show_virtual(struct class_device *class_device, char *buf) fb_info->var.yres_virtual); } +static ssize_t show_stride(struct class_device *class_device, char *buf) +{ + struct fb_info *fb_info = + (struct fb_info *)class_get_devdata(class_device); + return snprintf(buf, PAGE_SIZE, "%d\n", fb_info->fix.line_length); +} + /* Format for cmap is "%02x%c%4x%4x%4x\n" */ /* %02x entry %c transp %4x red %4x blue %4x green \n */ /* 256 rows at 16 chars equals 4096, the normal page size */ @@ -432,6 +439,7 @@ static struct class_device_attribute class_device_attrs[] = { __ATTR(pan, S_IRUGO|S_IWUSR, show_pan, store_pan), __ATTR(virtual_size, S_IRUGO|S_IWUSR, show_virtual, store_virtual), __ATTR(name, S_IRUGO, show_name, NULL), + __ATTR(stride, S_IRUGO, show_stride, NULL), }; int fb_init_class_device(struct fb_info *fb_info) From 0db9ae4a79381a5a3d272ccb51851c48c4bcbb6d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 24 Oct 2005 23:05:58 -0700 Subject: [PATCH 49/54] [PATCH] alpha: atomic dependency fix My alpha build is exploding because asm/atomic.h now needs smb_mb(), which is over in the (not included) system.h. I fear what will happen if I include system.h into atomic.h, so let's put the barriers into their own header file. Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/atomic.h | 2 ++ include/asm-alpha/barrier.h | 34 ++++++++++++++++++++++++++++++++++ include/asm-alpha/system.h | 31 +------------------------------ 3 files changed, 37 insertions(+), 30 deletions(-) create mode 100644 include/asm-alpha/barrier.h diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h index 0b40bad00289..20ac3d95ecd9 100644 --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -1,6 +1,8 @@ #ifndef _ALPHA_ATOMIC_H #define _ALPHA_ATOMIC_H +#include + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc... diff --git a/include/asm-alpha/barrier.h b/include/asm-alpha/barrier.h new file mode 100644 index 000000000000..229c83fe77cb --- /dev/null +++ b/include/asm-alpha/barrier.h @@ -0,0 +1,34 @@ +#ifndef __BARRIER_H +#define __BARRIER_H + +#define mb() \ +__asm__ __volatile__("mb": : :"memory") + +#define rmb() \ +__asm__ __volatile__("mb": : :"memory") + +#define wmb() \ +__asm__ __volatile__("wmb": : :"memory") + +#define read_barrier_depends() \ +__asm__ __volatile__("mb": : :"memory") + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() barrier() +#endif + +#define set_mb(var, value) \ +do { var = value; mb(); } while (0) + +#define set_wmb(var, value) \ +do { var = value; wmb(); } while (0) + +#endif /* __BARRIER_H */ diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h index bdb4d66418f1..050e86d12891 100644 --- a/include/asm-alpha/system.h +++ b/include/asm-alpha/system.h @@ -4,6 +4,7 @@ #include #include #include +#include /* * System defines.. Note that this is included both from .c and .S @@ -139,36 +140,6 @@ extern void halt(void) __attribute__((noreturn)); struct task_struct; extern struct task_struct *alpha_switch_to(unsigned long, struct task_struct*); -#define mb() \ -__asm__ __volatile__("mb": : :"memory") - -#define rmb() \ -__asm__ __volatile__("mb": : :"memory") - -#define wmb() \ -__asm__ __volatile__("wmb": : :"memory") - -#define read_barrier_depends() \ -__asm__ __volatile__("mb": : :"memory") - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() barrier() -#endif - -#define set_mb(var, value) \ -do { var = value; mb(); } while (0) - -#define set_wmb(var, value) \ -do { var = value; wmb(); } while (0) - #define imb() \ __asm__ __volatile__ ("call_pal %0 #imb" : : "i" (PAL_imb) : "memory") From 444d1d9bb5b724f03344c9317bc01d54a9b39073 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 25 Oct 2005 11:00:56 -0700 Subject: [PATCH 50/54] [PATCH] qlogic lockup fix If qla2x00_probe_one()'s call to qla2x00_iospace_config() fails, we call qla2x00_free_device() to clean up. But because ha->dpc_pid hasn't been set yet, qla2x00_free_device() tries to stop a kernel thread which hasn't started yet. It does wait_for_completion() against an uninitialised completion struct and the kernel hangs up. Fix it by initialising ha->dpc_pid a bit earlier. Cc: Andrew Vasquez Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8982978c42fd..7aec93f9d423 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1325,6 +1325,8 @@ int qla2x00_probe_one(struct pci_dev *pdev, struct qla_board_info *brd_info) ha->brd_info = brd_info; sprintf(ha->host_str, "%s_%ld", ha->brd_info->drv_name, ha->host_no); + ha->dpc_pid = -1; + /* Configure PCI I/O space */ ret = qla2x00_iospace_config(ha); if (ret) @@ -1448,7 +1450,6 @@ int qla2x00_probe_one(struct pci_dev *pdev, struct qla_board_info *brd_info) */ spin_lock_init(&ha->mbx_reg_lock); - ha->dpc_pid = -1; init_completion(&ha->dpc_inited); init_completion(&ha->dpc_exited); From 4ea6a8046bb49d43c950898f0cb4e1994ef6c89d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 24 Oct 2005 19:55:23 +0800 Subject: [PATCH 51/54] [IPV6]: Fix refcnt of struct ip6_flowlabel Signed-off-by: Yan Zheng Acked-by: YOSHIFUJI Hideaki Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv6/ip6_flowlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f841bde30c18..bbbe80cdaf72 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -483,7 +483,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) goto done; } fl1 = sfl->fl; - atomic_inc(&fl->users); + atomic_inc(&fl1->users); break; } } From 5ac660ee1334b401450280cd282113b2c18398f5 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Sun, 23 Oct 2005 10:31:45 +0200 Subject: [PATCH 52/54] [TR]: Preserve RIF flag even for 2 byte RIF fields. Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/802/tr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/802/tr.c b/net/802/tr.c index 1eaa3d19d8bf..afd8385c0c9c 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -340,9 +340,10 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) unsigned int hash, rii_p = 0; unsigned long flags; struct rif_cache *entry; - + unsigned char saddr0; spin_lock_irqsave(&rif_lock, flags); + saddr0 = trh->saddr[0]; /* * Firstly see if the entry exists @@ -395,7 +396,6 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK); memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); entry->local_ring = 0; - trh->saddr[0]|=TR_RII; /* put the routing indicator back for tcpdump */ } else { @@ -422,6 +422,7 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", } entry->last_used=jiffies; } + trh->saddr[0]=saddr0; /* put the routing indicator back for tcpdump */ spin_unlock_irqrestore(&rif_lock, flags); } From 5ed688a7162ff8d28d7cf98b34a1f825e4c2c2ac Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Sun, 23 Oct 2005 10:33:52 +0200 Subject: [PATCH 53/54] [LLC]: Strip RIF flag from source MAC address Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_pdu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index f45c37d89cf7..c7a959428b4f 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -254,8 +254,10 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { if (skb->protocol == ntohs(ETH_P_802_2)) memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); - else if (skb->protocol == ntohs(ETH_P_TR_802_2)) + else if (skb->protocol == ntohs(ETH_P_TR_802_2)) { memcpy(sa, tr_hdr(skb)->saddr, ETH_ALEN); + *sa &= 0x7F; + } } /** From 6693e74a16ef563960764bd963f1048392135c3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 Oct 2005 20:40:09 -0700 Subject: [PATCH 54/54] PCI: be more verbose about resource quirks When reserving an PCI quirk, note that in the kernel bootup messages. Also, parse the strange PIIX4 device resources - they should get their own PCI resource quirks, but for now just print out what it finds to verify that the code does the right thing. Signed-off-by: Linus Torvalds --- drivers/pci/quirks.c | 101 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 13 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a6a630a950d0..7992bc8cc6a4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -241,7 +241,8 @@ static void __devinit quirk_s3_64M(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M ); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M ); -static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr) +static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, + unsigned size, int nr, const char *name) { region &= ~(size-1); if (region) { @@ -259,6 +260,7 @@ static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, unsi pcibios_bus_to_resource(dev, res, &bus_region); pci_claim_resource(dev, nr); + printk("PCI quirk: region %04x-%04x claimed by %s\n", region, region + size - 1, name); } } @@ -291,25 +293,98 @@ static void __devinit quirk_ali7101_acpi(struct pci_dev *dev) u16 region; pci_read_config_word(dev, 0xE0, ®ion); - quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES); + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES, "ali7101 ACPI"); pci_read_config_word(dev, 0xE2, ®ion); - quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1); + quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1, "ali7101 SMB"); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, quirk_ali7101_acpi ); +static void piix4_io_quirk(struct pci_dev *dev, const char *name, unsigned int port, unsigned int enable) +{ + u32 devres; + u32 mask, size, base; + + pci_read_config_dword(dev, port, &devres); + if ((devres & enable) != enable) + return; + mask = (devres >> 16) & 15; + base = devres & 0xffff; + size = 16; + for (;;) { + unsigned bit = size >> 1; + if ((bit & mask) == bit) + break; + size = bit; + } + /* + * For now we only print it out. Eventually we'll want to + * reserve it (at least if it's in the 0x1000+ range), but + * let's get enough confirmation reports first. + */ + base &= -size; + printk("%s PIO at %04x-%04x\n", name, base, base + size - 1); +} + +static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int port, unsigned int enable) +{ + u32 devres; + u32 mask, size, base; + + pci_read_config_dword(dev, port, &devres); + if ((devres & enable) != enable) + return; + base = devres & 0xffff0000; + mask = (devres & 0x3f) << 16; + size = 128 << 16; + for (;;) { + unsigned bit = size >> 1; + if ((bit & mask) == bit) + break; + size = bit; + } + /* + * For now we only print it out. Eventually we'll want to + * reserve it, but let's get enough confirmation reports first. + */ + base &= -size; + printk("%s MMIO at %04x-%04x\n", name, base, base + size - 1); +} + /* * PIIX4 ACPI: Two IO regions pointed to by longwords at * 0x40 (64 bytes of ACPI registers) * 0x90 (32 bytes of SMB registers) + * and a few strange programmable PIIX4 device resources. */ static void __devinit quirk_piix4_acpi(struct pci_dev *dev) { - u32 region; + u32 region, res_a; pci_read_config_dword(dev, 0x40, ®ion); - quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES); + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES, "PIIX4 ACPI"); pci_read_config_dword(dev, 0x90, ®ion); - quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1); + quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1, "PIIX4 SMB"); + + /* Device resource A has enables for some of the other ones */ + pci_read_config_dword(dev, 0x5c, &res_a); + + piix4_io_quirk(dev, "PIIX4 devres B", 0x60, 3 << 21); + piix4_io_quirk(dev, "PIIX4 devres C", 0x64, 3 << 21); + + /* Device resource D is just bitfields for static resources */ + + /* Device 12 enabled? */ + if (res_a & (1 << 29)) { + piix4_io_quirk(dev, "PIIX4 devres E", 0x68, 1 << 20); + piix4_mem_quirk(dev, "PIIX4 devres F", 0x6c, 1 << 7); + } + /* Device 13 enabled? */ + if (res_a & (1 << 30)) { + piix4_io_quirk(dev, "PIIX4 devres G", 0x70, 1 << 20); + piix4_mem_quirk(dev, "PIIX4 devres H", 0x74, 1 << 7); + } + piix4_io_quirk(dev, "PIIX4 devres I", 0x78, 1 << 20); + piix4_io_quirk(dev, "PIIX4 devres J", 0x7c, 1 << 20); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi ); @@ -323,10 +398,10 @@ static void __devinit quirk_ich4_lpc_acpi(struct pci_dev *dev) u32 region; pci_read_config_dword(dev, 0x40, ®ion); - quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES); + quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES, "ICH4 ACPI/GPIO/TCO"); pci_read_config_dword(dev, 0x58, ®ion); - quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1); + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH4 GPIO"); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, quirk_ich4_lpc_acpi ); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_0, quirk_ich4_lpc_acpi ); @@ -352,7 +427,7 @@ static void __devinit quirk_vt82c586_acpi(struct pci_dev *dev) if (rev & 0x10) { pci_read_config_dword(dev, 0x48, ®ion); region &= PCI_BASE_ADDRESS_IO_MASK; - quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES); + quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES, "vt82c586 ACPI"); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_vt82c586_acpi ); @@ -372,11 +447,11 @@ static void __devinit quirk_vt82c686_acpi(struct pci_dev *dev) pci_read_config_word(dev, 0x70, &hm); hm &= PCI_BASE_ADDRESS_IO_MASK; - quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1); + quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1, "vt82c868 HW-mon"); pci_read_config_dword(dev, 0x90, &smb); smb &= PCI_BASE_ADDRESS_IO_MASK; - quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2); + quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2, "vt82c868 SMB"); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_vt82c686_acpi ); @@ -391,11 +466,11 @@ static void __devinit quirk_vt8235_acpi(struct pci_dev *dev) pci_read_config_word(dev, 0x88, &pm); pm &= PCI_BASE_ADDRESS_IO_MASK; - quirk_io_region(dev, pm, 128, PCI_BRIDGE_RESOURCES); + quirk_io_region(dev, pm, 128, PCI_BRIDGE_RESOURCES, "vt8235 PM"); pci_read_config_word(dev, 0xd0, &smb); smb &= PCI_BASE_ADDRESS_IO_MASK; - quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 1); + quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 1, "vt8235 SMB"); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, quirk_vt8235_acpi);