From 0e2bedaa394f74fa9f75ee937488c33d90039b5a Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 30 Jan 2009 21:24:41 +0000 Subject: [PATCH 001/263] [CIFS] ipv6_addr_equal for address comparison Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2209be943051..005df85219a8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -1379,8 +1379,8 @@ cifs_find_tcp_session(struct sockaddr_storage *addr) server->addr.sockAddr.sin_addr.s_addr)) continue; else if (addr->ss_family == AF_INET6 && - memcmp(&server->addr.sockAddr6.sin6_addr, - &addr6->sin6_addr, sizeof(addr6->sin6_addr))) + !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, + &addr6->sin6_addr)) continue; ++server->srv_count; From c88ccea3143975294f5a52097546bcbb75975f52 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 11:27:46 -0500 Subject: [PATCH 002/263] jbd2: Fix return value of jbd2_journal_start_commit() The function jbd2_journal_start_commit() returns 1 if either a transaction is committing or the function has queued a transaction commit. But it returns 0 if we raced with somebody queueing the transaction commit as well. This resulted in ext4_sync_fs() not functioning correctly (description from Arthur Jones): In the case of a data=ordered umount with pending long symlinks which are delayed due to a long list of other I/O on the backing block device, this causes the buffer associated with the long symlinks to not be moved to the inode dirty list in the second phase of fsync_super. Then, before they can be dirtied again, kjournald exits, seeing the UMOUNT flag and the dirty pages are never written to the backing block device, causing long symlink corruption and exposing new or previously freed block data to userspace. This can be reproduced with a script created by Eric Sandeen : #!/bin/bash umount /mnt/test2 mount /dev/sdb4 /mnt/test2 rm -f /mnt/test2/* dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 touch /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename ln -s /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename /mnt/test2/link umount /mnt/test2 mount /dev/sdb4 /mnt/test2 ls /mnt/test2/ This patch fixes jbd2_journal_start_commit() to always return 1 when there's a transaction committing or queued for commit. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" CC: Eric Sandeen CC: linux-ext4@vger.kernel.org --- fs/jbd2/journal.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb343008eded..58144102bf25 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction was started. + * Called under j_state_lock. Returns true if a transaction commit was started. */ int __jbd2_log_start_commit(journal_t *journal, tid_t target) { @@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) /* * Start a commit of the current running transaction (if any). Returns true - * if a transaction was started, and fills its tid in at *ptid + * if a transaction is going to be committed (or is currently already + * committing), and fills its tid in at *ptid */ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) { @@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - ret = __jbd2_log_start_commit(journal, tid); - if (ret && ptid) + __jbd2_log_start_commit(journal, tid); + /* There's a running transaction and we've just made sure + * it's commit has been scheduled. */ + if (ptid) *ptid = tid; - } else if (journal->j_committing_transaction && ptid) { + ret = 1; + } else if (journal->j_committing_transaction) { /* * If ext3_write_super() recently started a commit, then we * have to wait for completion of that transaction */ - *ptid = journal->j_committing_transaction->t_tid; + if (ptid) + *ptid = journal->j_committing_transaction->t_tid; ret = 1; } spin_unlock(&journal->j_state_lock); From 9eddacf9e9c03578ef2c07c9534423e823d677f8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 06:46:05 -0500 Subject: [PATCH 003/263] Revert "ext4: wait on all pending commits in ext4_sync_fs()" This undoes commit 14ce0cb411c88681ab8f3a4c9caa7f42e97a3184. Since jbd2_journal_start_commit() is now fixed to return 1 when we started a transaction commit, there's some transaction waiting to be committed or there's a transaction already committing, we don't need to call ext4_force_commit() in ext4_sync_fs(). Furthermore ext4_force_commit() can unnecessarily create sync transaction which is expensive so it's worthwhile to remove it when we can. http://bugzilla.kernel.org/show_bug.cgi?id=12224 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Cc: Eric Sandeen Cc: linux-ext4@vger.kernel.org --- fs/ext4/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5f06a5f045e..a5732c58f676 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb) static int ext4_sync_fs(struct super_block *sb, int wait) { int ret = 0; + tid_t target; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); sb->s_dirt = 0; if (EXT4_SB(sb)->s_journal) { - if (wait) - ret = ext4_force_commit(sb); - else - jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); + if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, + &target)) { + if (wait) + jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, + target); + } } else { ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); } From 7f5aa215088b817add9c71914b83650bdd49f8a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 11:15:34 -0500 Subject: [PATCH 004/263] jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate() If we race with commit code setting i_transaction to NULL, we could possibly dereference it. Proper locking requires the journal pointer (to access journal->j_list_lock), which we don't have. So we have to change the prototype of the function so that filesystem passes us the journal pointer. Also add a more detailed comment about why the function jbd2_journal_begin_ordered_truncate() does what it does and how it should be used. Thanks to Dan Carpenter for pointing to the suspitious code. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Acked-by: Joel Becker CC: linux-ext4@vger.kernel.org CC: ocfs2-devel@oss.oracle.com CC: mfasheh@suse.de CC: Dan Carpenter --- fs/ext4/inode.c | 6 ++++-- fs/jbd2/transaction.c | 42 +++++++++++++++++++++++++++++++----------- fs/ocfs2/journal.h | 6 ++++-- include/linux/jbd2.h | 3 ++- 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 03ba20be1329..658c4a7f2578 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,8 +47,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { - return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, - new_size); + return jbd2_journal_begin_ordered_truncate( + EXT4_SB(inode->i_sb)->s_journal, + &EXT4_I(inode)->jinode, + new_size); } static void ext4_invalidatepage(struct page *page, unsigned long offset); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 46b4e347ed7d..28ce21d8598e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2129,26 +2129,46 @@ done: } /* - * This function must be called when inode is journaled in ordered mode - * before truncation happens. It starts writeout of truncated part in - * case it is in the committing transaction so that we stand to ordered - * mode consistency guarantees. + * File truncate and transaction commit interact with each other in a + * non-trivial way. If a transaction writing data block A is + * committing, we cannot discard the data by truncate until we have + * written them. Otherwise if we crashed after the transaction with + * write has committed but before the transaction with truncate has + * committed, we could see stale data in block A. This function is a + * helper to solve this problem. It starts writeout of the truncated + * part in case it is in the committing transaction. + * + * Filesystem code must call this function when inode is journaled in + * ordered mode before truncation happens and after the inode has been + * placed on orphan list with the new inode size. The second condition + * avoids the race that someone writes new data and we start + * committing the transaction after this function has been called but + * before a transaction for truncate is started (and furthermore it + * allows us to optimize the case where the addition to orphan list + * happens in the same transaction as write --- we don't have to write + * any data in such case). */ -int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, +int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *jinode, loff_t new_size) { - journal_t *journal; - transaction_t *commit_trans; + transaction_t *inode_trans, *commit_trans; int ret = 0; - if (!inode->i_transaction && !inode->i_next_transaction) + /* This is a quick check to avoid locking if not necessary */ + if (!jinode->i_transaction) goto out; - journal = inode->i_transaction->t_journal; + /* Locks are here just to force reading of recent values, it is + * enough that the transaction was not committing before we started + * a transaction adding the inode to orphan list */ spin_lock(&journal->j_state_lock); commit_trans = journal->j_committing_transaction; spin_unlock(&journal->j_state_lock); - if (inode->i_transaction == commit_trans) { - ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, + spin_lock(&journal->j_list_lock); + inode_trans = jinode->i_transaction; + spin_unlock(&journal->j_list_lock); + if (inode_trans == commit_trans) { + ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, new_size, LLONG_MAX); if (ret) jbd2_journal_abort(journal, ret); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3c3532e1307c..172850a9a12a 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) static inline int ocfs2_begin_ordered_truncate(struct inode *inode, loff_t new_size) { - return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, - new_size); + return jbd2_journal_begin_ordered_truncate( + OCFS2_SB(inode->i_sb)->journal->j_journal, + &OCFS2_I(inode)->ip_jinode, + new_size); } #endif /* OCFS2_JOURNAL_H */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b28b37eb11c6..4d248b3f1323 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); -extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); +extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); From 7be2baaa0322c59ba888aa5260a8c130666acd41 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 10 Feb 2009 09:53:42 -0500 Subject: [PATCH 005/263] ext4: Fix to read empty directory blocks correctly in 64k The rec_len field in the directory entry is 16 bits, so there was a problem representing rec_len for filesystems with a 64k block size in the case where the directory entry takes the entire 64k block. Unfortunately, there were two schemes that were proposed; one where all zeros meant 65536 and one where all ones (65535) meant 65536. E2fsprogs used 0, whereas the kernel used 65535. Oops. Fortunately this case happens extremely rarely, with the most common case being the lost+found directory, created by mke2fs. So we will be liberal in what we accept, and accept both encodings, but we will continue to encode 65536 as 65535. This will require a change in e2fsprogs, but with fortunately ext4 filesystems normally have the dir_index feature enabled, which precludes having a completely empty directory block. Signed-off-by: Wei Yongjun Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index aafc9eba1c25..b0c87dce66a3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); - if (len == EXT4_MAX_REC_LEN) + if (len == EXT4_MAX_REC_LEN || len == 0) return 1 << 16; return len; } From ba4439165f0f0d25b2fe065cf0c1ff8130b802eb Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 10 Feb 2009 11:14:34 -0500 Subject: [PATCH 006/263] ext4: Fix lockdep warning We should not call ext4_mb_add_n_trim while holding alloc_semp. ============================================= [ INFO: possible recursive locking detected ] 2.6.29-rc4-git1-dirty #124 --------------------------------------------- ffsb/3116 is trying to acquire lock: (&meta_group_info[i]->alloc_sem){----}, at: [] ext4_mb_load_buddy+0xd2/0x343 but task is already holding lock: (&meta_group_info[i]->alloc_sem){----}, at: [] ext4_mb_load_buddy+0xd2/0x343 http://bugzilla.kernel.org/show_bug.cgi?id=12672 Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index deba54f6cbed..c962d0690505 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4476,23 +4476,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) pa->pa_free -= ac->ac_b_ex.fe_len; pa->pa_len -= ac->ac_b_ex.fe_len; spin_unlock(&pa->pa_lock); - /* - * We want to add the pa to the right bucket. - * Remove it from the list and while adding - * make sure the list to which we are adding - * doesn't grow big. - */ - if (likely(pa->pa_free)) { - spin_lock(pa->pa_obj_lock); - list_del_rcu(&pa->pa_inode_list); - spin_unlock(pa->pa_obj_lock); - ext4_mb_add_n_trim(ac); - } } - ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->alloc_semp) up_read(ac->alloc_semp); + if (pa) { + /* + * We want to add the pa to the right bucket. + * Remove it from the list and while adding + * make sure the list to which we are adding + * doesn't grow big. We need to release + * alloc_semp before calling ext4_mb_add_n_trim() + */ + if (pa->pa_linear && likely(pa->pa_free)) { + spin_lock(pa->pa_obj_lock); + list_del_rcu(&pa->pa_inode_list); + spin_unlock(pa->pa_obj_lock); + ext4_mb_add_n_trim(ac); + } + ext4_mb_put_pa(ac, ac->ac_sb, pa); + } if (ac->ac_bitmap_page) page_cache_release(ac->ac_bitmap_page); if (ac->ac_buddy_page) From e637d553199e264327714da437e6c808f2f4b096 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Thu, 22 Jan 2009 13:40:09 -0600 Subject: [PATCH 007/263] [SCSI] ibmvscsi: Correct DMA mapping leak The ibmvscsi client driver is not unmapping the SCSI command after encountering a DMA mapping error while trying to map an indirect scattergather list for the event pool. This leads to a leak of DMA entitlement that could result in the device failing future DMA operations in a CMO environment. Signed-off-by: Robert Jennings Acked-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvscsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 74d07d137dae..c9aa7611e408 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -432,6 +432,7 @@ static int map_sg_data(struct scsi_cmnd *cmd, sdev_printk(KERN_ERR, cmd->device, "Can't allocate memory " "for indirect table\n"); + scsi_dma_unmap(cmd); return 0; } } From c2f9e49f9bbfa2e111ab1e1628b96b560bae7cec Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 27 Jan 2009 11:41:36 -0500 Subject: [PATCH 008/263] [SCSI] scsi_scan: add missing interim SDEV_DEL state if slave_alloc fails We were running i/o and performing a bunch of hba resets in a loop. This forces a lot of target removes and then rescans. Since the resets are occuring during scan it's causing the scan i/o to timeout, invoking error recovery, etc. We end up getting some nasty crashing in scsi_scan.c due to references to old sdevs that are failing but had some lingering references that kept them around. Fix by setting device state to SDEV_DEL if the LLD's slave_alloc fails. Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 66505bb79410..8f4de20c9deb 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -317,6 +317,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, return sdev; out_device_destroy: + scsi_device_set_state(sdev, SDEV_DEL); transport_destroy_device(&sdev->sdev_gendev); put_device(&sdev->sdev_gendev); out: From 76e3a19d0691bbfcc559ce77ab3004818fab8f22 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Fri, 30 Jan 2009 15:46:23 +0100 Subject: [PATCH 009/263] [SCSI] sg: fix device number in blktrace data Hi, we have run into an issue with blktrace being started for sg devices. Please apply. Thanks, Martin From: Martin Peschke The device number denoting a generic SCSI devices (sg) in a blktrace trace is broken; major and minor are always 0. It looks like sdp->device->sdev_gendev.devt is not initialized properly. The fix below uses other data to make up a valid device number, similar to the way an sg device number is generated for sysfs output. Reported-by: Stefan Raspl Signed-off-by: Martin Peschke Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 8f0bd3f7a59f..516925d8b570 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1078,7 +1078,7 @@ sg_ioctl(struct inode *inode, struct file *filp, case BLKTRACESETUP: return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, - sdp->device->sdev_gendev.devt, + MKDEV(SCSI_GENERIC_MAJOR, sdp->index), (char *)arg); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); From d4b17a20f30faf0debbc225bfbf98dba4e351c4d Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 4 Feb 2009 16:13:08 -0600 Subject: [PATCH 010/263] [SCSI] ibmvfc: Fix command timeout errors Currently the ibmvfc driver sets the IBMVFC_CLASS_3_ERR flag in the VFC Frame if both the adapter and the device claim support for Class 3. However, this bit actually refers to Class 3 Error Recovery, which is currently not supported by the VIOS. Setting this bit can cause lots of command timeout responses from the VIOS resulting in general instability. Fix this by never setting this bit. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index a1a511bdec8c..41226e3a741f 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1573,9 +1573,6 @@ static int ibmvfc_queuecommand(struct scsi_cmnd *cmnd, vfc_cmd->resp_len = sizeof(vfc_cmd->rsp); vfc_cmd->cancel_key = (unsigned long)cmnd->device->hostdata; vfc_cmd->tgt_scsi_id = rport->port_id; - if ((rport->supported_classes & FC_COS_CLASS3) && - (fc_host_supported_classes(vhost->host) & FC_COS_CLASS3)) - vfc_cmd->flags = IBMVFC_CLASS_3_ERR; vfc_cmd->iu.xfer_len = scsi_bufflen(cmnd); int_to_scsilun(cmnd->device->lun, &vfc_cmd->iu.lun); memcpy(vfc_cmd->iu.cdb, cmnd->cmnd, cmnd->cmd_len); From 0883e3b3a85b5860b7729f1279a52e95b87dea97 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 4 Feb 2009 16:13:12 -0600 Subject: [PATCH 011/263] [SCSI] ibmvfc: Fix rport relogin The ibmvfc driver has a bug in its SCN handling. If it receives an ELS event such asn an N-Port SCN event or an unsolicited PLOGI, or any other SCN event which causes ibmvfc_reinit_host to be called, it is possible that we will call fc_remote_port_add for a target that already has an rport added, which can result in duplicate rports getting created for the same targets. Fix this by calling fc_remote_port_rolechg in this scenario instead to report any possible role change that may have occurred. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvfc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 41226e3a741f..ed1e728763a2 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3263,6 +3263,7 @@ static int ibmvfc_alloc_target(struct ibmvfc_host *vhost, u64 scsi_id) return -ENOMEM; } + memset(tgt, 0, sizeof(*tgt)); tgt->scsi_id = scsi_id; tgt->new_scsi_id = scsi_id; tgt->vhost = vhost; @@ -3573,9 +3574,18 @@ static void ibmvfc_log_ae(struct ibmvfc_host *vhost, int events) static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt) { struct ibmvfc_host *vhost = tgt->vhost; - struct fc_rport *rport; + struct fc_rport *rport = tgt->rport; unsigned long flags; + if (rport) { + tgt_dbg(tgt, "Setting rport roles\n"); + fc_remote_port_rolechg(rport, tgt->ids.roles); + spin_lock_irqsave(vhost->host->host_lock, flags); + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return; + } + tgt_dbg(tgt, "Adding rport\n"); rport = fc_remote_port_add(vhost->host, 0, &tgt->ids); spin_lock_irqsave(vhost->host->host_lock, flags); From 14ae6faca11889d80f795993dbe932d82305b564 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 4 Feb 2009 16:13:13 -0600 Subject: [PATCH 012/263] [SCSI] ibmvfc: Increase cancel timeout During cancel testing it has been shown that 15 seconds is not nearly long enough for the VIOS to respond to a cancel under loaded situations. Increasing this timeout to 60 seconds allows time for the VIOS to cancel the outstanding commands and prevents us from escalating to a full host reset, which can take much longer. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 87dafd0f8d44..b21e071b9862 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -32,7 +32,7 @@ #define IBMVFC_DRIVER_VERSION "1.0.4" #define IBMVFC_DRIVER_DATE "(November 14, 2008)" -#define IBMVFC_DEFAULT_TIMEOUT 15 +#define IBMVFC_DEFAULT_TIMEOUT 60 #define IBMVFC_INIT_TIMEOUT 120 #define IBMVFC_MAX_REQUESTS_DEFAULT 100 From 7f977ddd0eedfd5aac7865794f220f65aae8f361 Mon Sep 17 00:00:00 2001 From: "Shyam_Iyer@Dell.com" Date: Thu, 5 Feb 2009 20:12:37 +0530 Subject: [PATCH 013/263] [SCSI] qla2xxx: fix Kernel Panic with Qlogic 2472 Card. Kernel Panic is observed with a Qlogic 2472 Card is plugged into the system and the qla2xxx driver is loaded: QLogic Fibre Channel HBA Driver: 8.02.01.02.11.0-k9 vendor=8086 device=3410 qla2xxx 0000:05:00.0: PCI INT A -> GSI 40 (level, low) -> IRQ 40 qla2xxx 0000:05:00.0: Found an ISP2432, irq 40, iobase 0xffffc2001091c000 qla2xxx 0000:05:00.0: Configuring PCI space... qla2xxx 0000:05:00.0: setting latency timer to 64 qla2xxx 0000:05:00.0: Configure NVRAM parameters... BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: [] strncpy+0x5/0x1e PGD 7c564067 PUD 78d8c067 PMD 0 Oops: 0000 [1] SMP last sysfs file: /sys/devices/pci0000:00/0000:00:1d.1/usb6/6-2/6-2:1.1/input/input4/event 4/dev CPU 1 Modules linked in: qla2xxx(+) squashfs usb_storage scsi_transport_fc scsi_tgt parport_pc parport arc4 ecb crypto_blkcipher acpi_cpufreq fan loop nfs nfs_acl lockd sunrpc nls_iso8859_1 nls_cp437 ipv6 af_packet st sr_mod ide_disk ide_cd_mod ide_core cdrom usbhid hid ff_memless sg sd_mod crc_t10dif uhci_hcd mptsas mptscsih ehci_hcd mptbase scsi_transport_sas rtc_cmos rtc_core rtc_lib usbcore scsi_mod thermal bnx2 button processor thermal_sys hwmon edd Supported: Yes Pid: 4415, comm: insmod Not tainted 2.6.27.13-1-default #1 RIP: 0010:[] [] strncpy+0x5/0x1e RSP: 0018:ffff88007b04fbc0 EFLAGS: 00010202 RAX: 00000000000000b7 RBX: ffff88007b9641e0 RCX: ffff88007c1b2ad7 RDX: 000000000000004f RSI: 0000000000000000 RDI: ffff88007c1b2ad7 RBP: ffff88007c1b0620 R08: 0000000000000010 R09: 0000000100000000 R10: 0000000000000046 R11: ffffffff803651c6 R12: ffff88007b074000 R13: ffff88007b964000 R14: ffff88007c1b2ac6 R15: 0000000000000000 FS: 00007f91a6c366f0(0000) GS:ffff88007dbeee40(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000007bd7c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process insmod (pid: 4415, threadinfo ffff88007b04e000, task ffff880078586180) Stack: ffffffffa02d82c4 0000000000002432 ffff88007d385000 ffff88007c1b0620 ffff88007c1b0620 ffff88007c1b0000 ffff88007d385000 0000000000002432 ffffffffa02dcb1e 0000000000002432 ffffc2001091c000 ffff88007c1b0620 Call Trace: [] qla24xx_nvram_config+0x385/0x6c2 [qla2xxx] [] qla2x00_initialize_adapter+0x169/0x383 [qla2xxx] [] qla2x00_probe_one+0x6bc/0x9c6 [qla2xxx] [] pci_device_probe+0xb8/0x105 [] really_probe+0xdd/0x1e5 [] __driver_attach+0x46/0x6d [] bus_for_each_dev+0x44/0x78 [] bus_add_driver+0xef/0x235 [] driver_register+0xa2/0x11f [] __pci_register_driver+0x5d/0x90 [] qla2x00_module_init+0x126/0x159 [qla2xxx] [] _stext+0x41/0x110 [] sys_init_module+0xa0/0x1ba [] system_call_fastpath+0x16/0x1b [<00007f91a679b76a>] 0x7f91a679b76a Code: ff c1 41 39 c0 75 05 45 85 c0 75 bf 41 29 c0 44 89 c0 c3 31 d2 8a 04 16 88 04 17 48 ff c2 84 c0 75 f3 48 89 f8 c3 48 89 f9 eb 10 <8a> 06 3c 01 88 01 48 83 de ff 48 ff c1 48 ff ca 48 85 d2 75 eb RIP [] strncpy+0x5/0x1e RSP CR2: 0000000000000000 ---[ end trace 829d7d78dfafb785 ]--- The attached patch fixes the issue. Signed-off-by: Shyam Iyer Acked-by: Seokmann Ju Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_devtbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_devtbl.h b/drivers/scsi/qla2xxx/qla_devtbl.h index d78d35e681ab..d6ea69df7c5c 100644 --- a/drivers/scsi/qla2xxx/qla_devtbl.h +++ b/drivers/scsi/qla2xxx/qla_devtbl.h @@ -72,7 +72,7 @@ static char *qla2x00_model_name[QLA_MODEL_NAMES*2] = { "QLA2462", "Sun PCI-X 2.0 to 4Gb FC, Dual Channel", /* 0x141 */ "QLE2460", "Sun PCI-Express to 2Gb FC, Single Channel", /* 0x142 */ "QLE2462", "Sun PCI-Express to 4Gb FC, Single Channel", /* 0x143 */ - "QEM2462" "Server I/O Module 4Gb FC, Dual Channel", /* 0x144 */ + "QEM2462", "Server I/O Module 4Gb FC, Dual Channel", /* 0x144 */ "QLE2440", "PCI-Express to 4Gb FC, Single Channel", /* 0x145 */ "QLE2464", "PCI-Express to 4Gb FC, Quad Channel", /* 0x146 */ "QLA2440", "PCI-X 2.0 to 4Gb FC, Single Channel", /* 0x147 */ From 308cec14e6710b4d5b70e9778ce117be8371735d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 6 Feb 2009 12:06:20 -0600 Subject: [PATCH 014/263] [SCSI] libiscsi: Fix scsi command timeout oops in iscsi_eh_timed_out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yanling Qi from LSI found the root cause of the panic, below is his analysis: Problem description: the open iscsi driver installs eh_timed_out handler to the blank_transport_template of the scsi middle level that causes panic of timed out command of other host Here are the details Iscsi Session creation During iscsi session creation time, the iscsi_tcp_session_create() of iscsi_tpc.c will create a scsi-host for the session. See the statement marked with the label A. The statement B replaces the shost->transportt point with a local struct variable. static struct iscsi_cls_session * iscsi_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct Scsi_Host *shost; int cmd_i; if (ep) { printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep); return NULL; } A shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth); if (!shost) return NULL; B shost->transportt = iscsi_tcp_scsi_transport; shost->max_lun = iscsi_max_lun; Please note the scsi host is allocated by invoking isccsi_host_alloc() in libiscsi.c Polluting the middle level blank_transport_template in iscsi_host_alloc() of libiscsi.c The iscsi_host_alloc() invokes the middle level function scsi_host_alloc() in hosts.c for allocating a scsi_host. Then the statement marked with C assigns the iscsi_eh_cmd_timed_out handler to the eh_timed_out callback function. struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, int dd_data_size, uint16_t qdepth) { struct Scsi_Host *shost; struct iscsi_host *ihost; shost = scsi_host_alloc(sht, sizeof(struct iscsi_host) + dd_data_size); if (!shost) return NULL; C shost->transportt->eh_timed_out = iscsi_eh_cmd_timed_out; Please note the shost->transport is the middle level blank_transport_template as shown in the code segment below. We see two problems here. 1. iscsi_eh_cmd_timed_out is installed to the blank_transport_template that will cause some body else problem. 2. iscsi_eh_cmd_timed_out will never be invoked when iscsi command gets timeout because the statement B resets the pointer. Middle level blank_transport_template In the middle level function scsi_host_alloc() of hosts.c, the middle level assigns a blank_transport_template for those hosts not implementing its transport layer. All HBAs without supporting a specific scsi_transport will share the middle level blank_transport_template. Please see the statement D struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) { struct Scsi_Host *shost; gfp_t gfp_mask = GFP_KERNEL; int rval; if (sht->unchecked_isa_dma && privsize) gfp_mask |= __GFP_DMA; shost = kzalloc(sizeof(struct Scsi_Host) + privsize, gfp_mask); if (!shost) return NULL; shost->host_lock = &shost->default_lock; spin_lock_init(shost->host_lock); shost->shost_state = SHOST_CREATED; INIT_LIST_HEAD(&shost->__devices); INIT_LIST_HEAD(&shost->__targets); INIT_LIST_HEAD(&shost->eh_cmd_q); INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); mutex_init(&shost->scan_mutex); shost->host_no = scsi_host_next_hn++; /* XXX(hch): still racy */ shost->dma_channel = 0xff; /* These three are default values which can be overridden */ shost->max_channel = 0; shost->max_id = 8; shost->max_lun = 8; /* Give each shost a default transportt */ D shost->transportt = &blank_transport_template; Why we see panic at iscsi_eh_cmd_timed_out() The mpp virtual HBA doesn’t have a specific scsi_transport. Therefore, the blank_transport_template will be assigned to the virtual host of the MPP virtual HBA by SCSI middle level. Please note that the statement C has assigned iscsi-transport eh_timedout handler to the blank_transport_template. When a mpp virtual command gets timedout, the iscsi_eh_cmd_timed_out() will be invoked to handle mpp virtual command timeout from the middle level scsi_times_out() function of the scsi_error.c. enum blk_eh_timer_return scsi_times_out(struct request *req) { struct scsi_cmnd *scmd = req->special; enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; scsi_log_completion(scmd, TIMEOUT_ERROR); if (scmd->device->host->transportt->eh_timed_out) E eh_timed_out = scmd->device->host->transportt->eh_timed_out; else if (scmd->device->host->hostt->eh_timed_out) eh_timed_out = scmd->device->host->hostt->eh_timed_out; else eh_timed_out = NULL; if (eh_timed_out) { rtn = eh_timed_out(scmd); It is very easy to understand why we get panic in the iscsi_eh_cmd_timed_out(). A scsi_cmnd from a no-iscsi device definitely can not resolve out a session and session->lock. The panic can be happed anywhere during the differencing. static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct iscsi_conn *conn; enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; cls_session = starget_to_session(scsi_target(scmd->device)); session = cls_session->dd_data; debug_scsi("scsi cmd %p timedout\n", scmd); spin_lock(&session->lock); This patch fixes the problem by moving the setting of the iscsi_eh_cmd_timed_out to iscsi_add_host, which is after the LLDs have set their transport template to shost->transportt. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 257c24115de9..809d32d95c76 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1998,6 +1998,8 @@ int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev) if (!shost->can_queue) shost->can_queue = ISCSI_DEF_XMIT_CMDS_MAX; + if (!shost->transportt->eh_timed_out) + shost->transportt->eh_timed_out = iscsi_eh_cmd_timed_out; return scsi_add_host(shost, pdev); } EXPORT_SYMBOL_GPL(iscsi_host_add); @@ -2020,7 +2022,6 @@ struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, shost = scsi_host_alloc(sht, sizeof(struct iscsi_host) + dd_data_size); if (!shost) return NULL; - shost->transportt->eh_timed_out = iscsi_eh_cmd_timed_out; if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) { if (qdepth != 0) From e916141c6889e2a35869d7057ef1cc5e5a2e86eb Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Feb 2009 22:43:19 +0100 Subject: [PATCH 015/263] [SCSI] lpfc: introduce missing kfree Error handling code following a kmalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Acked-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc_els.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index a8f30bdaff69..a7302480bc4a 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5258,6 +5258,7 @@ lpfc_send_els_event(struct lpfc_vport *vport, sizeof(struct lpfc_name)); break; default: + kfree(els_data); return; } memcpy(els_data->wwpn, &ndlp->nlp_portname, sizeof(struct lpfc_name)); From 618a752319503a64d1b66615e8ea2a0e7edaf914 Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Sun, 8 Feb 2009 20:50:11 -0800 Subject: [PATCH 016/263] [SCSI] qla2xxx: Remove interrupt request bit check in the response processing path in multiq mode. Correct response-queue-0 processing by instructing the firmware to run with interrupt-handshaking disabled, similarly to what is now done for all non-0 response queues. Since all response-queues now run in the same mode, the driver no longer needs the hot-path 'is-disabled-HCCR' test. Signed-off-by: Anirban Chakraborty Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_gbl.h | 6 ++---- drivers/scsi/qla2xxx/qla_init.c | 7 +++---- drivers/scsi/qla2xxx/qla_isr.c | 10 ---------- drivers/scsi/qla2xxx/qla_mbx.c | 14 ++++++-------- drivers/scsi/qla2xxx/qla_mid.c | 10 +++++----- 5 files changed, 16 insertions(+), 31 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index a336b4bc81a7..bce5b0435188 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -376,10 +376,8 @@ extern int qla2x00_dfs_remove(scsi_qla_host_t *); /* Globa function prototypes for multi-q */ extern int qla25xx_request_irq(struct rsp_que *); -extern int qla25xx_init_req_que(struct scsi_qla_host *, struct req_que *, - uint8_t); -extern int qla25xx_init_rsp_que(struct scsi_qla_host *, struct rsp_que *, - uint8_t); +extern int qla25xx_init_req_que(struct scsi_qla_host *, struct req_que *); +extern int qla25xx_init_rsp_que(struct scsi_qla_host *, struct rsp_que *); extern int qla25xx_create_req_que(struct qla_hw_data *, uint16_t, uint8_t, uint16_t, uint8_t, uint8_t); extern int qla25xx_create_rsp_que(struct qla_hw_data *, uint16_t, uint8_t, diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f6368a1d3021..986501759ad4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1226,9 +1226,8 @@ qla24xx_config_rings(struct scsi_qla_host *vha) icb->firmware_options_2 |= __constant_cpu_to_le32(BIT_18); - icb->firmware_options_2 |= __constant_cpu_to_le32(BIT_22); + icb->firmware_options_2 &= __constant_cpu_to_le32(~BIT_22); icb->firmware_options_2 |= __constant_cpu_to_le32(BIT_23); - ha->rsp_q_map[0]->options = icb->firmware_options_2; WRT_REG_DWORD(®->isp25mq.req_q_in, 0); WRT_REG_DWORD(®->isp25mq.req_q_out, 0); @@ -3493,7 +3492,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) rsp = ha->rsp_q_map[i]; if (rsp) { rsp->options &= ~BIT_0; - ret = qla25xx_init_rsp_que(base_vha, rsp, rsp->options); + ret = qla25xx_init_rsp_que(base_vha, rsp); if (ret != QLA_SUCCESS) DEBUG2_17(printk(KERN_WARNING "%s Rsp que:%d init failed\n", __func__, @@ -3507,7 +3506,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) if (req) { /* Clear outstanding commands array. */ req->options &= ~BIT_0; - ret = qla25xx_init_req_que(base_vha, req, req->options); + ret = qla25xx_init_req_que(base_vha, req); if (ret != QLA_SUCCESS) DEBUG2_17(printk(KERN_WARNING "%s Req que:%d init failed\n", __func__, diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index e28ad81baf1e..b5554ea4693b 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1707,7 +1707,6 @@ qla25xx_msix_rsp_q(int irq, void *dev_id) struct qla_hw_data *ha; struct rsp_que *rsp; struct device_reg_24xx __iomem *reg; - uint16_t msix_disabled_hccr = 0; rsp = (struct rsp_que *) dev_id; if (!rsp) { @@ -1720,17 +1719,8 @@ qla25xx_msix_rsp_q(int irq, void *dev_id) spin_lock_irq(&ha->hardware_lock); - msix_disabled_hccr = rsp->options; - if (!rsp->id) - msix_disabled_hccr &= __constant_cpu_to_le32(BIT_22); - else - msix_disabled_hccr &= __constant_cpu_to_le32(BIT_6); - qla24xx_process_response_queue(rsp); - if (!msix_disabled_hccr) - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - spin_unlock_irq(&ha->hardware_lock); return IRQ_HANDLED; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index f94ffbb98e95..723cd37294c3 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3090,8 +3090,7 @@ verify_done: } int -qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req, - uint8_t options) +qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req) { int rval; unsigned long flags; @@ -3101,7 +3100,7 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req, struct qla_hw_data *ha = vha->hw; mcp->mb[0] = MBC_INITIALIZE_MULTIQ; - mcp->mb[1] = options; + mcp->mb[1] = req->options; mcp->mb[2] = MSW(LSD(req->dma)); mcp->mb[3] = LSW(LSD(req->dma)); mcp->mb[6] = MSW(MSD(req->dma)); @@ -3128,7 +3127,7 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req, mcp->tov = 60; spin_lock_irqsave(&ha->hardware_lock, flags); - if (!(options & BIT_0)) { + if (!(req->options & BIT_0)) { WRT_REG_DWORD(®->req_q_in, 0); WRT_REG_DWORD(®->req_q_out, 0); } @@ -3142,8 +3141,7 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req, } int -qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp, - uint8_t options) +qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) { int rval; unsigned long flags; @@ -3153,7 +3151,7 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp, struct qla_hw_data *ha = vha->hw; mcp->mb[0] = MBC_INITIALIZE_MULTIQ; - mcp->mb[1] = options; + mcp->mb[1] = rsp->options; mcp->mb[2] = MSW(LSD(rsp->dma)); mcp->mb[3] = LSW(LSD(rsp->dma)); mcp->mb[6] = MSW(MSD(rsp->dma)); @@ -3178,7 +3176,7 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp, mcp->tov = 60; spin_lock_irqsave(&ha->hardware_lock, flags); - if (!(options & BIT_0)) { + if (!(rsp->options & BIT_0)) { WRT_REG_DWORD(®->rsp_q_out, 0); WRT_REG_DWORD(®->rsp_q_in, 0); } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index f53179c46423..d27ceda50791 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -471,7 +471,7 @@ qla25xx_delete_req_que(struct scsi_qla_host *vha, struct req_que *req) if (req) { req->options |= BIT_0; - ret = qla25xx_init_req_que(vha, req, req->options); + ret = qla25xx_init_req_que(vha, req); } if (ret == QLA_SUCCESS) qla25xx_free_req_que(vha, req); @@ -486,7 +486,7 @@ qla25xx_delete_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) if (rsp) { rsp->options |= BIT_0; - ret = qla25xx_init_rsp_que(vha, rsp, rsp->options); + ret = qla25xx_init_rsp_que(vha, rsp); } if (ret == QLA_SUCCESS) qla25xx_free_rsp_que(vha, rsp); @@ -502,7 +502,7 @@ int qla25xx_update_req_que(struct scsi_qla_host *vha, uint8_t que, uint8_t qos) req->options |= BIT_3; req->qos = qos; - ret = qla25xx_init_req_que(vha, req, req->options); + ret = qla25xx_init_req_que(vha, req); if (ret != QLA_SUCCESS) DEBUG2_17(printk(KERN_WARNING "%s failed\n", __func__)); /* restore options bit */ @@ -632,7 +632,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, req->max_q_depth = ha->req_q_map[0]->max_q_depth; mutex_unlock(&ha->vport_lock); - ret = qla25xx_init_req_que(base_vha, req, options); + ret = qla25xx_init_req_que(base_vha, req); if (ret != QLA_SUCCESS) { qla_printk(KERN_WARNING, ha, "%s failed\n", __func__); mutex_lock(&ha->vport_lock); @@ -710,7 +710,7 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options, if (ret) goto que_failed; - ret = qla25xx_init_rsp_que(base_vha, rsp, options); + ret = qla25xx_init_rsp_que(base_vha, rsp); if (ret != QLA_SUCCESS) { qla_printk(KERN_WARNING, ha, "%s failed\n", __func__); mutex_lock(&ha->vport_lock); From 8a659571eccfde1df9bd057d67be51d1aaa0e2db Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Sun, 8 Feb 2009 20:50:12 -0800 Subject: [PATCH 017/263] [SCSI] qla2xxx: Properly acknowledge IDC notification messages. To ensure smooth operations amongst the FCoE and NIC side components of the ISP81xx chip, the FCoE driver (qla2xxx) must ensure the 10gb NIC driver (qlge) does not timeout waiting for IDC (Inter-Driver Communication) acknowledgments. The acknowledgment requirements are trivial -- a simple mirroring of incoming mailbox registers during the AEN to a process-context capable mailbox command. Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_def.h | 5 ++++ drivers/scsi/qla2xxx/qla_fw.h | 2 ++ drivers/scsi/qla2xxx/qla_gbl.h | 3 +++ drivers/scsi/qla2xxx/qla_isr.c | 48 +++++++++++++++++++++++++--------- drivers/scsi/qla2xxx/qla_mbx.c | 26 ++++++++++++++++++ drivers/scsi/qla2xxx/qla_os.c | 16 ++++++++++++ 6 files changed, 87 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 023ee77fb027..e0c5bb54b258 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2135,6 +2135,7 @@ struct qla_msix_entry { /* Work events. */ enum qla_work_type { QLA_EVT_AEN, + QLA_EVT_IDC_ACK, }; @@ -2149,6 +2150,10 @@ struct qla_work_evt { enum fc_host_event_code code; u32 data; } aen; + struct { +#define QLA_IDC_ACK_REGS 7 + uint16_t mb[QLA_IDC_ACK_REGS]; + } idc_ack; } u; }; diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 7abb045a0410..ffff42554087 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1402,6 +1402,8 @@ struct access_chip_rsp_84xx { #define MBA_IDC_NOTIFY 0x8101 #define MBA_IDC_TIME_EXT 0x8102 +#define MBC_IDC_ACK 0x101 + struct nvram_81xx { /* NVRAM header. */ uint8_t id[4]; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index bce5b0435188..6de283f8f111 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -72,6 +72,7 @@ extern int qla2x00_loop_reset(scsi_qla_host_t *); extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int); extern int qla2x00_post_aen_work(struct scsi_qla_host *, enum fc_host_event_code, u32); +extern int qla2x00_post_idc_ack_work(struct scsi_qla_host *, uint16_t *); extern void qla2x00_abort_fcport_cmds(fc_port_t *); extern struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *, @@ -266,6 +267,8 @@ qla2x00_set_idma_speed(scsi_qla_host_t *, uint16_t, uint16_t, uint16_t *); extern int qla84xx_verify_chip(struct scsi_qla_host *, uint16_t *); +extern int qla81xx_idc_ack(scsi_qla_host_t *, uint16_t *); + /* * Global Function Prototypes in qla_isr.c source file. */ diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index b5554ea4693b..f250e5b7897c 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -266,6 +266,40 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) } } +static void +qla81xx_idc_event(scsi_qla_host_t *vha, uint16_t aen, uint16_t descr) +{ + static char *event[] = + { "Complete", "Request Notification", "Time Extension" }; + int rval; + struct device_reg_24xx __iomem *reg24 = &vha->hw->iobase->isp24; + uint16_t __iomem *wptr; + uint16_t cnt, timeout, mb[QLA_IDC_ACK_REGS]; + + /* Seed data -- mailbox1 -> mailbox7. */ + wptr = (uint16_t __iomem *)®24->mailbox1; + for (cnt = 0; cnt < QLA_IDC_ACK_REGS; cnt++, wptr++) + mb[cnt] = RD_REG_WORD(wptr); + + DEBUG2(printk("scsi(%ld): Inter-Driver Commucation %s -- " + "%04x %04x %04x %04x %04x %04x %04x.\n", vha->host_no, + event[aen & 0xff], + mb[0], mb[1], mb[2], mb[3], mb[4], mb[5], mb[6])); + + /* Acknowledgement needed? [Notify && non-zero timeout]. */ + timeout = (descr >> 8) & 0xf; + if (aen != MBA_IDC_NOTIFY || !timeout) + return; + + DEBUG2(printk("scsi(%ld): Inter-Driver Commucation %s -- " + "ACK timeout=%d.\n", vha->host_no, event[aen & 0xff], timeout)); + + rval = qla2x00_post_idc_ack_work(vha, mb); + if (rval != QLA_SUCCESS) + qla_printk(KERN_WARNING, vha->hw, + "IDC failed to post ACK.\n"); +} + /** * qla2x00_async_event() - Process aynchronous events. * @ha: SCSI driver HA context @@ -714,21 +748,9 @@ skip_rio: "%04x %04x %04x\n", vha->host_no, mb[1], mb[2], mb[3])); break; case MBA_IDC_COMPLETE: - DEBUG2(printk("scsi(%ld): Inter-Driver Commucation " - "Complete -- %04x %04x %04x\n", vha->host_no, mb[1], mb[2], - mb[3])); - break; case MBA_IDC_NOTIFY: - DEBUG2(printk("scsi(%ld): Inter-Driver Commucation " - "Request Notification -- %04x %04x %04x\n", vha->host_no, - mb[1], mb[2], mb[3])); - /**** Mailbox registers 4 - 7 valid!!! */ - break; case MBA_IDC_TIME_EXT: - DEBUG2(printk("scsi(%ld): Inter-Driver Commucation " - "Time Extension -- %04x %04x %04x\n", vha->host_no, mb[1], - mb[2], mb[3])); - /**** Mailbox registers 4 - 7 valid!!! */ + qla81xx_idc_event(vha, mb[0], mb[1]); break; } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 723cd37294c3..4c7504cb3990 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3191,3 +3191,29 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) return rval; } +int +qla81xx_idc_ack(scsi_qla_host_t *vha, uint16_t *mb) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + + DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no)); + + mcp->mb[0] = MBC_IDC_ACK; + memcpy(&mcp->mb[1], mb, QLA_IDC_ACK_REGS * sizeof(uint16_t)); + mcp->out_mb = MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0; + mcp->in_mb = MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + + if (rval != QLA_SUCCESS) { + DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__, + vha->host_no, rval, mcp->mb[0])); + } else { + DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no)); + } + + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index c11f872d3e10..2f5f72531e23 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2522,6 +2522,19 @@ qla2x00_post_aen_work(struct scsi_qla_host *vha, enum fc_host_event_code code, return qla2x00_post_work(vha, e, 1); } +int +qla2x00_post_idc_ack_work(struct scsi_qla_host *vha, uint16_t *mb) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_IDC_ACK, 1); + if (!e) + return QLA_FUNCTION_FAILED; + + memcpy(e->u.idc_ack.mb, mb, QLA_IDC_ACK_REGS * sizeof(uint16_t)); + return qla2x00_post_work(vha, e, 1); +} + static void qla2x00_do_work(struct scsi_qla_host *vha) { @@ -2539,6 +2552,9 @@ qla2x00_do_work(struct scsi_qla_host *vha) fc_host_post_event(vha->host, fc_get_event_number(), e->u.aen.code, e->u.aen.data); break; + case QLA_EVT_IDC_ACK: + qla81xx_idc_ack(vha, e->u.idc_ack.mb); + break; } if (e->flags & QLA_EVT_FLAG_FREE) kfree(e); From cf5a163127118325296c90670093b14afebb8424 Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Sun, 8 Feb 2009 20:50:13 -0800 Subject: [PATCH 018/263] [SCSI] qla2xxx: Correct slab-error overwrite during vport creation and deletion. The clearing of a vha's req_ques were overrunning during vport creation. During deletion, vport queues should be torn-down after all cleanup has occurred. Signed-off-by: Anirban Chakraborty Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_attr.c | 13 ++++++------- drivers/scsi/qla2xxx/qla_mid.c | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 33a3c13fd893..f4c57227ec18 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1265,13 +1265,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); - if (ha->mqenable) { - if (qla25xx_delete_queues(vha, 0) != QLA_SUCCESS) - qla_printk(KERN_WARNING, ha, - "Queue delete failed.\n"); - vha->req_ques[0] = ha->req_q_map[0]->id; - } - qla24xx_disable_vp(vha); fc_remove_host(vha->host); @@ -1293,6 +1286,12 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) vha->host_no, vha->vp_idx, vha)); } + if (ha->mqenable) { + if (qla25xx_delete_queues(vha, 0) != QLA_SUCCESS) + qla_printk(KERN_WARNING, ha, + "Queue delete failed.\n"); + } + scsi_host_put(vha->host); qla_printk(KERN_INFO, ha, "vport %d deleted\n", id); return 0; diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index d27ceda50791..3f23932210c4 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -396,7 +396,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport) qla2x00_start_timer(vha, qla2x00_timer, WATCH_INTERVAL); - memset(vha->req_ques, 0, sizeof(vha->req_ques) * QLA_MAX_HOST_QUES); + memset(vha->req_ques, 0, sizeof(vha->req_ques)); vha->req_ques[0] = ha->req_q_map[0]->id; host->can_queue = ha->req_q_map[0]->length + 128; host->this_id = 255; From 9088608e00d0d9e2a772532d828312e11b118340 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Sun, 8 Feb 2009 20:50:14 -0800 Subject: [PATCH 019/263] [SCSI] qla2xxx: Mask out 'reserved' bits while processing FLT regions. Bits 31-8 are marked as reserved and should be ignored while interpreting a region's code. Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_sup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 9c3b694c049d..284827926eff 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -684,7 +684,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) "end=0x%x size=0x%x.\n", le32_to_cpu(region->code), start, le32_to_cpu(region->end) >> 2, le32_to_cpu(region->size))); - switch (le32_to_cpu(region->code)) { + switch (le32_to_cpu(region->code) & 0xff) { case FLT_REG_FW: ha->flt_region_fw = start; break; From 822c05b6335534f74f90bd0edc12aeb5a591117a Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Sun, 8 Feb 2009 20:50:16 -0800 Subject: [PATCH 020/263] [SCSI] qla2xxx: Update version number to 8.03.00-k3. Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index cfa4c11a4797..79f7053da99b 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.03.00-k2" +#define QLA2XXX_VERSION "8.03.00-k3" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 3 From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 11 Feb 2009 15:10:27 +0100 Subject: [PATCH 021/263] x86, ptrace, mm: fix double-free on race Ptrace_detach() races with __ptrace_unlink() if the traced task is reaped while detaching. This might cause a double-free of the BTS buffer. Change the ptrace_detach() path to only do the memory accounting in ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace() which will be called from __ptrace_unlink(). The fix follows a proposal from Oleg Nesterov. Reported-by: Oleg Nesterov Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++------ include/linux/mm.h | 1 + mm/mlock.c | 7 ++++++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..5a4c23d89892 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index e8ddc98b8405..3d7fb44d7d7e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,5 +1305,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); +extern void release_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 028ec482fdd4..2b57f7e60390 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -657,7 +657,7 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void free_locked_buffer(void *buffer, size_t size) +void release_locked_buffer(void *buffer, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -667,6 +667,11 @@ void free_locked_buffer(void *buffer, size_t size) current->mm->locked_vm -= pgsz; up_write(¤t->mm->mmap_sem); +} + +void free_locked_buffer(void *buffer, size_t size) +{ + release_locked_buffer(buffer, size); kfree(buffer); } From 2fff78c784ed97a8e5aa225ef5228f0a6d862d82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Feb 2009 18:10:10 +0100 Subject: [PATCH 022/263] futex: fix reference leak Catalin noticed that (38d47c1b7075: futex: rely on get_user_pages() for shared futexes) caused an mm_struct leak. Some tracing with the function graph tracer quickly pointed out that futex_wait() has exit paths with unbalanced reference counts. This regression was discovered by kmemleak. Reported-by: Catalin Marinas Signed-off-by: Peter Zijlstra Tested-by: "Pallipadi, Venkatesh" Tested-by: Catalin Marinas Signed-off-by: Ingo Molnar --- kernel/futex.c | 51 ++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f89d373a9c6d..438701adce23 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct task_struct *curr = current; + struct restart_block *restart; DECLARE_WAITQUEUE(wait, curr); struct futex_hash_bucket *hb; struct futex_q q; @@ -1216,11 +1217,13 @@ retry: if (!ret) goto retry; - return ret; + goto out; } ret = -EWOULDBLOCK; - if (uval != val) - goto out_unlock_put_key; + if (unlikely(uval != val)) { + queue_unlock(&q, hb); + goto out_put_key; + } /* Only actually queue if *uaddr contained val. */ queue_me(&q, hb); @@ -1284,38 +1287,38 @@ retry: */ /* If we were woken (and unqueued), we succeeded, whatever. */ + ret = 0; if (!unqueue_me(&q)) - return 0; + goto out_put_key; + ret = -ETIMEDOUT; if (rem) - return -ETIMEDOUT; + goto out_put_key; /* * We expect signal_pending(current), but another thread may * have handled it for us already. */ + ret = -ERESTARTSYS; if (!abs_time) - return -ERESTARTSYS; - else { - struct restart_block *restart; - restart = ¤t_thread_info()->restart_block; - restart->fn = futex_wait_restart; - restart->futex.uaddr = (u32 *)uaddr; - restart->futex.val = val; - restart->futex.time = abs_time->tv64; - restart->futex.bitset = bitset; - restart->futex.flags = 0; + goto out_put_key; - if (fshared) - restart->futex.flags |= FLAGS_SHARED; - if (clockrt) - restart->futex.flags |= FLAGS_CLOCKRT; - return -ERESTART_RESTARTBLOCK; - } + restart = ¤t_thread_info()->restart_block; + restart->fn = futex_wait_restart; + restart->futex.uaddr = (u32 *)uaddr; + restart->futex.val = val; + restart->futex.time = abs_time->tv64; + restart->futex.bitset = bitset; + restart->futex.flags = 0; -out_unlock_put_key: - queue_unlock(&q, hb); + if (fshared) + restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; + + ret = -ERESTART_RESTARTBLOCK; + +out_put_key: put_futex_key(fshared, &q.key); - out: return ret; } From 4f06b0436b2ddbd3b67b10e77098a6862787b3eb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 09:32:19 -0800 Subject: [PATCH 023/263] x86/cpa: make sure cpa is safe to call in lazy mmu mode Impact: fix race leading to crash under KVM and Xen The CPA code may be called while we're in lazy mmu update mode - for example, when using DEBUG_PAGE_ALLOC and doing a slab allocation in an interrupt handler which interrupted a lazy mmu update. In this case, the in-memory pagetable state may be out of date due to pending queued updates. We need to flush any pending updates before inspecting the page table. Similarly, we must explicitly flush any modifications CPA may have made (which comes down to flushing queued operations when flushing the TLB). Signed-off-by: Jeremy Fitzhardinge Acked-by: Marcelo Tosatti Cc: Stable Kernel Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 84ba74820ad6..f664bc1c4093 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -576,6 +576,13 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) else address = *cpa->vaddr; + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -854,6 +861,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); + /* + * If we've been called with lazy mmu updates enabled, then + * make sure that everything gets flushed out before we + * return. + */ + arch_flush_lazy_mmu_mode(); + out: return ret; } From be03d9e8022030c16abf534e33e185bfc3d40eef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 11 Feb 2009 11:20:23 -0800 Subject: [PATCH 024/263] x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem Jeff Mahoney reported: > With Suse's hwinfo tool, on -tip: > WARNING: at arch/x86/mm/pat.c:637 reserve_pfn_range+0x5b/0x26d() reserve_pfn_range() is not tracking the memory range below 1MB as non-RAM and as such is inconsistent with similar checks in reserve_memtype() and free_memtype() Rename the pagerange_is_ram() to pat_pagerange_is_ram() and add the "track legacy 1MB region as non RAM" condition. And also, fix reserve_pfn_range() to return -EINVAL, when the pfn range is RAM. This is to be consistent with this API design. Reported-and-tested-by: Jeff Mahoney Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 1 - arch/x86/mm/ioremap.c | 19 --------- arch/x86/mm/pat.c | 83 ++++++++++++++++++++----------------- 3 files changed, 45 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e8695..776579119a00 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index af750ab973b6..f45d5e29a72e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr) return 0; } -int pagerange_is_ram(unsigned long start, unsigned long end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - if (page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7b61036427df..aebbf67a79d0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) static struct memtype *cached_entry; static u64 cached_start; +static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +{ + int ram_page = 0, not_rampage = 0; + unsigned long page_nr; + + for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); + ++page_nr) { + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && + page_is_ram(page_nr)) + ram_page = 1; + else + not_rampage = 1; + + if (ram_page == not_rampage) + return -1; + } + + return ram_page; +} + /* * For RAM pages, mark the pages as non WB memory type using * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or @@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) *new_type = actual_type; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram != 0) { - /* - * For mapping RAM pages, drivers need to call - * set_memory_[uc|wc|wb] directly, for reserve and free, before - * setting up the PTE. - */ - WARN_ON_ONCE(1); - return 0; - } + /* + * reserve_pfn_range() doesn't support RAM pages. + */ + if (is_ram != 0) + return -EINVAL; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) { int is_ram; - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) free_memtype(paddr, paddr + size); } From a0490fa35dc0022ef95f64802e8edf18c411c790 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 11:35:40 +0100 Subject: [PATCH 025/263] sched: cpu hotplug fix rq_attach_root() does a kfree() with the runqueue lock held. That's not a very wise move, fix it. Signed-off-by: Ingo Molnar --- kernel/sched.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index c1d0ed360088..410eec404133 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6944,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd) static void rq_attach_root(struct rq *rq, struct root_domain *rd) { + struct root_domain *old_rd = NULL; unsigned long flags; spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - struct root_domain *old_rd = rq->rd; + old_rd = rq->rd; if (cpumask_test_cpu(rq->cpu, old_rd->online)) set_rq_offline(rq); cpumask_clear_cpu(rq->cpu, old_rd->span); - if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + /* + * If we dont want to free the old_rt yet then + * set old_rd to NULL to skip the freeing later + * in this function: + */ + if (!atomic_dec_and_test(&old_rd->refcount)) + old_rd = NULL; } atomic_inc(&rd->refcount); @@ -6968,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); + + if (old_rd) + free_rootdomain(old_rd); } static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) From eb099670895f22970cd143875467c2768d6d87e5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Feb 2009 09:27:38 -0500 Subject: [PATCH 026/263] Btrfs: make sure all pending extent operations are complete Theres a slight problem with finish_current_insert, if we set all to 1 and then go through and don't actually skip any of the extents on the pending list, we could exit right after we've added new extents. This is a problem because by inserting the new extents we could have gotten new COW's to happen and such, so we may have some pending updates to do or even more inserts to do after that. So this patch will only exit if we have never skipped any of the extents in the pending list, and we have no extents to insert, this will make sure that all of the pending work is truly done before we return. I've been running with this patch for a few days with all of my other testing and have not seen issues. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 71 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7527523c2d2d..376656f65b33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1323,8 +1323,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - finish_current_insert(trans, root->fs_info->extent_root, 1); - del_pending_extents(trans, root->fs_info->extent_root, 1); + u64 start; + u64 end; + int ret; + + while(1) { + finish_current_insert(trans, root->fs_info->extent_root, 1); + del_pending_extents(trans, root->fs_info->extent_root, 1); + + /* is there more work to do? */ + ret = find_first_extent_bit(&root->fs_info->pending_del, + 0, &start, &end, EXTENT_WRITEBACK); + if (!ret) + continue; + ret = find_first_extent_bit(&root->fs_info->extent_ins, + 0, &start, &end, EXTENT_WRITEBACK); + if (!ret) + continue; + break; + } return 0; } @@ -2211,13 +2228,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, u64 end; u64 priv; u64 search = 0; - u64 skipped = 0; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_path *path; struct pending_extent_op *extent_op, *tmp; struct list_head insert_list, update_list; int ret; - int num_inserts = 0, max_inserts; + int num_inserts = 0, max_inserts, restart = 0; path = btrfs_alloc_path(); INIT_LIST_HEAD(&insert_list); @@ -2233,19 +2249,19 @@ again: ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts && + if (restart && !num_inserts && list_empty(&update_list)) { - skipped = 0; + restart = 0; search = 0; continue; } - mutex_unlock(&info->extent_ins_mutex); break; } ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); if (!ret) { - skipped = 1; + if (all) + restart = 1; search = end + 1; if (need_resched()) { mutex_unlock(&info->extent_ins_mutex); @@ -2264,7 +2280,7 @@ again: list_add_tail(&extent_op->list, &insert_list); search = end + 1; if (num_inserts == max_inserts) { - mutex_unlock(&info->extent_ins_mutex); + restart = 1; break; } } else if (extent_op->type == PENDING_BACKREF_UPDATE) { @@ -2280,7 +2296,6 @@ again: * somebody marked this thing for deletion then just unlock it and be * done, the free_extents will handle it */ - mutex_lock(&info->extent_ins_mutex); list_for_each_entry_safe(extent_op, tmp, &update_list, list) { clear_extent_bits(&info->extent_ins, extent_op->bytenr, extent_op->bytenr + extent_op->num_bytes - 1, @@ -2302,6 +2317,10 @@ again: if (!list_empty(&update_list)) { ret = update_backrefs(trans, extent_root, path, &update_list); BUG_ON(ret); + + /* we may have COW'ed new blocks, so lets start over */ + if (all) + restart = 1; } /* @@ -2309,9 +2328,9 @@ again: * need to make sure everything is cleaned then reset everything and * go back to the beginning */ - if (!num_inserts && all && skipped) { + if (!num_inserts && restart) { search = 0; - skipped = 0; + restart = 0; INIT_LIST_HEAD(&update_list); INIT_LIST_HEAD(&insert_list); goto again; @@ -2368,27 +2387,19 @@ again: BUG_ON(ret); /* - * if we broke out of the loop in order to insert stuff because we hit - * the maximum number of inserts at a time we can handle, then loop - * back and pick up where we left off + * if restart is set for whatever reason we need to go back and start + * searching through the pending list again. + * + * We just inserted some extents, which could have resulted in new + * blocks being allocated, which would result in new blocks needing + * updates, so if all is set we _must_ restart to get the updated + * blocks. */ - if (num_inserts == max_inserts) { - INIT_LIST_HEAD(&insert_list); - INIT_LIST_HEAD(&update_list); - num_inserts = 0; - goto again; - } - - /* - * again, if we need to make absolutely sure there are no more pending - * extent operations left and we know that we skipped some, go back to - * the beginning and do it all again - */ - if (all && skipped) { + if (restart || all) { INIT_LIST_HEAD(&insert_list); INIT_LIST_HEAD(&update_list); search = 0; - skipped = 0; + restart = 0; num_inserts = 0; goto again; } @@ -2709,6 +2720,8 @@ again: goto again; } + if (!err) + finish_current_insert(trans, extent_root, 0); return err; } From b288052e1779261ae80138074989ef50358c4e58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Feb 2009 09:37:35 -0500 Subject: [PATCH 027/263] Btrfs: process mount options on mount -o remount, Btrfs wasn't parsing any new mount options during remount, making it difficult to set mount options on a root drive. Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f3fd7e2cbc38..66b8341e2dba 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -511,6 +511,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) struct btrfs_root *root = btrfs_sb(sb); int ret; + ret = btrfs_parse_options(root, data); + if (ret) + return -EINVAL; + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; From 536ac8ae86e68bb5574d7cc81c7d229a86b82601 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Feb 2009 09:41:38 -0500 Subject: [PATCH 028/263] Btrfs: use larger metadata clusters in ssd mode Larger metadata clusters can significantly improve writeback performance on ssd drives with large erasure blocks. The larger clusters make it more likely a given IO will completely overwrite the ssd block, so it doesn't have to do an internal rwm cycle. On spinning media, lager metadata clusters end up spreading out the metadata more over time, which makes fsck slower, so we don't want this to be the default. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 376656f65b33..c59e12036e20 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2872,7 +2872,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; - empty_cluster = 64 * 1024; + if (!btrfs_test_opt(root, SSD)) + empty_cluster = 64 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) From e1df36d2f18254d0690a0fbe036cece74ec311b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Feb 2009 09:45:08 -0500 Subject: [PATCH 029/263] Btrfs: don't clean old snapshots on sync(1) Cleaning old snapshots can make sync(1) somewhat slow, and some users and applications still use it in a global fsync kind of workload. This patch changes btrfs not to clean old snapshots during sync, which is safe from a FS consistency point of view. The major downside is that it makes it difficult to tell when old snapshots have been reaped and the space they were using has been reclaimed. A new ioctl will be added for this purpose instead. Signed-off-by: Chris Mason --- fs/btrfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66b8341e2dba..19a4daf03ccb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -379,7 +379,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_start_delalloc_inodes(root); btrfs_wait_ordered_extents(root, 0); - btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; From b335b0034e252e79ec2e9c6697f5d663c4627bec Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 12 Feb 2009 10:06:04 -0500 Subject: [PATCH 030/263] Btrfs: Avoid using __GFP_HIGHMEM with slab allocator btrfs_releasepage may call kmem_cache_alloc indirectly, and provide same GFP flags it gets to kmem_cache_alloc. So it's possible to use __GFP_HIGHMEM with the slab allocator. Signed-off-by: Yan Zheng --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f0706210a47..638bcb5e49f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4263,7 +4263,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) { if (PageWriteback(page) || PageDirty(page)) return 0; - return __btrfs_releasepage(page, gfp_flags); + return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); } static void btrfs_invalidatepage(struct page *page, unsigned long offset) From 7951f3cefbd711f4429a0cd014aa83a844c399a0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 12 Feb 2009 10:06:15 -0500 Subject: [PATCH 031/263] Btrfs: balance_level checks !child after access The BUG_ON() is in the wrong spot. Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35443cc4b9a9..6674692f7023 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -917,9 +917,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); + BUG_ON(!child); btrfs_tree_lock(child); btrfs_set_lock_blocking(child); - BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); From e00f7308658622fbd483cb0d9fe41165bf9050d0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 12 Feb 2009 14:11:25 -0500 Subject: [PATCH 032/263] Btrfs: remove btrfs_init_path btrfs_init_path was initially used when the path objects were on the stack. Now all the work is done by btrfs_alloc_path and btrfs_init_path isn't required. This patch removes it, and just uses kmem_cache_zalloc to zero out the object. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 11 ++--------- fs/btrfs/ctree.h | 1 - fs/btrfs/inode-map.c | 1 - fs/btrfs/inode.c | 2 -- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6674692f7023..c8f4c540cc2c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans, static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -inline void btrfs_init_path(struct btrfs_path *p) -{ - memset(p, 0, sizeof(*p)); -} - struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; - path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); - if (path) { - btrfs_init_path(path); + path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); + if (path) path->reada = 1; - } return path; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 531db112c8bd..3f7a8058df2b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1834,7 +1834,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); -void btrfs_init_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_clear_path_blocking(struct btrfs_path *p); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 2aa79873eb46..cc7334d833c9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -84,7 +84,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, search_key.type = 0; search_key.offset = 0; - btrfs_init_path(path); start_found = 0; ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); if (ret < 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 638bcb5e49f6..3cee77ae03c8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2531,8 +2531,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, key.offset = (u64)-1; key.type = (u8)-1; - btrfs_init_path(path); - search_again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) From a48ddf08ba9bab91efd95e458737afa9d7699623 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 12 Feb 2009 14:25:23 -0500 Subject: [PATCH 033/263] Btrfs: remove unused code in split_state() These two lines are not used, remove them. Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 37d43b516b79..ebe6b29e6069 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -415,8 +415,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); free_extent_state(prealloc); return -EEXIST; } From 3f3420df505e47751ef76a652b5cb660e5360d6f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 12 Feb 2009 10:16:03 -0500 Subject: [PATCH 034/263] Btrfs: fs/btrfs/volumes.c: remove useless kzalloc The call to kzalloc is followed by a kmalloc whose result is stored in the same variable. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bcd14ebccae1..c793b6f50d8d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2894,10 +2894,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, free_extent_map(em); } - map = kzalloc(sizeof(*map), GFP_NOFS); - if (!map) - return -ENOMEM; - em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; From 4008c04a07c73ec3cb1be4c1391d2159a8f75d6d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Feb 2009 14:09:45 -0500 Subject: [PATCH 035/263] Btrfs: make a lockdep class for the extent buffer locks Btrfs is currently using spin_lock_nested with a nested value based on the tree depth of the block. But, this doesn't quite work because the max tree depth is bigger than what spin_lock_nested can deal with, and because locks are sometimes taken before the level field is filled in. The solution here is to use lockdep_set_class_and_name instead, and to set the class before unlocking the pages when the block is read from the disk and just after init of a freshly allocated tree block. btrfs_clear_path_blocking is also changed to take the locks in the proper order, and it also makes sure all the locks currently held are properly set to blocking before it tries to retake the spinlocks. Otherwise, lockdep gets upset about bad lock orderin. The lockdep magic cam from Peter Zijlstra Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 45 +++++++++++++++++++++++++++++++---------- fs/btrfs/ctree.h | 10 +++------ fs/btrfs/disk-io.c | 46 +++++++++++++++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 10 +++++++++ fs/btrfs/extent-tree.c | 7 +++++-- fs/btrfs/locking.c | 11 ---------- fs/btrfs/volumes.c | 2 ++ 7 files changed, 99 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c8f4c540cc2c..42491d728e99 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -62,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) /* * reset all the locked nodes in the patch to spinning locks. + * + * held is used to keep lockdep happy, when lockdep is enabled + * we set held to a blocking lock before we go around and + * retake all the spinlocks in the path. You can safely use NULL + * for held */ -noinline void btrfs_clear_path_blocking(struct btrfs_path *p) +noinline void btrfs_clear_path_blocking(struct btrfs_path *p, + struct extent_buffer *held) { int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* lockdep really cares that we take all of these spinlocks + * in the right order. If any of the locks in the path are not + * currently blocking, it is going to complain. So, make really + * really sure by forcing the path to blocking before we clear + * the path blocking. + */ + if (held) + btrfs_set_lock_blocking(held); + btrfs_set_path_blocking(p); +#endif + + for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) btrfs_clear_lock_blocking(p->nodes[i]); } + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (held) + btrfs_clear_lock_blocking(held); +#endif } /* this also releases the path */ @@ -279,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, level, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, - buf->len); + buf->len, level); } else { cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, @@ -1559,7 +1583,7 @@ cow_done: if (!p->skip_locking) p->locks[level] = 1; - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); /* * we have a lock on b and as long as we aren't changing @@ -1598,7 +1622,7 @@ cow_done: btrfs_set_path_blocking(p); sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); BUG_ON(sret > 0); if (sret) { @@ -1618,7 +1642,7 @@ cow_done: btrfs_set_path_blocking(p); sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); if (sret) { ret = sret; @@ -1681,13 +1705,13 @@ cow_done: if (!p->skip_locking) { int lret; - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); lret = btrfs_try_spin_lock(b); if (!lret) { btrfs_set_path_blocking(p); btrfs_tree_lock(b); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, b); } } } else { @@ -1699,7 +1723,7 @@ cow_done: btrfs_set_path_blocking(p); sret = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); BUG_ON(sret > 0); if (sret) { @@ -3919,7 +3943,6 @@ find_next_key: btrfs_release_path(root, path); goto again; } else { - btrfs_clear_path_blocking(path); goto out; } } @@ -3939,7 +3962,7 @@ find_next_key: path->locks[level - 1] = 1; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path); + btrfs_clear_path_blocking(path, NULL); } out: if (ret == 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3f7a8058df2b..766b31ae3186 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -43,11 +43,7 @@ struct btrfs_ordered_sum; #define BTRFS_ACL_NOT_CACHED ((void *)-1) -#ifdef CONFIG_LOCKDEP -# define BTRFS_MAX_LEVEL 7 -#else -# define BTRFS_MAX_LEVEL 8 -#endif +#define BTRFS_MAX_LEVEL 8 /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL @@ -1715,7 +1711,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, u64 empty_size); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u32 blocksize); + u64 bytenr, u32 blocksize, + int level); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_bytes, @@ -1835,7 +1832,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); -void btrfs_clear_path_blocking(struct btrfs_path *p); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5aebddd71193..adda739a0215 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -75,6 +75,40 @@ struct async_submit_bio { struct btrfs_work work; }; +/* These are used to set the lockdep class on the extent buffer locks. + * The class is set by the readpage_end_io_hook after the buffer has + * passed csum validation but before the pages are unlocked. + * + * The lockdep class is also set by btrfs_init_new_buffer on freshly + * allocated blocks. + * + * The class is based on the level in the tree block, which allows lockdep + * to know that lower nodes nest inside the locks of higher nodes. + * + * We also add a check to make sure the highest level of the tree is + * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this + * code needs update as well. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# if BTRFS_MAX_LEVEL != 8 +# error +# endif +static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; +static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { + /* leaf */ + "btrfs-extent-00", + "btrfs-extent-01", + "btrfs-extent-02", + "btrfs-extent-03", + "btrfs-extent-04", + "btrfs-extent-05", + "btrfs-extent-06", + "btrfs-extent-07", + /* highest possible level */ + "btrfs-extent-08", +}; +#endif + /* * extents on the btree inode are pretty simple, there's one extent * that covers the entire device @@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root, return ret; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) +{ + lockdep_set_class_and_name(&eb->lock, + &btrfs_eb_class[level], + btrfs_eb_name[level]); +} +#endif + static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } found_level = btrfs_header_level(eb); + btrfs_set_buffer_lockdep_class(eb, found_level); + ret = csum_tree_block(root, eb, 1); if (ret) ret = -EIO; @@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); dev_root->track_dirty = 1; - if (ret) goto fail_extent_root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 494a56eb2986..95029db227be 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btree_lock_page_hook(struct page *page); + + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); +#else +static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, + int level) +{ +} +#endif #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c59e12036e20..cd86bffbdc9f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3416,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u32 blocksize) + u64 bytenr, u32 blocksize, + int level) { struct extent_buffer *buf; @@ -3424,6 +3425,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, if (!buf) return ERR_PTR(-ENOMEM); btrfs_set_header_generation(buf, trans->transid); + btrfs_set_buffer_lockdep_class(buf, level); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); @@ -3467,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } - buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); + buf = btrfs_init_new_buffer(trans, root, ins.objectid, + blocksize, level); return buf; } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 9ebe9385129b..85506c4a3af7 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -25,21 +25,10 @@ #include "extent_io.h" #include "locking.h" -/* - * btrfs_header_level() isn't free, so don't call it when lockdep isn't - * on - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); -} -#else static inline void spin_nested(struct extent_buffer *eb) { spin_lock(&eb->lock); } -#endif /* * Setting a lock to blocking will drop the spinlock and set the diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c793b6f50d8d..1316139bf9e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3102,6 +3102,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (!sb) return -ENOMEM; btrfs_set_buffer_uptodate(sb); + btrfs_set_buffer_lockdep_class(sb, 0); + write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); From 2456242530a21cfee82646ebeeda65d3f74faa4c Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 12 Feb 2009 14:14:53 -0500 Subject: [PATCH 036/263] Btrfs: hold trans_mutex when using btrfs_record_root_in_trans btrfs_record_root_in_trans needs the trans_mutex held to make sure two callers don't race to setup the root in a given transaction. This adds it to all the places that were missing it. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/transaction.c | 2 ++ fs/btrfs/tree-log.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd86bffbdc9f..0a5d796c9f7e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5658,7 +5658,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, prev_block = block_start; } + mutex_lock(&extent_root->fs_info->trans_mutex); btrfs_record_root_in_trans(found_root); + mutex_unlock(&extent_root->fs_info->trans_mutex); if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { /* * try to update data extent references while diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 919172de5c9a..4112d53d4f4d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -688,7 +688,9 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { + mutex_lock(&root->fs_info->trans_mutex); btrfs_record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 20794290256b..9c462fbd60fa 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2832,7 +2832,9 @@ again: BUG_ON(!wc.replay_dest); wc.replay_dest->log_root = log; + mutex_lock(&fs_info->trans_mutex); btrfs_record_root_in_trans(wc.replay_dest); + mutex_unlock(&fs_info->trans_mutex); ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 10:02:56 -0800 Subject: [PATCH 037/263] x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption Impact: avoid access to percpu vars in preempible context They are intended to be used whenever there's the possibility that there's some stale state which is going to be overwritten with a queued update, or to force a state change when we may be in lazy mode. Either way, we could end up calling it with preemption enabled, so wrap the functions in their own little preempt-disable section so they can be safely called in any context (though preemption should never be enabled if we're actually in a lazy state). (Move out of line to avoid #include dependencies.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 17 ++--------------- arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..a660eceaa273 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..dcba6c567a2a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:30:48 +0100 Subject: [PATCH 038/263] x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context Impact: Catch cases where lazy MMU state is active in a preemtible context arch_flush_lazy_mmu_cpu() has been changed to disable preemption so the checks in enter/leave will never trigger. Put the preemtible() check into arch_flush_lazy_mmu_cpu() to catch such cases. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dcba6c567a2a..c6520a4e85d4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } @@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_cpu_mode(); arch_enter_lazy_cpu_mode(); } From 7ad9de6ac83bd825996d2de98c92e0f425c31050 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:16:09 +0100 Subject: [PATCH 039/263] x86: CPA avoid repeated lazy mmu flush Impact: Flush the lazy MMU only once Pending mmu updates only need to be flushed once to bring the in-memory pagetable state up to date. Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f664bc1c4093..8ca0d8566fc8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -575,14 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; - - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -819,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; From fbc78b07ba53ace155f27491c81a009e541a93ad Mon Sep 17 00:00:00 2001 From: Philippe Gerum Date: Thu, 12 Feb 2009 12:18:46 +0000 Subject: [PATCH 040/263] powerpc/mm: Fix _PAGE_CHG_MASK to protect _PAGE_SPECIAL Fix _PAGE_CHG_MASK so that pte_modify() does not affect the _PAGE_SPECIAL bit. Signed-off-by: Philippe Gerum Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable-4k.h | 2 +- arch/powerpc/include/asm/pgtable-64k.h | 2 +- arch/powerpc/include/asm/pgtable-ppc32.h | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-4k.h b/arch/powerpc/include/asm/pgtable-4k.h index 6b18ba9d2d85..1dbca4e7de67 100644 --- a/arch/powerpc/include/asm/pgtable-4k.h +++ b/arch/powerpc/include/asm/pgtable-4k.h @@ -60,7 +60,7 @@ /* It should be preserving the high 48 bits and then specifically */ /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_HPTEFLAGS) + _PAGE_HPTEFLAGS | _PAGE_SPECIAL) /* Bits to mask out from a PMD to get to the PTE page */ #define PMD_MASKED_BITS 0 diff --git a/arch/powerpc/include/asm/pgtable-64k.h b/arch/powerpc/include/asm/pgtable-64k.h index 07b0d8f09cb6..7389003349a6 100644 --- a/arch/powerpc/include/asm/pgtable-64k.h +++ b/arch/powerpc/include/asm/pgtable-64k.h @@ -114,7 +114,7 @@ static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd) * pgprot changes */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED) + _PAGE_ACCESSED | _PAGE_SPECIAL) /* Bits to mask out from a PMD to get to the PTE page */ #define PMD_MASKED_BITS 0x1ff diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index f69a4d977729..820b5f0a35ce 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -429,7 +429,8 @@ extern int icache_44x_need_flush; #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ From 06eccea6c34e00c8fedce49229ed35fa84619fb7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 Feb 2009 12:36:04 +0000 Subject: [PATCH 041/263] powerpc/mm: Fix numa reserve bootmem page selection Fix the powerpc NUMA reserve bootmem page selection logic. commit 8f64e1f2d1e09267ac926e15090fd505c1c0cbcb (powerpc: Reserve in bootmem lmb reserved regions that cross NUMA nodes) changed the logic for how the powerpc LMB reserved regions were converted to bootmen reserved regions. As the folowing discussion reports, the new logic was not correct. mark_reserved_regions_for_nid() goes through each LMB on the system that specifies a reserved area. It searches for active regions that intersect with that LMB and are on the specified node. It attempts to bootmem-reserve only the area where the active region and the reserved LMB intersect. We can not reserve things on other nodes as they may not have bootmem structures allocated, yet. We base the size of the bootmem reservation on two possible things. Normally, we just make the reservation start and stop exactly at the start and end of the LMB. However, the LMB reservations are not aware of NUMA nodes and on occasion a single LMB may cross into several adjacent active regions. Those may even be on different NUMA nodes and will require separate calls to the bootmem reserve functions. So, the bootmem reservation must be trimmed to fit inside the current active region. That's all fine and dandy, but we trim the reservation in a page-aligned fashion. That's bad because we start the reservation at a non-page-aligned address: physbase. The reservation may only span 2 bytes, but that those bytes may span two pfns and cause a reserve_size of 2*PAGE_SIZE. Take the case where you reserve 0x2 bytes at 0x0fff and where the active region ends at 0x1000. You'll jump into that if() statment, but node_ar.end_pfn=0x1 and start_pfn=0x0. You'll end up with a reserve_size=0x1000, and then call reserve_bootmem_node(node, physbase=0xfff, size=0x1000); 0x1000 may not be on the same node as 0xfff. Oops. In almost all the vm code, end_ is not inclusive. If you have an end_pfn of 0x1234, page 0x1234 is not included in the range. Using PFN_UP instead of the (>> >> PAGE_SHIFT) will make this consistent with the other VM code. We also need to do math for the reserved size with physbase instead of start_pfn. node_ar.end_pfn << PAGE_SHIFT is *precisely* the end of the node. However, (start_pfn << PAGE_SHIFT) is *NOT* precisely the beginning of the reserved area. That is, of course, physbase. If we don't use physbase here, the reserve_size can be made too large. From: Dave Hansen Tested-by: Geoff Levand Tested on PS3. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 7393bd76d698..5ac08b8ab654 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -882,7 +883,7 @@ static void mark_reserved_regions_for_nid(int nid) unsigned long physbase = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT); + unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; unsigned long node_end_pfn = node->node_start_pfn + node->node_spanned_pages; @@ -908,7 +909,7 @@ static void mark_reserved_regions_for_nid(int nid) */ if (end_pfn > node_ar.end_pfn) reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - (start_pfn << PAGE_SHIFT); + - physbase; /* * Only worry about *this* node, others may not * yet have valid NODE_DATA(). From 0047656e2a97d4452dd7df9e52591a7afe21a263 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 12 Feb 2009 12:36:16 +0000 Subject: [PATCH 042/263] powerpc/ps3: Move ps3_mm_add_memory to device_initcall Change the PS3 hotplug memory routine ps3_mm_add_memory() from a core_initcall to a device_initcall. core_initcall routines run before the powerpc topology_init() startup routine, which is a subsys_initcall, resulting in failure of ps3_mm_add_memory() when CONFIG_NUMA=y. When ps3_mm_add_memory() fails the system will boot with just the 128 MiB of boot memory Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/mm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 67de6bf3db3d..d281cc0bca71 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -328,7 +328,7 @@ static int __init ps3_mm_add_memory(void) return result; } -core_initcall(ps3_mm_add_memory); +device_initcall(ps3_mm_add_memory); /*============================================================================*/ /* dma routines */ From 26456dcfb8d8e43b1b64b2a14710694cf7a72f05 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 12 Feb 2009 19:08:58 +0000 Subject: [PATCH 043/263] powerpc/vsx: Fix VSX alignment handler for regs 32-63 Fix the VSX alignment handler for VSX registers > 32. 32-63 are stored in the VMX part of the thread_struct not the FPR part. Signed-off-by: Michael Neuling CC: stable@kernel.org (2.6.27 & .28 please) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/align.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 5af4e9b2dbe2..ada06924a423 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -646,11 +646,16 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, unsigned int areg, struct pt_regs *regs, unsigned int flags, unsigned int length) { - char *ptr = (char *) ¤t->thread.TS_FPR(reg); + char *ptr; int ret = 0; flush_vsx_to_thread(current); + if (reg < 32) + ptr = (char *) ¤t->thread.TS_FPR(reg); + else + ptr = (char *) ¤t->thread.vr[reg - 32]; + if (flags & ST) ret = __copy_to_user(addr, ptr, length); else { From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 12 Feb 2009 18:48:53 -0800 Subject: [PATCH 044/263] x86, hpet: fix for LS21 + HPET = boot hang Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21 systems that had the HPET enabled in the BIOS, resulting in boot hangs for x86_64. Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which merges the i386 and x86_64 HPET code. Prior to this commit, when we setup the HPET timers in x86_64, we did the following: hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); However after the i386/x86_64 HPET merge, we do the following: cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This causes the periodic interrupt to be not so periodic, and that results in the boot time hang I reported earlier in the delay calibration. My fix: Always disable HPET_TN_LEVEL when setting up periodic mode. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8add..5c8da2c2c185 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); From 3997ad317fdf9ecdb5702e2b4fd1f8229814ff8c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Feb 2009 15:00:52 +0100 Subject: [PATCH 045/263] timers: more consistently use clock vs timer While reviewing the manpages, I noticed I'd missed some clock vs timer sites. Make sure that all timer functions call cpu_timer_sample_group() and not cpu_clock_sample_group(). This ensures that we enable the process wide timer in time, and therefore pay the O(n) thread group cost from the syscall. Not doing it here, will result in the first jiffy tick after setting the timer doing this, resulting in a very expensive tick (but only once) and a delay in actually starting the timer. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 60 +++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 2313a4cc14ea..e976e505648d 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -680,6 +680,33 @@ static void cpu_timer_fire(struct k_itimer *timer) } } +/* + * Sample a process (thread group) timer for the given group_leader task. + * Must be called with tasklist_lock held for reading. + */ +static int cpu_timer_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) +{ + struct task_cputime cputime; + + thread_group_cputimer(p, &cputime); + switch (CPUCLOCK_WHICH(which_clock)) { + default: + return -EINVAL; + case CPUCLOCK_PROF: + cpu->cpu = cputime_add(cputime.utime, cputime.stime); + break; + case CPUCLOCK_VIRT: + cpu->cpu = cputime.utime; + break; + case CPUCLOCK_SCHED: + cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + break; + } + return 0; +} + /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. @@ -741,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &val); } else { - cpu_clock_sample_group(timer->it_clock, p, &val); + cpu_timer_sample_group(timer->it_clock, p, &val); } if (old) { @@ -889,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) read_unlock(&tasklist_lock); goto dead; } else { - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead = (unlikely(p->exit_state) && thread_group_empty(p)); } @@ -1244,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) clear_dead_task(timer, now); goto out_unlock; } - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); bump_cpu_timer(timer, now); /* Leave the tasklist_lock locked for the call below. */ } @@ -1408,33 +1435,6 @@ void run_posix_cpu_timers(struct task_struct *tsk) } } -/* - * Sample a process (thread group) timer for the given group_leader task. - * Must be called with tasklist_lock held for reading. - */ -static int cpu_timer_sample_group(const clockid_t which_clock, - struct task_struct *p, - union cpu_time_count *cpu) -{ - struct task_cputime cputime; - - thread_group_cputimer(p, &cputime); - switch (CPUCLOCK_WHICH(which_clock)) { - default: - return -EINVAL; - case CPUCLOCK_PROF: - cpu->cpu = cputime_add(cputime.utime, cputime.stime); - break; - case CPUCLOCK_VIRT: - cpu->cpu = cputime.utime; - break; - case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); - break; - } - return 0; -} - /* * Set one of the process-wide special case CPU timers. * The tsk->sighand->siglock must be held by the caller. From 0b49ec37a20bc7eb7178105aadaa8d1ecba825f8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 8 Feb 2009 20:27:47 -0700 Subject: [PATCH 046/263] PCI/MSI: fix msi_mask() shift fix Hidetoshi Seto points out that commit bffac3c593eba1f9da3efd0199e49ea6558a40ce has wrong values in the array. Rather than correct the array, we can just use a bounds check and perform the calculation specified in the comment. As a bonus, this will not run off the end of the array if the device specifies an illegal value in the MSI capability. Signed-off-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 44f15ff70c1d..baba2eb5367d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -103,14 +103,12 @@ static void msix_set_enable(struct pci_dev *dev, int enable) } } -/* - * Essentially, this is ((1 << (1 << x)) - 1), but without the - * undefinedness of a << 32. - */ static inline __attribute_const__ u32 msi_mask(unsigned x) { - static const u32 mask[] = { 1, 2, 4, 0xf, 0xff, 0xffff, 0xffffffff }; - return mask[x]; + /* Don't shift by >= width of type */ + if (x >= 5) + return 0xffffffff; + return (1 << (1 << x)) - 1; } static void msix_flush_writes(struct irq_desc *desc) From 4cc59c721cba27a4644e29103afac0f91af8da3c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Feb 2009 09:31:20 -0800 Subject: [PATCH 047/263] PCI: fix rom.c kernel-doc warning Fix PCI kernel-doc warning: Warning(linux-2.6.29-rc4-git1/drivers/pci/rom.c:67): No description found for parameter 'pdev' Signed-off-by: Randy Dunlap cc: Jesse Barnes Signed-off-by: Jesse Barnes --- drivers/pci/rom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 29cbe47f219f..36864a935d68 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -55,6 +55,7 @@ void pci_disable_rom(struct pci_dev *pdev) /** * pci_get_rom_size - obtain the actual size of the ROM image + * @pdev: target PCI device * @rom: kernel virtual pointer to image of ROM * @size: size of PCI window * return: size of actual ROM image From b33bfdef24565fe54da91adf3cd4eea13488d7fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 9 Jan 2009 17:04:26 -0800 Subject: [PATCH 048/263] PCI: fix struct pci_platform_pm_ops kernel-doc Fix struct pci_platform_pm_ops kernel-doc notation. Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 26ddf78ac300..07c0aa5275e6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -16,21 +16,21 @@ extern int pci_mmap_fits(struct pci_dev *pdev, int resno, #endif /** - * Firmware PM callbacks + * struct pci_platform_pm_ops - Firmware PM callbacks * - * @is_manageable - returns 'true' if given device is power manageable by the - * platform firmware + * @is_manageable: returns 'true' if given device is power manageable by the + * platform firmware * - * @set_state - invokes the platform firmware to set the device's power state + * @set_state: invokes the platform firmware to set the device's power state * - * @choose_state - returns PCI power state of given device preferred by the - * platform; to be used during system-wide transitions from a - * sleeping state to the working state and vice versa + * @choose_state: returns PCI power state of given device preferred by the + * platform; to be used during system-wide transitions from a + * sleeping state to the working state and vice versa * - * @can_wakeup - returns 'true' if given device is capable of waking up the - * system from a sleeping state + * @can_wakeup: returns 'true' if given device is capable of waking up the + * system from a sleeping state * - * @sleep_wake - enables/disables the system wake up capability of given device + * @sleep_wake: enables/disables the system wake up capability of given device * * If given platform is generally capable of power managing PCI devices, all of * these callbacks are mandatory. From f5ddcac435b6c6133a9c137c345abef53b93cf55 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 9 Jan 2009 17:03:20 -0800 Subject: [PATCH 049/263] PCI: fix missing kernel-doc and typos Fix pci kernel-doc parameter missing notation, correct function name, and fix typo: Warning(linux-2.6.28-git10//drivers/pci/pci.c:1511): No description found for parameter 'exclusive' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3efe6b19ee7..6d6120007af4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1540,16 +1540,21 @@ void pci_release_region(struct pci_dev *pdev, int bar) } /** - * pci_request_region - Reserved PCI I/O and memory resource + * __pci_request_region - Reserved PCI I/O and memory resource * @pdev: PCI device whose resources are to be reserved * @bar: BAR to be reserved * @res_name: Name to be associated with resource. + * @exclusive: whether the region access is exclusive or not * * Mark the PCI region associated with PCI device @pdev BR @bar as * being reserved by owner @res_name. Do not access any * address inside the PCI regions unless this call returns * successfully. * + * If @exclusive is set, then the region is marked so that userspace + * is explicitly not allowed to map the resource via /dev/mem or + * sysfs MMIO access. + * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. */ @@ -1588,12 +1593,12 @@ err_out: } /** - * pci_request_region - Reserved PCI I/O and memory resource + * pci_request_region - Reserve PCI I/O and memory resource * @pdev: PCI device whose resources are to be reserved * @bar: BAR to be reserved - * @res_name: Name to be associated with resource. + * @res_name: Name to be associated with resource * - * Mark the PCI region associated with PCI device @pdev BR @bar as + * Mark the PCI region associated with PCI device @pdev BAR @bar as * being reserved by owner @res_name. Do not access any * address inside the PCI regions unless this call returns * successfully. From a08f6e04d74478d91f34a0484e69e89870adb33d Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 13 Feb 2009 12:03:17 -0700 Subject: [PATCH 050/263] PCI: Documentation: fix minor PCIe HOWTO thinko Update doc to correctly refer to replacing the pci_register_driver API, and not the non-existent "pci_module_init" API. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- Documentation/PCI/PCIEBUS-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt index 9a07e38631b0..6bd5f372adec 100644 --- a/Documentation/PCI/PCIEBUS-HOWTO.txt +++ b/Documentation/PCI/PCIEBUS-HOWTO.txt @@ -93,7 +93,7 @@ the PCI Express Port Bus driver from loading a service driver. int pcie_port_service_register(struct pcie_port_service_driver *new) -This API replaces the Linux Driver Model's pci_module_init API. A +This API replaces the Linux Driver Model's pci_register_driver API. A service driver should always calls pcie_port_service_register at module init. Note that after service driver being loaded, calls such as pci_enable_device(dev) and pci_set_master(dev) are no longer From 12d60e28bed3f593aac5385acbdbb089eb8ae21e Mon Sep 17 00:00:00 2001 From: Wim Van Sebroeck Date: Wed, 28 Jan 2009 20:51:04 +0000 Subject: [PATCH 051/263] [WATCHDOG] iTCO_wdt: fix SMI_EN regression 2 bugzilla: #12363 commit 7cd5b08be3c489df11b559fef210b81133764ad4 added a second regression: some Dell's and Compaq's lockup on boot. So we revert most of the code. The ICH9 reboot issue remains in place and will need some more fixing... :-( Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 2 +- drivers/watchdog/iTCO_vendor_support.c | 32 ++++++++++++++++++++--- drivers/watchdog/iTCO_wdt.c | 35 +++++++++++--------------- 3 files changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 09a3d5522b43..325c10ff6a2c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -406,7 +406,7 @@ config ITCO_WDT ---help--- Hardware driver for the intel TCO timer based watchdog devices. These drivers are included in the Intel 82801 I/O Controller - Hub family (from ICH0 up to ICH8) and in the Intel 6300ESB + Hub family (from ICH0 up to ICH10) and in the Intel 63xxESB controller hub. The TCO (Total Cost of Ownership) timer is a watchdog timer diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c index 2474ebca88f6..d8264ad0be41 100644 --- a/drivers/watchdog/iTCO_vendor_support.c +++ b/drivers/watchdog/iTCO_vendor_support.c @@ -1,7 +1,7 @@ /* * intel TCO vendor specific watchdog driver support * - * (c) Copyright 2006-2008 Wim Van Sebroeck . + * (c) Copyright 2006-2009 Wim Van Sebroeck . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,7 +19,7 @@ /* Module and version information */ #define DRV_NAME "iTCO_vendor_support" -#define DRV_VERSION "1.02" +#define DRV_VERSION "1.03" #define PFX DRV_NAME ": " /* Includes */ @@ -77,6 +77,26 @@ MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=0 (n * 20.6 seconds. */ +static void supermicro_old_pre_start(unsigned long acpibase) +{ + unsigned long val32; + + /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ + val32 = inl(SMI_EN); + val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ + outl(val32, SMI_EN); /* Needed to activate watchdog */ +} + +static void supermicro_old_pre_stop(unsigned long acpibase) +{ + unsigned long val32; + + /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */ + val32 = inl(SMI_EN); + val32 |= 0x00002000; /* Turn on SMI clearing watchdog */ + outl(val32, SMI_EN); /* Needed to deactivate watchdog */ +} + static void supermicro_old_pre_keepalive(unsigned long acpibase) { /* Reload TCO Timer (done in iTCO_wdt_keepalive) + */ @@ -228,14 +248,18 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat) void iTCO_vendor_pre_start(unsigned long acpibase, unsigned int heartbeat) { - if (vendorsupport == SUPERMICRO_NEW_BOARD) + if (vendorsupport == SUPERMICRO_OLD_BOARD) + supermicro_old_pre_start(acpibase); + else if (vendorsupport == SUPERMICRO_NEW_BOARD) supermicro_new_pre_start(heartbeat); } EXPORT_SYMBOL(iTCO_vendor_pre_start); void iTCO_vendor_pre_stop(unsigned long acpibase) { - if (vendorsupport == SUPERMICRO_NEW_BOARD) + if (vendorsupport == SUPERMICRO_OLD_BOARD) + supermicro_old_pre_stop(acpibase); + else if (vendorsupport == SUPERMICRO_NEW_BOARD) supermicro_new_pre_stop(); } EXPORT_SYMBOL(iTCO_vendor_pre_stop); diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 5b395a4ddfdf..352334947ea3 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -1,7 +1,7 @@ /* - * intel TCO Watchdog Driver (Used in i82801 and i6300ESB chipsets) + * intel TCO Watchdog Driver (Used in i82801 and i63xxESB chipsets) * - * (c) Copyright 2006-2008 Wim Van Sebroeck . + * (c) Copyright 2006-2009 Wim Van Sebroeck . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -63,7 +63,7 @@ /* Module and version information */ #define DRV_NAME "iTCO_wdt" -#define DRV_VERSION "1.04" +#define DRV_VERSION "1.05" #define PFX DRV_NAME ": " /* Includes */ @@ -236,16 +236,16 @@ MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl); /* Address definitions for the TCO */ /* TCO base address */ -#define TCOBASE iTCO_wdt_private.ACPIBASE + 0x60 +#define TCOBASE iTCO_wdt_private.ACPIBASE + 0x60 /* SMI Control and Enable Register */ -#define SMI_EN iTCO_wdt_private.ACPIBASE + 0x30 +#define SMI_EN iTCO_wdt_private.ACPIBASE + 0x30 #define TCO_RLD TCOBASE + 0x00 /* TCO Timer Reload and Curr. Value */ #define TCOv1_TMR TCOBASE + 0x01 /* TCOv1 Timer Initial Value */ -#define TCO_DAT_IN TCOBASE + 0x02 /* TCO Data In Register */ -#define TCO_DAT_OUT TCOBASE + 0x03 /* TCO Data Out Register */ -#define TCO1_STS TCOBASE + 0x04 /* TCO1 Status Register */ -#define TCO2_STS TCOBASE + 0x06 /* TCO2 Status Register */ +#define TCO_DAT_IN TCOBASE + 0x02 /* TCO Data In Register */ +#define TCO_DAT_OUT TCOBASE + 0x03 /* TCO Data Out Register */ +#define TCO1_STS TCOBASE + 0x04 /* TCO1 Status Register */ +#define TCO2_STS TCOBASE + 0x06 /* TCO2 Status Register */ #define TCO1_CNT TCOBASE + 0x08 /* TCO1 Control Register */ #define TCO2_CNT TCOBASE + 0x0a /* TCO2 Control Register */ #define TCOv2_TMR TCOBASE + 0x12 /* TCOv2 Timer Initial Value */ @@ -338,7 +338,6 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(void) static int iTCO_wdt_start(void) { unsigned int val; - unsigned long val32; spin_lock(&iTCO_wdt_private.io_lock); @@ -351,11 +350,6 @@ static int iTCO_wdt_start(void) return -EIO; } - /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ - val32 = inl(SMI_EN); - val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ - outl(val32, SMI_EN); - /* Force the timer to its reload value by writing to the TCO_RLD register */ if (iTCO_wdt_private.iTCO_version == 2) @@ -378,7 +372,6 @@ static int iTCO_wdt_start(void) static int iTCO_wdt_stop(void) { unsigned int val; - unsigned long val32; spin_lock(&iTCO_wdt_private.io_lock); @@ -390,11 +383,6 @@ static int iTCO_wdt_stop(void) outw(val, TCO1_CNT); val = inw(TCO1_CNT); - /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */ - val32 = inl(SMI_EN); - val32 |= 0x00002000; - outl(val32, SMI_EN); - /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ iTCO_wdt_set_NO_REBOOT_bit(); @@ -649,6 +637,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev, int ret; u32 base_address; unsigned long RCBA; + unsigned long val32; /* * Find the ACPI/PM base I/O address which is the base @@ -695,6 +684,10 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev, ret = -EIO; goto out; } + /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ + val32 = inl(SMI_EN); + val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ + outl(val32, SMI_EN); /* The TCO I/O registers reside in a 32-byte range pointed to by the TCOBASE value */ From 9132f1b453924e7595ce2dc1853704b2a31f42de Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Feb 2009 13:24:10 +0000 Subject: [PATCH 052/263] [ARM] omap: fix omap2_divisor_to_clksel() error return value The error checks for omap2_divisor_to_clksel() and comment disagree with the actual value returned on error. Fix this to return the correct error value. Signed-off-by: Russell King --- arch/arm/mach-omap2/clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index ad721e0cbf7a..2899cba2aaa0 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -565,7 +565,7 @@ u32 omap2_clksel_to_divisor(struct clk *clk, u32 field_val) * * Given a struct clk of a rate-selectable clksel clock, and a clock divisor, * find the corresponding register field value. The return register value is - * the value before left-shifting. Returns 0xffffffff on error + * the value before left-shifting. Returns ~0 on error */ u32 omap2_divisor_to_clksel(struct clk *clk, u32 div) { @@ -577,7 +577,7 @@ u32 omap2_divisor_to_clksel(struct clk *clk, u32 div) clks = omap2_get_clksel_by_parent(clk, clk->parent); if (clks == NULL) - return 0; + return ~0; for (clkr = clks->rates; clkr->div; clkr++) { if ((clkr->flags & cpu_mask) && (clkr->div == div)) @@ -588,7 +588,7 @@ u32 omap2_divisor_to_clksel(struct clk *clk, u32 div) printk(KERN_ERR "clock: Could not find divisor %d for " "clock %s parent %s\n", div, clk->name, clk->parent->name); - return 0; + return ~0; } return clkr->val; From abf239657b88fa7e75d5b44a65a4177e7bb8acce Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Feb 2009 13:25:38 +0000 Subject: [PATCH 053/263] [ARM] omap: fix _omap2_clksel_get_src_field() _omap2_clksel_get_src_field() was returning the first entry which was either the default _or_ applicable to the SoC. This is wrong - we should be returning the first default which is applicable to the SoC. Signed-off-by: Russell King --- arch/arm/mach-omap2/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 2899cba2aaa0..e64549e4e326 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -708,7 +708,7 @@ static u32 omap2_clksel_get_src_field(void __iomem **src_addr, return 0; for (clkr = clks->rates; clkr->div; clkr++) { - if (clkr->flags & (cpu_mask | DEFAULT_RATE)) + if (clkr->flags & cpu_mask && clkr->flags & DEFAULT_RATE) break; /* Found the default rate for this platform */ } From d794bf8e0936dce45104565cd48c571061f4c1e3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 14 Feb 2009 10:31:16 -0500 Subject: [PATCH 054/263] ext4: Initialize preallocation list_head's properly When creating a new ext4_prealloc_space structure, we have to initialize its list_head pointers before we add them to any prealloc lists. Otherwise, with list debug enabled, we will get list corruption warnings. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c962d0690505..4415beeb0b62 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) pa->pa_free = pa->pa_len; atomic_set(&pa->pa_count, 1); spin_lock_init(&pa->pa_lock); + INIT_LIST_HEAD(&pa->pa_inode_list); + INIT_LIST_HEAD(&pa->pa_group_list); pa->pa_deleted = 0; pa->pa_linear = 0; @@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) atomic_set(&pa->pa_count, 1); spin_lock_init(&pa->pa_lock); INIT_LIST_HEAD(&pa->pa_inode_list); + INIT_LIST_HEAD(&pa->pa_group_list); pa->pa_deleted = 0; pa->pa_linear = 1; From 2acf2c261b823d9d9ed954f348b97620297a36b5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 14 Feb 2009 10:42:58 -0500 Subject: [PATCH 055/263] ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages With delayed allocation we lock the page in write_cache_pages() and try to build an in memory extent of contiguous blocks. This is needed so that we can get large contiguous blocks request. If range_cyclic mode is enabled, write_cache_pages() will loop back to the 0 index if no I/O has been done yet, and try to start writing from the beginning of the range. That causes an attempt to take the page lock of lower index page while holding the page lock of higher index page, which can cause a dead lock with another writeback thread. The solution is to implement the range_cyclic behavior in ext4_da_writepages() instead. http://bugzilla.kernel.org/show_bug.cgi?id=12579 Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 658c4a7f2578..cbd2ca99d113 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2439,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping, int no_nrwrite_index_update; int pages_written = 0; long pages_skipped; + int range_cyclic, cycled = 1, io_done = 0; int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); @@ -2490,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - if (wbc->range_cyclic) + range_cyclic = wbc->range_cyclic; + if (wbc->range_cyclic) { index = mapping->writeback_index; - else + if (index) + cycled = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = LLONG_MAX; + wbc->range_cyclic = 0; + } else index = wbc->range_start >> PAGE_CACHE_SHIFT; mpd.wbc = wbc; @@ -2506,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->no_nrwrite_index_update = 1; pages_skipped = wbc->pages_skipped; +retry: while (!ret && wbc->nr_to_write > 0) { /* @@ -2548,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping, pages_written += mpd.pages_written; wbc->pages_skipped = pages_skipped; ret = 0; + io_done = 1; } else if (wbc->nr_to_write) /* * There is no more writeout needed @@ -2556,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping, */ break; } + if (!io_done && !cycled) { + cycled = 1; + index = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = mapping->writeback_index - 1; + goto retry; + } if (pages_skipped != wbc->pages_skipped) printk(KERN_EMERG "This should not happen leaving %s " "with nr_to_write = %ld ret = %d\n", @@ -2563,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping, /* Update index */ index += pages_written; + wbc->range_cyclic = range_cyclic; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) /* * set the writeback_index so that range_cyclic From 98ad6e3b1f5f2303815c063ece91291a597f6044 Mon Sep 17 00:00:00 2001 From: wanzongshun Date: Fri, 13 Feb 2009 06:04:32 +0100 Subject: [PATCH 056/263] [ARM] 5398/1: Add Wan ZongShun to MAINTAINERS for W90P910 Add Wan ZongShun to MAINTAINERS for W90P910. Signed-off-by: Wan ZongShun Signed-off-by: Russell King --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index db65b4e6d132..2a91b10a8a2a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -692,6 +692,13 @@ M: kernel@wantstofly.org L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) S: Maintained +ARM/NUVOTON W90X900 ARM ARCHITECTURE +P: Wan ZongShun +M: mcuos.com@gmail.com +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.mcuos.com +S: Maintained + ARPD SUPPORT P: Jonathan Layes L: netdev@vger.kernel.org From 2af29b78618ac8b3a8746337002f108f8fdf56ad Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 11 Feb 2009 21:23:10 +0100 Subject: [PATCH 057/263] [ARM] 5390/1: AT91: Watchdog fixes The recently merged AT91SAM9 watchdog driver uses the AT91SAM9X_WATCHDOG config variable, whereas the original version of the driver (and the platform support code) used AT91SAM9_WATCHDOG. This causes the watchdog platform_device to never be registered, and therefore the driver not to be initialized. This patch: - updates the platform support code to use AT91SAM9X_WATCHDOG. - includes to fix compile error (same fix as was applied to at91rm9200_wdt.c) - fixes comment regarding watchdog clock-rates in at91rm9200. Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/configs/at91sam9260ek_defconfig | 2 +- arch/arm/configs/at91sam9261ek_defconfig | 2 +- arch/arm/configs/at91sam9263ek_defconfig | 2 +- arch/arm/configs/at91sam9rlek_defconfig | 2 +- arch/arm/configs/qil-a9260_defconfig | 2 +- arch/arm/mach-at91/at91cap9_devices.c | 2 +- arch/arm/mach-at91/at91sam9260_devices.c | 2 +- arch/arm/mach-at91/at91sam9261_devices.c | 2 +- arch/arm/mach-at91/at91sam9263_devices.c | 2 +- arch/arm/mach-at91/at91sam9rl_devices.c | 2 +- drivers/watchdog/at91rm9200_wdt.c | 4 ++-- drivers/watchdog/at91sam9_wdt.c | 1 + 12 files changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/arm/configs/at91sam9260ek_defconfig b/arch/arm/configs/at91sam9260ek_defconfig index e0ee7060f9aa..98e2f3de4bc5 100644 --- a/arch/arm/configs/at91sam9260ek_defconfig +++ b/arch/arm/configs/at91sam9260ek_defconfig @@ -608,7 +608,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -CONFIG_AT91SAM9_WATCHDOG=y +CONFIG_AT91SAM9X_WATCHDOG=y # # USB-based Watchdog Cards diff --git a/arch/arm/configs/at91sam9261ek_defconfig b/arch/arm/configs/at91sam9261ek_defconfig index 01d1ef97d8be..149456142392 100644 --- a/arch/arm/configs/at91sam9261ek_defconfig +++ b/arch/arm/configs/at91sam9261ek_defconfig @@ -700,7 +700,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -CONFIG_AT91SAM9_WATCHDOG=y +CONFIG_AT91SAM9X_WATCHDOG=y # # USB-based Watchdog Cards diff --git a/arch/arm/configs/at91sam9263ek_defconfig b/arch/arm/configs/at91sam9263ek_defconfig index 036a126725c1..21599f3c6275 100644 --- a/arch/arm/configs/at91sam9263ek_defconfig +++ b/arch/arm/configs/at91sam9263ek_defconfig @@ -710,7 +710,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -CONFIG_AT91SAM9_WATCHDOG=y +CONFIG_AT91SAM9X_WATCHDOG=y # # USB-based Watchdog Cards diff --git a/arch/arm/configs/at91sam9rlek_defconfig b/arch/arm/configs/at91sam9rlek_defconfig index 237a2a6a8517..e2df81a3e804 100644 --- a/arch/arm/configs/at91sam9rlek_defconfig +++ b/arch/arm/configs/at91sam9rlek_defconfig @@ -606,7 +606,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -CONFIG_AT91SAM9_WATCHDOG=y +CONFIG_AT91SAM9X_WATCHDOG=y # # Sonics Silicon Backplane diff --git a/arch/arm/configs/qil-a9260_defconfig b/arch/arm/configs/qil-a9260_defconfig index cd1d717903ac..9b32d0eb89ba 100644 --- a/arch/arm/configs/qil-a9260_defconfig +++ b/arch/arm/configs/qil-a9260_defconfig @@ -727,7 +727,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -# CONFIG_AT91SAM9_WATCHDOG is not set +# CONFIG_AT91SAM9X_WATCHDOG is not set # # USB-based Watchdog Cards diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c index 9eca2209cde6..412aa49ad2fb 100644 --- a/arch/arm/mach-at91/at91cap9_devices.c +++ b/arch/arm/mach-at91/at91cap9_devices.c @@ -697,7 +697,7 @@ static void __init at91_add_device_rtt(void) * Watchdog * -------------------------------------------------------------------- */ -#if defined(CONFIG_AT91SAM9_WATCHDOG) || defined(CONFIG_AT91SAM9_WATCHDOG_MODULE) +#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) static struct platform_device at91cap9_wdt_device = { .name = "at91_wdt", .id = -1, diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c index fdde1ea21b07..d74c9ac007e7 100644 --- a/arch/arm/mach-at91/at91sam9260_devices.c +++ b/arch/arm/mach-at91/at91sam9260_devices.c @@ -643,7 +643,7 @@ static void __init at91_add_device_rtt(void) * Watchdog * -------------------------------------------------------------------- */ -#if defined(CONFIG_AT91SAM9_WATCHDOG) || defined(CONFIG_AT91SAM9_WATCHDOG_MODULE) +#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) static struct platform_device at91sam9260_wdt_device = { .name = "at91_wdt", .id = -1, diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 17289756f80f..59fc48311fb0 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -621,7 +621,7 @@ static void __init at91_add_device_rtt(void) * Watchdog * -------------------------------------------------------------------- */ -#if defined(CONFIG_AT91SAM9_WATCHDOG) || defined(CONFIG_AT91SAM9_WATCHDOG_MODULE) +#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) static struct platform_device at91sam9261_wdt_device = { .name = "at91_wdt", .id = -1, diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index b753cb879d8e..134af97ff340 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -854,7 +854,7 @@ static void __init at91_add_device_rtt(void) * Watchdog * -------------------------------------------------------------------- */ -#if defined(CONFIG_AT91SAM9_WATCHDOG) || defined(CONFIG_AT91SAM9_WATCHDOG_MODULE) +#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) static struct platform_device at91sam9263_wdt_device = { .name = "at91_wdt", .id = -1, diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index 145324f4ec56..728186515cdf 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -609,7 +609,7 @@ static void __init at91_add_device_rtt(void) * Watchdog * -------------------------------------------------------------------- */ -#if defined(CONFIG_AT91SAM9_WATCHDOG) || defined(CONFIG_AT91SAM9_WATCHDOG_MODULE) +#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) static struct platform_device at91sam9rl_wdt_device = { .name = "at91_wdt", .id = -1, diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 5531691f46ea..e35d54589232 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -107,10 +107,10 @@ static int at91_wdt_close(struct inode *inode, struct file *file) static int at91_wdt_settimeout(int new_time) { /* - * All counting occurs at SLOW_CLOCK / 128 = 0.256 Hz + * All counting occurs at SLOW_CLOCK / 128 = 256 Hz * * Since WDV is a 16-bit counter, the maximum period is - * 65536 / 0.256 = 256 seconds. + * 65536 / 256 = 256 seconds. */ if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) return -EINVAL; diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index b1da287f90ec..a56ac84381b1 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include From 2b768b6cdbcf7fa0761e6c35c6ea288297582c43 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 11 Feb 2009 21:39:05 +0100 Subject: [PATCH 058/263] [ARM] 5391/1: AT91: Enable GPIO clocks earlier Enable the GPIO clocks earlier in the initialization sequence. This allow the board-setup code to read and set GPIO pins. Signed-off-by: Marc Pignat Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/gpio.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c index 9b0447c3d59b..2f7d4977dce9 100644 --- a/arch/arm/mach-at91/gpio.c +++ b/arch/arm/mach-at91/gpio.c @@ -490,7 +490,8 @@ postcore_initcall(at91_gpio_debugfs_init); /*--------------------------------------------------------------------------*/ -/* This lock class tells lockdep that GPIO irqs are in a different +/* + * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; @@ -509,9 +510,6 @@ void __init at91_gpio_irq_setup(void) unsigned id = this->id; unsigned i; - /* enable PIO controller's clock */ - clk_enable(this->clock); - __raw_writel(~0, this->regbase + PIO_IDR); for (i = 0, pin = this->chipbase; i < 32; i++, pin++) { @@ -556,7 +554,14 @@ void __init at91_gpio_init(struct at91_gpio_bank *data, int nr_banks) data->chipbase = PIN_BASE + i * 32; data->regbase = data->offset + (void __iomem *)AT91_VA_BASE_SYS; - /* AT91SAM9263_ID_PIOCDE groups PIOC, PIOD, PIOE */ + /* enable PIO controller's clock */ + clk_enable(data->clock); + + /* + * Some processors share peripheral ID between multiple GPIO banks. + * SAM9263 (PIOC, PIOD, PIOE) + * CAP9 (PIOA, PIOB, PIOC, PIOD) + */ if (last && last->id == data->id) last->next = data; } From 9af88143b277f52fc6ce0d69137f435c73c39c1a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 13 Feb 2009 23:18:03 +0000 Subject: [PATCH 059/263] iommu: fix Intel IOMMU write-buffer flushing This is the cause of the DMA faults and disk corruption that people have been seeing. Some chipsets neglect to report the RWBF "capability" -- the flag which says that we need to flush the chipset write-buffer when changing the DMA page tables, to ensure that the change is visible to the IOMMU. Override that bit on the affected chipsets, and everything is happy again. Thanks to Chris and Bhavesh and others for helping to debug. Should resolve: https://bugzilla.redhat.com/show_bug.cgi?id=479996 http://bugzilla.kernel.org/show_bug.cgi?id=12578 Signed-off-by: David Woodhouse Cc: Linus Torvalds Tested-and-acked-by: Chris Wright Reviewed-by: Bhavesh Davda Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f4b7c79023ff..f3f686581a90 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -61,6 +61,8 @@ /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; +static int rwbf_quirk; + /* * 0: Present * 1-11: Reserved @@ -785,7 +787,7 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) u32 val; unsigned long flag; - if (!cap_rwbf(iommu->cap)) + if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; val = iommu->gcmd | DMA_GCMD_WBF; @@ -3137,3 +3139,15 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap_range, .iova_to_phys = intel_iommu_iova_to_phys, }; + +static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) +{ + /* + * Mobile 4 Series Chipset neglects to set RWBF capability, + * but needs it: + */ + printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); + rwbf_quirk = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); From e49590b6dd356f8ef10ba3531a29e5086f6f2e3a Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Fri, 13 Feb 2009 20:56:18 -0500 Subject: [PATCH 060/263] x86, olpc: fix model detection without OFW Impact: fix "garbled display, laptop is unusable" bug Commit e51a1ac2dfca9ad869471e88f828281db7e810c0 ("x86, olpc: fix endian bug in openfirmware workaround") breaks model comparison on OLPC; the value 0xc2 needs to be scaled up by olpc_board(). The pre-patch version was wrong, but accidentally worked anyway (big-endian 0xc2 is big enough to satisfy all other board revisions, but little endian 0xc2 is not). Signed-off-by: Chris Ball Cc: Andrew Morton Acked-by: Andres Salomon Cc: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1f..4006c522adc7 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif From d39123a486524fed9b4e43e08a8757fd90a5859a Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 8 Jan 2009 15:13:31 +0800 Subject: [PATCH 061/263] KVM: ia64: fix fp fault/trap handler The floating-point registers f6-f11 is used by vmm and saved in kvm-pt-regs, so should set the correct bit mask and the pointer in fp_state, otherwise, fpswa may touch vmm's fp registers instead of guests'. In addition, for fp trap handling, since the instruction which leads to fp trap is completely executed, so can't use retry machanism to re-execute it, because it may pollute some registers. Signed-off-by: Yang Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/process.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index 552d07724207..230eae482f32 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, if (!vmm_fpswa_interface) return (fpswa_ret_t) {-1, 0, 0, 0}; - /* - * Just let fpswa driver to use hardware fp registers. - * No fp register is valid in memory. - */ memset(&fp_state, 0, sizeof(fp_state_t)); /* + * compute fp_state. only FP registers f6 - f11 are used by the + * vmm, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; + + /* * unsigned long (*EFI_FPSWA) ( * unsigned long trap_type, * void *Bundle, @@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim, status = vmm_handle_fpu_swa(0, regs, isr); if (!status) return ; - else if (-EAGAIN == status) { - vcpu_decrement_iip(vcpu); - return ; - } break; } From 7a0eb1960e8ddcb68ea631caf16815485af0e228 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Jan 2009 14:57:52 +0200 Subject: [PATCH 062/263] KVM: Avoid using CONFIG_ in userspace visible headers Kconfig symbols are not available in userspace, and are not stripped by headers-install. Avoid their use by adding #defines in to suit each architecture. Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 4 ++++ arch/x86/include/asm/kvm.h | 7 +++++++ include/linux/kvm.h | 10 +++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 68aa6da807c1..bfa86b6af7cd 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -25,6 +25,10 @@ #include +/* Select x86 specific features in */ +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608af..886c9402ec45 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include #include +/* Select x86 specific features in */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5715f1907601..0424326f1679 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -58,10 +58,10 @@ struct kvm_irqchip { __u32 pad; union { char dummy[512]; /* reserving space */ -#ifdef CONFIG_X86 +#ifdef __KVM_HAVE_PIT struct kvm_pic_state pic; #endif -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#ifdef __KVM_HAVE_IOAPIC struct kvm_ioapic_state ioapic; #endif } chip; @@ -384,16 +384,16 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_MSI #define KVM_CAP_DEVICE_MSI 20 #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif From 85db06e514422ae429b5f85742d8111b70bd56f3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 10 Dec 2008 21:23:26 +0100 Subject: [PATCH 063/263] KVM: mmu_notifiers release method The destructor for huge pages uses the backing inode for adjusting hugetlbfs accounting. Hugepage mappings are destroyed by exit_mmap, after mmu_notifier_release, so there are no notifications through unmap_hugepage_range at this point. The hugetlbfs inode can be freed with pages backed by it referenced by the shadow. When the shadow releases its reference, the huge page destructor will access a now freed inode. Implement the release operation for kvm mmu notifiers to release page refs before the hugetlbfs inode is gone. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a5a08298aab..0b6f2f71271f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -789,11 +789,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, return young; } +static void kvm_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + kvm_arch_flush_shadow(kvm); +} + static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { .invalidate_page = kvm_mmu_notifier_invalidate_page, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, + .release = kvm_mmu_notifier_release, }; #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ From ad8ba2cd44d4d39fb3fe55d5dcc565b19fc3a7fb Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:02 +0800 Subject: [PATCH 064/263] KVM: Add kvm_arch_sync_events to sync with asynchronize events kvm_arch_sync_events is introduced to quiet down all other events may happen contemporary with VM destroy process, like IRQ handler and work struct for assigned device. For kvm_arch_sync_events is called at the very beginning of kvm_destroy_vm(), so the state of KVM here is legal and can provide a environment to quiet down other events. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 6 files changed, 18 insertions(+) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4e586f6110aa..28f982045f29 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2822c8ccfaaf..5f81256287f5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvmppc_free_vcpus(kvm); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index be8497186b96..0d33893e1e89 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a2406..b0fc079f1bee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4127,6 +4127,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec49d0be7f52..bf6f703642fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); +void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b6f2f71271f..68e3f1ec1674 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -891,6 +891,7 @@ static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); From ba4cef31d5a397b64ba6d3ff713ce06c62f0c597 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:03 +0800 Subject: [PATCH 065/263] KVM: Fix racy in kvm_free_assigned_irq In the past, kvm_get_kvm() and kvm_put_kvm() was called in assigned device irq handler and interrupt_work, in order to prevent cancel_work_sync() in kvm_free_assigned_irq got a illegal state when waiting for interrupt_work done. But it's tricky and still got two problems: 1. A bug ignored two conditions that cancel_work_sync() would return true result in a additional kvm_put_kvm(). 2. If interrupt type is MSI, we would got a window between cancel_work_sync() and free_irq(), which interrupt would be injected again... This patch discard the reference count used for irq handler and interrupt_work, and ensure the legal state by moving the free function at the very beginning of kvm_destroy_vm(). And the patch fix the second bug by disable irq before cancel_work_sync(), which may result in nested disable of irq but OK for we are going to free it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- virt/kvm/kvm_main.c | 27 +++++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0fc079f1bee..fc3e329f6ade 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4129,11 +4129,11 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_sync_events(struct kvm *kvm) { + kvm_free_all_assigned_devices(kvm); } void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_all_assigned_devices(kvm); kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68e3f1ec1674..277ea7f39fc8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) assigned_dev->host_irq_disabled = false; } mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) @@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); @@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) } } +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ static void kvm_free_assigned_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { @@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm, if (!assigned_dev->irq_requested_type) return; - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); free_irq(assigned_dev->host_irq, (void *)assigned_dev); From d2a8284e8fca9e2a938bee6cd074064d23864886 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 30 Dec 2008 15:55:05 -0200 Subject: [PATCH 066/263] KVM: PIT: fix i8254 pending count read count_load_time assignment is bogus: its supposed to contain what it means, not the expiration time. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e665d1c623ca..72bd275a9b5c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) hrtimer_add_expires_ns(&pt->timer, pt->period); pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); + ps->channels[0].count_load_time = ktime_get(); return (pt->period == 0 ? 0 : 1); } From abe6655dd699069b53bcccbc65b2717f60203b12 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 10 Feb 2009 20:59:45 -0200 Subject: [PATCH 067/263] KVM: x86: disable kvmclock on non constant TSC hosts This is better. Currently, this code path is posing us big troubles, and we won't have a decent patch in time. So, temporarily disable it. Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc3e329f6ade..758b7a155ae9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: - case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_CLOCKSOURCE: + r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); + break; default: r = 0; break; From 2aaf69dcee864f4fb6402638dd2f263324ac839f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 21 Jan 2009 16:52:16 +0800 Subject: [PATCH 068/263] KVM: MMU: Map device MMIO as UC in EPT Software are not allow to access device MMIO using cacheable memory type, the patch limit MMIO region with UC and WC(guest can select WC using PAT and PCD/PWT). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 9 +++++++-- arch/x86/kvm/vmx.c | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7474a1..2d4477c71473 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); + if (!kvm_is_mmio_pfn(pfn)) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + mt_mask |= VMX_EPT_IGMT_BIT; + } else + mt_mask = MTRR_TYPE_UNCACHABLE << + kvm_x86_ops->get_mt_mask_shift(); spte |= mt_mask; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6259d7467648..07491c9c6ed0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3687,8 +3687,7 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_IGMT_BIT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); From d7cff1c37664971aae32bb0de82231ed34933d8e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 16:25:10 +0800 Subject: [PATCH 069/263] KVM: Fix INTx for device assignment Missing buckets and wrong parameter for free_irq() Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 277ea7f39fc8..d9bbb20f230f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -296,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm, if (irqchip_in_kernel(kvm)) { if (!msi2intx && - adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { - free_irq(adev->host_irq, (void *)kvm); + (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { + free_irq(adev->host_irq, (void *)adev); pci_disable_msi(adev->dev); } From b682b814e3cc340f905c14dff87ce8bdba7c5eba Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 10 Feb 2009 20:41:41 -0200 Subject: [PATCH 070/263] KVM: x86: fix LAPIC pending count calculation Simplify LAPIC TMCCT calculation by using hrtimer provided function to query remaining time until expiration. Fixes host hang with nested ESX. Signed-off-by: Marcelo Tosatti Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/irq.c | 7 ----- arch/x86/kvm/irq.h | 1 - arch/x86/kvm/lapic.c | 64 ++++++++++---------------------------------- arch/x86/kvm/lapic.h | 2 -- arch/x86/kvm/svm.c | 1 - arch/x86/kvm/vmx.c | 1 - 6 files changed, 14 insertions(+), 62 deletions(-) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index c019b8edcdb7..cf17ed52f6fb 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - kvm_apic_timer_intr_post(vcpu, vec); - /* TODO: PIT, RTC etc. */ -} -EXPORT_SYMBOL_GPL(kvm_timer_intr_post); - void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2bf32a03ceec..82579ee538d0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afac68c0815c..f0b67f2cdd69 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -35,6 +35,12 @@ #include "kvm_cache_regs.h" #include "irq.h" +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + #define PRId64 "d" #define PRIx64 "llx" #define PRIu64 "u" @@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - u64 counter_passed; - ktime_t passed, now; + ktime_t remaining; + s64 ns; u32 tmcct; ASSERT(apic != NULL); - now = apic->timer.dev.base->get_time(); - tmcct = apic_get_reg(apic, APIC_TMICT); - /* if initial count is 0, current count should also be 0 */ - if (tmcct == 0) + if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - if (unlikely(ktime_to_ns(now) <= - ktime_to_ns(apic->timer.last_update))) { - /* Wrap around */ - passed = ktime_add(( { - (ktime_t) { - .tv64 = KTIME_MAX - - (apic->timer.last_update).tv64}; } - ), now); - apic_debug("time elapsed\n"); - } else - passed = ktime_sub(now, apic->timer.last_update); + remaining = hrtimer_expires_remaining(&apic->timer.dev); + if (ktime_to_ns(remaining) < 0) + remaining = ktime_set(0, 0); - counter_passed = div64_u64(ktime_to_ns(passed), - (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); - - if (counter_passed > tmcct) { - if (unlikely(!apic_lvtt_period(apic))) { - /* one-shot timers stick at 0 until reset */ - tmcct = 0; - } else { - /* - * periodic timers reset to APIC_TMICT when they - * hit 0. The while loop simulates this happening N - * times. (counter_passed %= tmcct) would also work, - * but might be slower or not work on 32-bit?? - */ - while (counter_passed > tmcct) - counter_passed -= tmcct; - tmcct -= counter_passed; - } - } else { - tmcct -= counter_passed; - } + ns = mod_64(ktime_to_ns(remaining), apic->timer.period); + tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); return tmcct; } @@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->timer.dev.base->get_time(); - apic->timer.last_update = now; - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); @@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) - apic->timer.last_update = ktime_add_ns( - apic->timer.last_update, - apic->timer.period); -} - int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 81858881287e..45ab6ee71209 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,7 +12,6 @@ struct kvm_lapic { atomic_t pending; s64 period; /* unit: ns */ u32 divide_count; - ktime_t last_update; struct hrtimer dev; } timer; struct kvm_vcpu *vcpu; @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1452851ae258..a9e769e4e251 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); - kvm_timer_intr_post(vcpu, intr_vector); out: update_cr8_intercept(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 07491c9c6ed0..b1fe1422afb1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3285,7 +3285,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); if (kvm_cpu_has_interrupt(vcpu)) enable_irq_window(vcpu); } From 682edb4c01e690c7c7cd772dbd6f4e0fd74dc572 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 5 Feb 2009 18:23:46 +0000 Subject: [PATCH 071/263] KVM: Fix assigned devices circular locking dependency kvm->slots_lock is outer to kvm->lock, so take slots_lock in kvm_vm_ioctl_assign_device() before taking kvm->lock, rather than taking it in kvm_iommu_map_memslots(). Cc: stable@kernel.org Signed-off-by: Mark McLoughlin Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/iommu.c | 6 ++---- virt/kvm/kvm_main.c | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9693a29d00e..4c4037503600 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) { int i, r = 0; - down_read(&kvm->slots_lock); for (i = 0; i < kvm->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, kvm->memslots[i].npages); if (r) break; } - up_read(&kvm->slots_lock); + return r; } @@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm, static int kvm_iommu_unmap_memslots(struct kvm *kvm) { int i; - down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, kvm->memslots[i].npages); } - up_read(&kvm->slots_lock); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d9bbb20f230f..29a667ce35b0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -466,6 +466,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + down_read(&kvm->slots_lock); mutex_lock(&kvm->lock); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, @@ -527,6 +528,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, out: mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; out_list_del: list_del(&match->list); @@ -538,6 +540,7 @@ out_put: out_free: kfree(match); mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; } #endif From 516a1a7e9dc80358030fe01aabb3bedf882db9e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Feb 2009 02:32:07 +0200 Subject: [PATCH 072/263] KVM: VMX: Flush volatile msrs before emulating rdmsr Some msrs (notable MSR_KERNEL_GS_BASE) are held in the processor registers and need to be flushed to the vcpu struture before they can be read. This fixes cygwin longjmp() failure on Windows x64. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1fe1422afb1..7611af576829 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; From be716615fe596ee117292dc615e95f707fb67fd1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Jan 2009 23:36:34 +0100 Subject: [PATCH 073/263] x86, vm86: fix preemption bug Commit 3d2a71a596bd9c761c8487a2178e95f8a61da083 ("x86, traps: converge do_debug handlers") changed the preemption disable logic of do_debug() so vm86_handle_trap() is called with preemption disabled resulting in: BUG: sleeping function called from invalid context at include/linux/kernel.h:155 in_atomic(): 1, irqs_disabled(): 0, pid: 3005, name: dosemu.bin Pid: 3005, comm: dosemu.bin Tainted: G W 2.6.29-rc1 #51 Call Trace: [] copy_to_user+0x33/0x108 [] save_v86_state+0x65/0x149 [] handle_vm86_trap+0x20/0x8f [] do_debug+0x15b/0x1a4 [] debug_stack_correct+0x27/0x2c [] sysenter_do_call+0x12/0x2f BUG: scheduling while atomic: dosemu.bin/3005/0x10000001 Restore the original calling convention and reenable preemption before calling handle_vm86_trap(). Reported-by: Michal Suchanek Cc: stable@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb3..a9e7548e1790 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif From d21d52d4a155e36d4dc93d642cd52cb63f7ef91b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 5 Feb 2009 16:13:32 +0800 Subject: [PATCH 074/263] kbuild,setlocalversion: shorten the make time when using svn Don't bother doing `svn st` as it takes a retarded amount of time when the source is cold Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu Signed-off-by: Sam Ravnborg --- scripts/setlocalversion | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index f6946cf99ce1..f1c4b35bc324 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -58,14 +58,7 @@ fi # Check for svn and a svn repo. if rev=`svn info 2>/dev/null | grep '^Last Changed Rev'`; then rev=`echo $rev | awk '{print $NF}'` - changes=`svn status 2>/dev/null | grep '^[AMD]' | wc -l` - - # Are there uncommitted changes? - if [ $changes != 0 ]; then - printf -- '-svn%s%s' "$rev" -dirty - else - printf -- '-svn%s' "$rev" - fi + printf -- '-svn%s' "$rev" # All done with svn exit From fc370ecfdb37b853bd8e2118c7ad9f99fa9ac5cd Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Wed, 11 Feb 2009 21:10:57 -0800 Subject: [PATCH 075/263] kbuild: add vmlinux to kernel rpm We are building an automated system to test kernels weekly and need to provide an rpm to our QA dept. We would like to use the ability to create kernel rpms already in the kernel's Makefile, but need the vmlinux file included in the rpm for later debugging. This patch adds a compressed vmlinux to the kernel rpm when doing a make rpm-pkg or binrpm-pkg and upon install places the vmlinux file in /boot. Signed-off-by: Josh Hunt Signed-off-by: Sam Ravnborg --- scripts/package/mkspec | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 2500886fb90a..ee448cdc6a2b 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -86,6 +86,14 @@ echo "%endif" echo 'cp System.map $RPM_BUILD_ROOT'"/boot/System.map-$KERNELRELEASE" echo 'cp .config $RPM_BUILD_ROOT'"/boot/config-$KERNELRELEASE" + +echo "%ifnarch ppc64" +echo 'cp vmlinux vmlinux.orig' +echo 'bzip2 -9 vmlinux' +echo 'mv vmlinux.bz2 $RPM_BUILD_ROOT'"/boot/vmlinux-$KERNELRELEASE.bz2" +echo 'mv vmlinux.orig vmlinux' +echo "%endif" + echo "" echo "%clean" echo '#echo -rf $RPM_BUILD_ROOT' From 0bb98e231803860e978c302b9faccaf776881137 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 15 Feb 2009 10:20:30 +0100 Subject: [PATCH 076/263] bootgraph: fix for use with dot symbols powerpc has dot symbols, so the dmesg output looks like: <4>[ 0.327310] calling .migration_init+0x0/0x9c @ 1 <4>[ 0.327595] initcall .migration_init+0x0/0x9c returned 1 after 0 usecs The below fixes bootgraph.pl so it handles this correctly. Signed-off-by: Michael Neuling Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- scripts/bootgraph.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl index b0246307aac4..12caa822a232 100644 --- a/scripts/bootgraph.pl +++ b/scripts/bootgraph.pl @@ -51,7 +51,7 @@ my %pidctr; while (<>) { my $line = $_; - if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_]+)\+/) { + if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_\.]+)\+/) { my $func = $2; if ($done == 0) { $start{$func} = $1; @@ -87,7 +87,7 @@ while (<>) { $count = $count + 1; } - if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_]+)\+.*returned/) { + if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_\.]+)\+.*returned/) { if ($done == 0) { $end{$2} = $1; $maxtime = $1; From 953fae66d124486c9e284806429c52c5402f59ac Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 11 Feb 2009 13:24:09 -0800 Subject: [PATCH 077/263] kbuild: fix tags generation of config symbols commit 4f628248a578585472e19e4cba2c604643af8c6c aka "kbuild: reintroduce ALLSOURCE_ARCHS support for tags/cscope" breaks tags generation for Kconfig symbols. Steps to reproduce: make tags vi -t PROC_FS It should jump to 'config PROC_FS' line. Signed-off-by: Alexey Dobriyan Tested-by: Pete Wyckoff Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- scripts/tags.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index fdbe78bb5e2b..7e21e9146118 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -76,7 +76,10 @@ all_sources() all_kconfigs() { - find_sources $ALLSOURCE_ARCHS 'Kconfig*' + for arch in $ALLSOURCE_ARCHS; do + find_sources $arch 'Kconfig*' + done + find_other_sources 'Kconfig*' } all_defconfigs() From 5123b327c107db9e560fd62d50c27a3816e5a078 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sun, 25 Jan 2009 18:39:12 +0530 Subject: [PATCH 078/263] kbuild: add sys_* entries for syscalls in tags Currently, it is no longer possible to use the tags file to jump to system call function definitions with sys_foo, because the definitions are obscured by use of the SYSCALL_DEFINE* macros. This patch adds the appropriate option to ctags to make it see through the macro. Also, it adds the ENTRY() work already done for Exuberant to Emacs too. Signed-off-by: Rabin Vincent Signed-off-by: Sam Ravnborg --- scripts/tags.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 7e21e9146118..5bd8b1003d44 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -102,7 +102,8 @@ exuberant() -I ____cacheline_internodealigned_in_smp \ -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \ --extra=+f --c-kinds=+px \ - --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' + --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \ + --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ @@ -120,7 +121,9 @@ exuberant() emacs() { - all_sources | xargs $1 -a + all_sources | xargs $1 -a \ + --regex='/^ENTRY(\([^)]*\)).*/\1/' \ + --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' From c19ef7fd8e534c870166213e9e30de9c44b34a76 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 15 Feb 2009 11:30:52 +0100 Subject: [PATCH 079/263] scripts: add x86 register parser to markup_oops.pl An oops dump also contains the register values. This patch parses these for (32 bit) x86, and then annotates the disassembly with these values; this helps in analysis of the oops by the developer, for example, NULL pointer or other pointer bugs show up clearly this way. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- scripts/markup_oops.pl | 106 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 6 deletions(-) diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl index d40449cafa84..bb2046891c90 100644 --- a/scripts/markup_oops.pl +++ b/scripts/markup_oops.pl @@ -32,6 +32,78 @@ my $module = ""; my $func_offset; my $vmaoffset = 0; +my %regs; + + +sub parse_x86_regs +{ + my ($line) = @_; + if ($line =~ /EAX: ([0-9a-f]+) EBX: ([0-9a-f]+) ECX: ([0-9a-f]+) EDX: ([0-9a-f]+)/) { + $regs{"%eax"} = $1; + $regs{"%ebx"} = $2; + $regs{"%ecx"} = $3; + $regs{"%edx"} = $4; + } + if ($line =~ /ESI: ([0-9a-f]+) EDI: ([0-9a-f]+) EBP: ([0-9a-f]+) ESP: ([0-9a-f]+)/) { + $regs{"%esi"} = $1; + $regs{"%edi"} = $2; + $regs{"%esp"} = $4; + } +} + +sub process_x86_regs +{ + my ($line, $cntr) = @_; + my $str = ""; + if (length($line) < 40) { + return ""; # not an asm istruction + } + + # find the arguments to the instruction + if ($line =~ /([0-9a-zA-Z\,\%\(\)\-\+]+)$/) { + $lastword = $1; + } else { + return ""; + } + + # we need to find the registers that get clobbered, + # since their value is no longer relevant for previous + # instructions in the stream. + + $clobber = $lastword; + # first, remove all memory operands, they're read only + $clobber =~ s/\([a-z0-9\%\,]+\)//g; + # then, remove everything before the comma, thats the read part + $clobber =~ s/.*\,//g; + + # if this is the instruction that faulted, we haven't actually done + # the write yet... nothing is clobbered. + if ($cntr == 0) { + $clobber = ""; + } + + foreach $reg (keys(%regs)) { + my $val = $regs{$reg}; + # first check if we're clobbering this register; if we do + # we print it with a =>, and then delete its value + if ($clobber =~ /$reg/) { + if (length($val) > 0) { + $str = $str . " $reg => $val "; + } + $regs{$reg} = ""; + $val = ""; + } + # now check if we're reading this register + if ($lastword =~ /$reg/) { + if (length($val) > 0) { + $str = $str . " $reg = $val "; + } + } + } + return $str; +} + +# parse the oops while () { my $line = $_; if ($line =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) { @@ -46,10 +118,11 @@ while () { if ($line =~ /EIP is at ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]+\W\[([a-zA-Z0-9\_\-]+)\]/) { $module = $3; } + parse_x86_regs($line); } my $decodestart = hex($target) - hex($func_offset); -my $decodestop = $decodestart + 8192; +my $decodestop = hex($target) + 8192; if ($target eq "0") { print "No oops found!\n"; print "Usage: \n"; @@ -84,6 +157,7 @@ my $counter = 0; my $state = 0; my $center = 0; my @lines; +my @reglines; sub InRange { my ($address, $target) = @_; @@ -188,16 +262,36 @@ while ($finish < $counter) { my $i; -my $fulltext = ""; + +# start annotating the registers in the asm. +# this goes from the oopsing point back, so that the annotator +# can track (opportunistically) which registers got written and +# whos value no longer is relevant. + +$i = $center; +while ($i >= $start) { + $reglines[$i] = process_x86_regs($lines[$i], $center - $i); + $i = $i - 1; +} + $i = $start; while ($i < $finish) { + my $line; if ($i == $center) { - $fulltext = $fulltext . "*$lines[$i] <----- faulting instruction\n"; + $line = "*$lines[$i] "; } else { - $fulltext = $fulltext . " $lines[$i]\n"; + $line = " $lines[$i] "; } + print $line; + if (defined($reglines[$i]) && length($reglines[$i]) > 0) { + my $c = 60 - length($line); + while ($c > 0) { print " "; $c = $c - 1; }; + print "| $reglines[$i]"; + } + if ($i == $center) { + print "<--- faulting instruction"; + } + print "\n"; $i = $i +1; } -print $fulltext; - From 11df65c3c6f7fdc837a5be8787d31011e8bb93c1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 15 Feb 2009 11:30:55 +0100 Subject: [PATCH 080/263] scripts: add x86 64 bit support to the markup_oops.pl script Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- scripts/markup_oops.pl | 59 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl index bb2046891c90..528492bcba5b 100644 --- a/scripts/markup_oops.pl +++ b/scripts/markup_oops.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl use File::Basename; @@ -29,7 +29,7 @@ my $filename = $vmlinux_name; my $target = "0"; my $function; my $module = ""; -my $func_offset; +my $func_offset = 0; my $vmaoffset = 0; my %regs; @@ -49,6 +49,39 @@ sub parse_x86_regs $regs{"%edi"} = $2; $regs{"%esp"} = $4; } + if ($line =~ /RAX: ([0-9a-f]+) RBX: ([0-9a-f]+) RCX: ([0-9a-f]+)/) { + $regs{"%eax"} = $1; + $regs{"%ebx"} = $2; + $regs{"%ecx"} = $3; + } + if ($line =~ /RDX: ([0-9a-f]+) RSI: ([0-9a-f]+) RDI: ([0-9a-f]+)/) { + $regs{"%edx"} = $1; + $regs{"%esi"} = $2; + $regs{"%edi"} = $3; + } + if ($line =~ /RBP: ([0-9a-f]+) R08: ([0-9a-f]+) R09: ([0-9a-f]+)/) { + $regs{"%r08"} = $2; + $regs{"%r09"} = $3; + } + if ($line =~ /R10: ([0-9a-f]+) R11: ([0-9a-f]+) R12: ([0-9a-f]+)/) { + $regs{"%r10"} = $1; + $regs{"%r11"} = $2; + $regs{"%r12"} = $3; + } + if ($line =~ /R13: ([0-9a-f]+) R14: ([0-9a-f]+) R15: ([0-9a-f]+)/) { + $regs{"%r13"} = $1; + $regs{"%r14"} = $2; + $regs{"%r15"} = $3; + } +} + +sub reg_name +{ + my ($reg) = @_; + $reg =~ s/r(.)x/e\1x/; + $reg =~ s/r(.)i/e\1i/; + $reg =~ s/r(.)p/e\1p/; + return $reg; } sub process_x86_regs @@ -83,10 +116,18 @@ sub process_x86_regs } foreach $reg (keys(%regs)) { + my $clobberprime = reg_name($clobber); + my $lastwordprime = reg_name($lastword); my $val = $regs{$reg}; + if ($val =~ /^[0]+$/) { + $val = "0"; + } else { + $val =~ s/^0*//; + } + # first check if we're clobbering this register; if we do # we print it with a =>, and then delete its value - if ($clobber =~ /$reg/) { + if ($clobber =~ /$reg/ || $clobberprime =~ /$reg/) { if (length($val) > 0) { $str = $str . " $reg => $val "; } @@ -94,7 +135,7 @@ sub process_x86_regs $val = ""; } # now check if we're reading this register - if ($lastword =~ /$reg/) { + if ($lastword =~ /$reg/ || $lastwordprime =~ /$reg/) { if (length($val) > 0) { $str = $str . " $reg = $val "; } @@ -109,15 +150,25 @@ while () { if ($line =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) { $target = $1; } + if ($line =~ /RIP: 0010:\[\<([a-z0-9]+)\>\]/) { + $target = $1; + } if ($line =~ /EIP is at ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]/) { $function = $1; $func_offset = $2; } + if ($line =~ /RIP: 0010:\[\<[0-9a-f]+\>\] \[\<[0-9a-f]+\>\] ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]/) { + $function = $1; + $func_offset = $2; + } # check if it's a module if ($line =~ /EIP is at ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]+\W\[([a-zA-Z0-9\_\-]+)\]/) { $module = $3; } + if ($line =~ /RIP: 0010:\[\<[0-9a-f]+\>\] \[\<[0-9a-f]+\>\] ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]+\W\[([a-zA-Z0-9\_\-]+)\]/) { + $module = $3; + } parse_x86_regs($line); } From 929799973ba4a40f7b8001e9cc461c13d04c4124 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 10 Jan 2009 04:56:13 +0100 Subject: [PATCH 081/263] kbuild: create the source symlink earlier in the objdir It's useful to already have the source symlink in a objdir when one just runs make *config. Then one can do mkdir obj-allyes cd obj-allyes make -C ../sourcedir O=$(pwd) allyesconfig ./source/scripts/config --disable debug_info make CC=icecc -j18 without having to interrupt the make first just to get the source symlink. Signed-off-by: Andi Kleen [sam: deleted the other source symlink statement] Signed-off-by: Sam Ravnborg --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22d758495ad2..59cf6dacec07 100644 --- a/Makefile +++ b/Makefile @@ -389,6 +389,7 @@ PHONY += outputmakefile # output directory. outputmakefile: ifneq ($(KBUILD_SRC),) + $(Q)ln -fsn $(srctree) source $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \ $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL) endif @@ -946,7 +947,6 @@ ifneq ($(KBUILD_SRC),) mkdir -p include2; \ ln -fsn $(srctree)/include/asm-$(SRCARCH) include2/asm; \ fi - ln -fsn $(srctree) source endif # prepare2 creates a makefile if using a separate output directory From 391b170f10e669dd429aa47bce998c2fa839404c Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 6 Jan 2009 13:57:11 +0200 Subject: [PATCH 082/263] mmiotrace: count events lost due to not recording Impact: enhances lost events counting in mmiotrace The tracing framework, or the ring buffer facility it uses, has a switch to stop recording data. When recording is off, the trace events will be lost. The framework does not count these, so mmiotrace has to count them itself. Signed-off-by: Pekka Paalanen Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_mmiotrace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index fffcb069f1dc..80e503ef6136 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace.h" @@ -19,6 +20,7 @@ struct header_iter { static struct trace_array *mmio_trace_array; static bool overrun_detected; static unsigned long prev_overruns; +static atomic_t dropped_count; static void mmio_reset_data(struct trace_array *tr) { @@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { - unsigned long cnt = 0; + unsigned long cnt = atomic_xchg(&dropped_count, 0); unsigned long over = ring_buffer_overruns(iter->tr->buffer); if (over > prev_overruns) - cnt = over - prev_overruns; + cnt += over - prev_overruns; prev_overruns = over; return cnt; } @@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); - if (!event) + if (!event) { + atomic_inc(&dropped_count); return; + } entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_RW; @@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr, event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); - if (!event) + if (!event) { + atomic_inc(&dropped_count); return; + } entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_MAP; From 6bc5c366b1a45ca18fba6851f62db5743b3f6db5 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 3 Jan 2009 21:23:51 +0200 Subject: [PATCH 083/263] trace: mmiotrace to the tracer menu in Kconfig Impact: cosmetic change in Kconfig menu layout This patch was originally suggested by Peter Zijlstra, but seems it was forgotten. CONFIG_MMIOTRACE and CONFIG_MMIOTRACE_TEST were selectable directly under the Kernel hacking / debugging menu in the kernel configuration system. They were present only for x86 and x86_64. Other tracers that use the ftrace tracing framework are in their own sub-menu. This patch moves the mmiotrace configuration options there. Since the Kconfig file, where the tracer menu is, is not architecture specific, HAVE_MMIOTRACE_SUPPORT is introduced and provided only by x86/x86_64. CONFIG_MMIOTRACE now depends on it. Signed-off-by: Pekka Paalanen Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 24 ++---------------------- kernel/trace/Kconfig | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 10d6cc3fd052..e1983fa025d2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,28 +174,8 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PCI - select TRACING - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/tracers/mmiotrace.txt. - If you are not helping to develop drivers, say N. - -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. +config HAVE_MMIOTRACE_SUPPORT + def_bool y # # IO delay types: diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a6..58a93fbd68aa 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -302,4 +302,27 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI + select TRACING + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/tracers/mmiotrace.txt. + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + endmenu From f9aa28adfc6a4b01268ebb6d88566cca8627905f Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 3 Jan 2009 21:09:27 +0200 Subject: [PATCH 084/263] doc: mmiotrace.txt, buffer size control change Impact: prevents confusing the user when buffer size is inadequate The tracing framework offers a resizeable buffer, which mmiotrace uses to record events. If the buffer is full, the following events will be lost. Events should not be lost, so the documentation instructs the user to increase the buffer size. The buffer size is set via a debugfs file. Mmiotrace documentation was not updated the same time the debugfs file was changed. The old file was tracing/trace_entries and first contained the number of entries the buffer had space for, per cpu. Nowadays this file is replaced with the file tracing/buffer_size_kb, which tells the amount of memory reserved for the buffer, per cpu, in kilobytes. Previously, a flag had to be toggled via the debugfs file tracing/tracing_enabled when the buffer size was changed. This is no longer necessary. The mmiotrace documentation is updated to reflect the current state of the tracing framework. Signed-off-by: Pekka Paalanen Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/tracers/mmiotrace.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt index cde23b4a12a1..5731c67abc55 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/tracers/mmiotrace.txt @@ -78,12 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If events were lost, the trace is incomplete. You should enlarge the buffers and try again. Buffers are enlarged by first seeing how large the current buffers are: -$ cat /debug/tracing/trace_entries +$ cat /debug/tracing/buffer_size_kb gives you a number. Approximately double this number and write it back, for instance: -$ echo 0 > /debug/tracing/tracing_enabled -$ echo 128000 > /debug/tracing/trace_entries -$ echo 1 > /debug/tracing/tracing_enabled +$ echo 128000 > /debug/tracing/buffer_size_kb Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also From 090542641de833c6f756895fc2f139f046e298f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 15 Feb 2009 20:02:19 -0500 Subject: [PATCH 085/263] ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling This was found through a code checker (http://repo.or.cz/w/smatch.git/). It looks like you might be able to trigger the error by trying to migrate a readonly file system. Signed-off-by: Dan Carpenter Signed-off-by: "Theodore Ts'o" --- fs/ext4/migrate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 734abca25e35..fe64d9f79852 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode) + 1); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - goto err_out; + return retval; } tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, @@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = -ENOMEM; ext4_journal_stop(handle); - tmp_inode = NULL; - goto err_out; + return retval; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -618,8 +617,7 @@ err_out: ext4_journal_stop(handle); - if (tmp_inode) - iput(tmp_inode); + iput(tmp_inode); return retval; } From a0abd520fd69295f4a3735e29a9448a32e101d47 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 16 Feb 2009 17:31:58 -0600 Subject: [PATCH 086/263] cpumask: fix powernow-k8: partial revert of 2fdf66b491ac706657946442789ec644cc317e1a Impact: fix powernow-k8 when acpi=off (or other error). There was a spurious change introduced into powernow-k8 in this patch: so that we try to "restore" the cpus_allowed we never saved. We revert that file. See lkml "[PATCH] x86/powernow: fix cpus_allowed brokage when acpi=off" from Yinghai for the bug report. Cc: Mike Travis Cc: Yinghai Lu Signed-off-by: Rusty Russell Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d8..6428aa17b40e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ From 1371be0f7c8f6141b2dbfde6a7ae7885bedb9834 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 16 Feb 2009 17:31:59 -0600 Subject: [PATCH 087/263] cpumask: Use cpu_*_mask accessors code: alpha Impact: use new API, fix SMP bug. Use the new accessors rather than frobbing bits directly. This also removes the bug introduced in ee0c468b (alpha: compile fixes) which had Alpha setting bits on an on-stack cpumask, not the cpu_online_map. Cc: Richard Henderson Cc: FUJITA Tomonori Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ivan Kokshaysky Acked-by: Ingo Molnar --- arch/alpha/kernel/process.c | 8 ++++---- arch/alpha/kernel/smp.c | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index f238370c907d..8d0097f10208 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -93,8 +93,8 @@ common_shutdown_1(void *generic_ptr) if (cpuid != boot_cpuid) { flags |= 0x00040000UL; /* "remain halted" */ *pflags = flags; - cpu_clear(cpuid, cpu_present_map); - cpu_clear(cpuid, cpu_possible_map); + set_cpu_present(cpuid, false); + set_cpu_possible(cpuid, false); halt(); } #endif @@ -120,8 +120,8 @@ common_shutdown_1(void *generic_ptr) #ifdef CONFIG_SMP /* Wait for the secondaries to halt. */ - cpu_clear(boot_cpuid, cpu_present_map); - cpu_clear(boot_cpuid, cpu_possible_map); + set_cpu_present(boot_cpuid, false); + set_cpu_possible(boot_cpuid, false); while (cpus_weight(cpu_present_map)) barrier(); #endif diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 00f1dc3dfd5f..b1fe5674c3a1 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -120,12 +120,12 @@ void __cpuinit smp_callin(void) { int cpuid = hard_smp_processor_id(); - cpumask_t mask = cpu_online_map; - if (cpu_test_and_set(cpuid, mask)) { + if (cpu_online(cpuid)) { printk("??, cpu 0x%x already present??\n", cpuid); BUG(); } + set_cpu_online(cpuid, true); /* Turn on machine checks. */ wrmces(7); @@ -436,8 +436,8 @@ setup_smp(void) ((char *)cpubase + i*hwrpb->processor_size); if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; - cpu_set(i, cpu_possible_map); - cpu_set(i, cpu_present_map); + set_cpu_possible(i, true); + set_cpu_present(i, true); cpu->pal_revision = boot_cpu_palrev; } @@ -470,8 +470,8 @@ smp_prepare_cpus(unsigned int max_cpus) /* Nothing to do on a UP box, or when told not to. */ if (smp_num_probed == 1 || max_cpus == 0) { - cpu_possible_map = cpumask_of_cpu(boot_cpuid); - cpu_present_map = cpumask_of_cpu(boot_cpuid); + init_cpu_possible(cpumask_of(boot_cpuid)); + init_cpu_present(cpumask_of(boot_cpuid)); printk(KERN_INFO "SMP mode deactivated.\n"); return; } From d14a7e0bfc4aed6452a436c9836903fbd1a5d6ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Feb 2009 10:13:03 +0100 Subject: [PATCH 088/263] Revert "Sound: hda - Restore PCI configuration space with interrupts off" This reverts commit 32e176c14d7a425b681ef003c9061001ddb7fc7b. That commit caused a regression with suspend on Thinkpad SL300. Reference: kernel bug#12711 http://bugzilla.kernel.org/show_bug.cgi?id=12711 Tested-by: Alexandre Rostovtsev Acked-by: Rafael J. Wysocki Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 11e791b965f6..c8d9178f47e5 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1947,16 +1947,13 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state) return 0; } -static int azx_resume_early(struct pci_dev *pci) -{ - return pci_restore_state(pci); -} - static int azx_resume(struct pci_dev *pci) { struct snd_card *card = pci_get_drvdata(pci); struct azx *chip = card->private_data; + pci_set_power_state(pci, PCI_D0); + pci_restore_state(pci); if (pci_enable_device(pci) < 0) { printk(KERN_ERR "hda-intel: pci_enable_device failed, " "disabling device\n"); @@ -2468,7 +2465,6 @@ static struct pci_driver driver = { .remove = __devexit_p(azx_remove), #ifdef CONFIG_PM .suspend = azx_suspend, - .resume_early = azx_resume_early, .resume = azx_resume, #endif }; From e156ac4c571e3be741bc411e58820b74a9295c72 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 16 Feb 2009 15:22:39 +0100 Subject: [PATCH 089/263] sound: usb-audio: fix uninitialized variable with M-Audio MIDI interfaces Fix the snd_usbmidi_create_endpoints_midiman() function, which forgot to set the out_interval member of the endpoint info structure for Midiman/ M-Audio devices. Since kernel 2.6.24, any non-zero value makes the driver use interrupt transfers instead of bulk transfers. With EHCI controllers, these random interval values result in unbearably large latencies for output MIDI transfers. Signed-off-by: Clemens Ladisch Reported-by: David Tested-by: David Cc: Signed-off-by: Takashi Iwai --- sound/usb/usbmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index 320641ab5be7..26bad373fe65 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -1625,6 +1625,7 @@ static int snd_usbmidi_create_endpoints_midiman(struct snd_usb_midi* umidi, } ep_info.out_ep = get_endpoint(hostif, 2)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; + ep_info.out_interval = 0; ep_info.out_cables = endpoint->out_cables & 0x5555; err = snd_usbmidi_out_endpoint_create(umidi, &ep_info, &umidi->endpoints[0]); if (err < 0) From a3c1239eb59c0a907f8be5587d42e950f44543f8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 16 Feb 2009 14:37:12 +0000 Subject: [PATCH 090/263] wusb: whci-hcd: always lock whc->lock with interrupts disabled Always lock whc->lock with spin_lock_irq() or spin_lock_irqsave(). Signed-off-by: David Vrabel --- drivers/usb/host/whci/asl.c | 4 ++-- drivers/usb/host/whci/pzl.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c index 2291c5f5af51..958751ccea43 100644 --- a/drivers/usb/host/whci/asl.c +++ b/drivers/usb/host/whci/asl.c @@ -227,13 +227,13 @@ void scan_async_work(struct work_struct *work) * Now that the ASL is updated, complete the removal of any * removed qsets. */ - spin_lock(&whc->lock); + spin_lock_irq(&whc->lock); list_for_each_entry_safe(qset, t, &whc->async_removed_list, list_node) { qset_remove_complete(whc, qset); } - spin_unlock(&whc->lock); + spin_unlock_irq(&whc->lock); } /** diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c index 7dc85a0bee7c..df8b85f07092 100644 --- a/drivers/usb/host/whci/pzl.c +++ b/drivers/usb/host/whci/pzl.c @@ -255,13 +255,13 @@ void scan_periodic_work(struct work_struct *work) * Now that the PZL is updated, complete the removal of any * removed qsets. */ - spin_lock(&whc->lock); + spin_lock_irq(&whc->lock); list_for_each_entry_safe(qset, t, &whc->periodic_removed_list, list_node) { qset_remove_complete(whc, qset); } - spin_unlock(&whc->lock); + spin_unlock_irq(&whc->lock); } /** From 744f6592727a7ab9e3ca4266bedaa786825a31bb Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 16 Feb 2009 21:21:47 +0100 Subject: [PATCH 091/263] [ARM] 5400/1: Add support for inverted rdy_busy pin for Atmel nand device controller Add support for inverted rdy_busy pin for Atmel nand device controller It will fix building error on NeoCore926 board. Acked-by: Andrew Victor Acked-by: David Woodhouse Signed-off-by: Gregory CLEMENT Signed-off-by: Russell King --- arch/arm/mach-at91/include/mach/board.h | 1 + arch/avr32/mach-at32ap/include/mach/board.h | 1 + drivers/mtd/nand/atmel_nand.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h index fb51f0e0a83f..0b3ae21b4565 100644 --- a/arch/arm/mach-at91/include/mach/board.h +++ b/arch/arm/mach-at91/include/mach/board.h @@ -93,6 +93,7 @@ struct atmel_nand_data { u8 enable_pin; /* chip enable */ u8 det_pin; /* card detect */ u8 rdy_pin; /* ready/busy */ + u8 rdy_pin_active_low; /* rdy_pin value is inverted */ u8 ale; /* address line number connected to ALE */ u8 cle; /* address line number connected to CLE */ u8 bus_width_16; /* buswidth is 16 bit */ diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h index aafaf7a78886..cff8e84f78f2 100644 --- a/arch/avr32/mach-at32ap/include/mach/board.h +++ b/arch/avr32/mach-at32ap/include/mach/board.h @@ -116,6 +116,7 @@ struct atmel_nand_data { int enable_pin; /* chip enable */ int det_pin; /* card detect */ int rdy_pin; /* ready/busy */ + u8 rdy_pin_active_low; /* rdy_pin value is inverted */ u8 ale; /* address line number connected to ALE */ u8 cle; /* address line number connected to CLE */ u8 bus_width_16; /* buswidth is 16 bit */ diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index c98c1570a40b..47a33cec3793 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -139,7 +139,8 @@ static int atmel_nand_device_ready(struct mtd_info *mtd) struct nand_chip *nand_chip = mtd->priv; struct atmel_nand_host *host = nand_chip->priv; - return gpio_get_value(host->board->rdy_pin); + return gpio_get_value(host->board->rdy_pin) ^ + !!host->board->rdy_pin_active_low; } /* From 0412558c873f716efe902b397af0653a550f7341 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Feb 2009 22:48:12 +0100 Subject: [PATCH 092/263] ALSA: usb-audio - Fix non-continuous rate detection The detection of non-continuous rates (given via rate tables) isn't processed properly (e.g. for type II). This patch fixes and simplifies the detection code. Tested-by: Joris van Rantwijk Cc: Signed-off-by: Takashi Iwai --- sound/usb/usbaudio.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 2ab83129d9b0..80863093d2c8 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -2524,7 +2524,6 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform * build the rate table and bitmap flags */ int r, idx; - unsigned int nonzero_rates = 0; fp->rate_table = kmalloc(sizeof(int) * nr_rates, GFP_KERNEL); if (fp->rate_table == NULL) { @@ -2532,24 +2531,26 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform return -1; } - fp->nr_rates = nr_rates; - fp->rate_min = fp->rate_max = combine_triple(&fmt[8]); + fp->nr_rates = 0; + fp->rate_min = fp->rate_max = 0; for (r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3) { unsigned int rate = combine_triple(&fmt[idx]); + if (!rate) + continue; /* C-Media CM6501 mislabels its 96 kHz altsetting */ if (rate == 48000 && nr_rates == 1 && chip->usb_id == USB_ID(0x0d8c, 0x0201) && fp->altsetting == 5 && fp->maxpacksize == 392) rate = 96000; - fp->rate_table[r] = rate; - nonzero_rates |= rate; - if (rate < fp->rate_min) + fp->rate_table[fp->nr_rates] = rate; + if (!fp->rate_min || rate < fp->rate_min) fp->rate_min = rate; - else if (rate > fp->rate_max) + if (!fp->rate_max || rate > fp->rate_max) fp->rate_max = rate; fp->rates |= snd_pcm_rate_to_rate_bit(rate); + fp->nr_rates++; } - if (!nonzero_rates) { + if (!fp->nr_rates) { hwc_debug("All rates were zero. Skipping format!\n"); return -1; } From 3b03cc5b86e2052295b9b484f37226ee15c87924 Mon Sep 17 00:00:00 2001 From: Joris van Rantwijk Date: Mon, 16 Feb 2009 22:58:23 +0100 Subject: [PATCH 093/263] ALSA: usb-audio - Workaround for misdetected sample rate with CM6207 The CM6207 incorrectly advertises its 96 kHz playback setting as 48 kHz in its USB device descriptor. This patch extends an existing workaround in usbaudio.c to also cover the CM6207. This resolves issue 0004249 in the ALSA bug tracker. Signed-off-by: Joris van Rantwijk Cc: Signed-off-by: Takashi Iwai --- sound/usb/usbaudio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 80863093d2c8..19e37451c216 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -2539,7 +2539,8 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform continue; /* C-Media CM6501 mislabels its 96 kHz altsetting */ if (rate == 48000 && nr_rates == 1 && - chip->usb_id == USB_ID(0x0d8c, 0x0201) && + (chip->usb_id == USB_ID(0x0d8c, 0x0201) || + chip->usb_id == USB_ID(0x0d8c, 0x0102)) && fp->altsetting == 5 && fp->maxpacksize == 392) rate = 96000; fp->rate_table[fp->nr_rates] = rate; From d1b3525b4126d7acad0493b62642b80b71442661 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 15 Feb 2009 23:24:24 +0400 Subject: [PATCH 094/263] libata-sff: fix 32-bit PIO ATAPI regression Commit 871af1210f13966ab911ed2166e4ab2ce775b99d (libata: Add 32bit PIO support) has caused all kinds of errors on the ATAPI devices, so it has been empirically proven that one shouldn't try to read/write an extra data word when a device is not expecting it already. "Don't do it then"; however, still use a chance to do 32-bit read/write one last time when there are exactly 3 trailing bytes. Oh, and stop pointlessly swapping the bytes to and fro on big-endian machines by using io*_rep() accessors which shouldn't byte-swap. This patch should fix the kernel.org bug #12609. Signed-off-by: Sergei Shtylyov Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 0b299b0f8172..714cb046b594 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -773,18 +773,32 @@ unsigned int ata_sff_data_xfer32(struct ata_device *dev, unsigned char *buf, else iowrite32_rep(data_addr, buf, words); + /* Transfer trailing bytes, if any */ if (unlikely(slop)) { - __le32 pad; + unsigned char pad[4]; + + /* Point buf to the tail of buffer */ + buf += buflen - slop; + + /* + * Use io*_rep() accessors here as well to avoid pointlessly + * swapping bytes to and fro on the big endian machines... + */ if (rw == READ) { - pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); - memcpy(buf + buflen - slop, &pad, slop); + if (slop < 3) + ioread16_rep(data_addr, pad, 1); + else + ioread32_rep(data_addr, pad, 1); + memcpy(buf, pad, slop); } else { - memcpy(&pad, buf + buflen - slop, slop); - iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr); + memcpy(pad, buf, slop); + if (slop < 3) + iowrite16_rep(data_addr, pad, 1); + else + iowrite32_rep(data_addr, pad, 1); } - words++; } - return words << 2; + return (buflen + 1) & ~1; } EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); From 7dac745b8e367c99175b8f0d014d996f0e5ed9e5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 12 Feb 2009 10:34:32 +0900 Subject: [PATCH 095/263] sata_nv: give up hardreset on nf2 Kernel bz#12176 reports that nf2 hardreset simply doesn't work. Give up. Argh... Signed-off-by: Tejun Heo Cc: Robert Hancock Reported-by: Saro Signed-off-by: Jeff Garzik --- drivers/ata/sata_nv.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 444af0415ca1..55a8eed3f3a3 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -421,19 +421,21 @@ static struct ata_port_operations nv_generic_ops = { .hardreset = ATA_OP_NULL, }; -/* OSDL bz3352 reports that nf2/3 controllers can't determine device - * signature reliably. Also, the following thread reports detection - * failure on cold boot with the standard debouncing timing. +/* nf2 is ripe with hardreset related problems. + * + * kernel bz#3352 reports nf2/3 controllers can't determine device + * signature reliably. The following thread reports detection failure + * on cold boot with the standard debouncing timing. * * http://thread.gmane.org/gmane.linux.ide/34098 * - * Debounce with hotplug timing and request follow-up SRST. + * And bz#12176 reports that hardreset simply doesn't work on nf2. + * Give up on it and just don't do hardreset. */ static struct ata_port_operations nv_nf2_ops = { - .inherits = &nv_common_ops, + .inherits = &nv_generic_ops, .freeze = nv_nf2_freeze, .thaw = nv_nf2_thaw, - .hardreset = nv_noclassify_hardreset, }; /* For initial probing after boot and hot plugging, hardreset mostly From 720fd66dfad1b0286721dbb2ed4d6076c0aa953b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 4 Feb 2009 20:44:01 +0100 Subject: [PATCH 096/263] mfd: Fix egpio kzalloc return test Since ei is already known to be non-NULL, I assume that what was intended was to test the result of kzalloc. Signed-off-by: Julia Lawall Signed-off-by: Samuel Ortiz --- drivers/mfd/htc-egpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 1a4d04664d6d..194df7b8256c 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -307,7 +307,7 @@ static int __init egpio_probe(struct platform_device *pdev) ei->nchips = pdata->num_chips; ei->chip = kzalloc(sizeof(struct egpio_chip) * ei->nchips, GFP_KERNEL); - if (!ei) { + if (!ei->chip) { ret = -ENOMEM; goto fail; } From 62571c29a8343839e85e741db6a489f30686697c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Feb 2009 20:49:52 +0100 Subject: [PATCH 097/263] mfd: Initialise WM8350 interrupts earlier Ensure that the interrupt handling is configured before we do platform specific init. This allows the platform specific initialisation to configure things which use interrupts safely. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index f92595c8f165..70f5e7739546 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -1404,15 +1404,6 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, return ret; } - if (pdata && pdata->init) { - ret = pdata->init(wm8350); - if (ret != 0) { - dev_err(wm8350->dev, "Platform init() failed: %d\n", - ret); - goto err; - } - } - mutex_init(&wm8350->auxadc_mutex); mutex_init(&wm8350->irq_mutex); INIT_WORK(&wm8350->irq_work, wm8350_irq_worker); @@ -1430,6 +1421,15 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, } wm8350->chip_irq = irq; + if (pdata && pdata->init) { + ret = pdata->init(wm8350); + if (ret != 0) { + dev_err(wm8350->dev, "Platform init() failed: %d\n", + ret); + goto err; + } + } + wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0x0); wm8350_client_dev_register(wm8350, "wm8350-codec", From 85c93ea7dca475a6ee3bf414befe94b2c42f1001 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Feb 2009 21:09:38 +0100 Subject: [PATCH 098/263] mfd: Improve diagnostics for WM8350 ID register probe Check the return value of the device I/O functions when reading the ID registers so we can provide a more useful diagnostic when we're having trouble talking to the device. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-core.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index 70f5e7739546..e5e82c7cc523 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -1297,14 +1297,29 @@ static void wm8350_client_dev_register(struct wm8350 *wm8350, int wm8350_device_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata) { - int ret = -EINVAL; + int ret; u16 id1, id2, mask_rev; u16 cust_id, mode, chip_rev; /* get WM8350 revision and config mode */ - wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1); - wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2); - wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), &mask_rev); + ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1); + if (ret != 0) { + dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); + goto err; + } + + ret = wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2); + if (ret != 0) { + dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); + goto err; + } + + ret = wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), + &mask_rev); + if (ret != 0) { + dev_err(wm8350->dev, "Failed to read revision: %d\n", ret); + goto err; + } id1 = be16_to_cpu(id1); id2 = be16_to_cpu(id2); From a39a021fd73ce06aad8d1081ac711a36930e6cb8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Feb 2009 21:10:58 +0100 Subject: [PATCH 099/263] mfd: Mark WM835x USB_SLV_500MA bit as accessible The code is out of sync with the silicon. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c index 68887b817d17..9a4cc954cb7c 100644 --- a/drivers/mfd/wm8350-regmap.c +++ b/drivers/mfd/wm8350-regmap.c @@ -3188,7 +3188,7 @@ const struct wm8350_reg_access wm8350_reg_io_map[] = { { 0x7CFF, 0x0C00, 0x7FFF }, /* R1 - ID */ { 0x0000, 0x0000, 0x0000 }, /* R2 */ { 0xBE3B, 0xBE3B, 0x8000 }, /* R3 - System Control 1 */ - { 0xFCF7, 0xFCF7, 0xF800 }, /* R4 - System Control 2 */ + { 0xFEF7, 0xFEF7, 0xF800 }, /* R4 - System Control 2 */ { 0x80FF, 0x80FF, 0x8000 }, /* R5 - System Hibernate */ { 0xFB0E, 0xFB0E, 0x0000 }, /* R6 - Interface Control */ { 0x0000, 0x0000, 0x0000 }, /* R7 */ From 29c6a2e6f88225ae2673aabd2de0fa2126653231 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 4 Feb 2009 21:23:22 +0100 Subject: [PATCH 100/263] mfd: wm8350 tries reaches -1 With a postfix decrement tries will reach -1 rather than 0, so the warning will not be issued even upon timeout. Signed-off-by: Roel Kluin Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index e5e82c7cc523..ea3801833176 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -1111,7 +1111,7 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref) do { schedule_timeout_interruptible(1); reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1); - } while (tries-- && (reg & WM8350_AUXADC_POLL)); + } while (--tries && (reg & WM8350_AUXADC_POLL)); if (!tries) dev_err(wm8350->dev, "adc chn %d read timeout\n", channel); From a313d758cc7956d7f1e7a727c8fa571b6468fabf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Feb 2009 21:26:07 +0100 Subject: [PATCH 101/263] mfd: Fix TWL4030 build on some ARM variants Many ARM platforms do not provide a mach/cpu.h so rather than guarding the use of that header with CONFIG_ARM guard it with the guards used when testing for the OMAP variants in the body of the code. Signed-off-by: Mark Brown Acked-by: David Brownell Signed-off-by: Samuel Ortiz --- drivers/mfd/twl4030-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c index e7ab0035d305..68826f1e36bc 100644 --- a/drivers/mfd/twl4030-core.c +++ b/drivers/mfd/twl4030-core.c @@ -38,7 +38,7 @@ #include #include -#ifdef CONFIG_ARM +#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) #include #endif From 9427c34bc72f05b519e8466f27c38a3327bae157 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 4 Feb 2009 21:27:48 +0100 Subject: [PATCH 102/263] mfd: fix htc-egpio iomem resource handling using resource_size Fixes an off-by-one error in the iomem resource mapping. Signed-off-by: Philipp Zabel Signed-off-by: Samuel Ortiz --- drivers/mfd/htc-egpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 194df7b8256c..aa266e1f69b2 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -286,7 +286,7 @@ static int __init egpio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) goto fail; - ei->base_addr = ioremap_nocache(res->start, res->end - res->start); + ei->base_addr = ioremap_nocache(res->start, resource_size(res)); if (!ei->base_addr) goto fail; pr_debug("EGPIO phys=%08x virt=%p\n", (u32)res->start, ei->base_addr); From 2f161f4485535df85451a8cfdf2487c315f665f5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 6 Feb 2009 15:28:15 +0100 Subject: [PATCH 103/263] mfd: Ensure all WM8350 IRQs are masked at startup The IRQs might have been left enabled in hardware, generating spurious IRQs before the drivers have registered. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index ea3801833176..84d5ea1ec171 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -1419,6 +1419,13 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, return ret; } + wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF); + wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF); + wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF); + wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF); + wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF); + wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF); + mutex_init(&wm8350->auxadc_mutex); mutex_init(&wm8350->irq_mutex); INIT_WORK(&wm8350->irq_work, wm8350_irq_worker); From 8915e5402809ae6228e15c76417351dad752826e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 17 Feb 2009 09:07:02 +0100 Subject: [PATCH 104/263] mfd: terminate pcf50633 i2c_device_id list The i2c_device_id list is supposed to be zero-terminated. Signed-off-by: Jean Delvare Cc: Balaji Rao Signed-off-by: Samuel Ortiz Signed-off-by: Andrew Morton --- drivers/mfd/pcf50633-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index ea9488e7ad6d..2e36057659e1 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -678,6 +678,7 @@ static int __devexit pcf50633_remove(struct i2c_client *client) static struct i2c_device_id pcf50633_id_table[] = { {"pcf50633", 0x73}, + {/* end of list */} }; static struct i2c_driver pcf50633_driver = { From 158abca5f699a047ff7b67a64ab19e8ec824e37d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 17 Feb 2009 09:10:19 +0100 Subject: [PATCH 105/263] mfd: fix sm501 section mismatches drv => driver renaming is needed otherwise modpost will spit false positives re pointing to __devinit function from regular data. Signed-off-by: Alexey Dobriyan Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Samuel Ortiz --- drivers/mfd/sm501.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 0e5761f12634..6c3786f8e9a8 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1321,7 +1321,7 @@ static unsigned int sm501_mem_local[] = { * Common init code for an SM501 */ -static int sm501_init_dev(struct sm501_devdata *sm) +static int __devinit sm501_init_dev(struct sm501_devdata *sm) { struct sm501_initdata *idata; struct sm501_platdata *pdata; @@ -1397,7 +1397,7 @@ static int sm501_init_dev(struct sm501_devdata *sm) return 0; } -static int sm501_plat_probe(struct platform_device *dev) +static int __devinit sm501_plat_probe(struct platform_device *dev) { struct sm501_devdata *sm; int ret; @@ -1586,8 +1586,8 @@ static struct sm501_platdata sm501_pci_platdata = { .gpio_base = -1, }; -static int sm501_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) +static int __devinit sm501_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) { struct sm501_devdata *sm; int err; @@ -1693,7 +1693,7 @@ static void sm501_dev_remove(struct sm501_devdata *sm) sm501_gpio_remove(sm); } -static void sm501_pci_remove(struct pci_dev *dev) +static void __devexit sm501_pci_remove(struct pci_dev *dev) { struct sm501_devdata *sm = pci_get_drvdata(dev); @@ -1727,16 +1727,16 @@ static struct pci_device_id sm501_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, sm501_pci_tbl); -static struct pci_driver sm501_pci_drv = { +static struct pci_driver sm501_pci_driver = { .name = "sm501", .id_table = sm501_pci_tbl, .probe = sm501_pci_probe, - .remove = sm501_pci_remove, + .remove = __devexit_p(sm501_pci_remove), }; MODULE_ALIAS("platform:sm501"); -static struct platform_driver sm501_plat_drv = { +static struct platform_driver sm501_plat_driver = { .driver = { .name = "sm501", .owner = THIS_MODULE, @@ -1749,14 +1749,14 @@ static struct platform_driver sm501_plat_drv = { static int __init sm501_base_init(void) { - platform_driver_register(&sm501_plat_drv); - return pci_register_driver(&sm501_pci_drv); + platform_driver_register(&sm501_plat_driver); + return pci_register_driver(&sm501_pci_driver); } static void __exit sm501_base_exit(void) { - platform_driver_unregister(&sm501_plat_drv); - pci_unregister_driver(&sm501_pci_drv); + platform_driver_unregister(&sm501_plat_driver); + pci_unregister_driver(&sm501_pci_driver); } module_init(sm501_base_init); From dcd9651ecd652a186dd9ad0dde76d43320b9c0a2 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Tue, 17 Feb 2009 09:21:52 +0100 Subject: [PATCH 106/263] mfd: Fix sm501_register_gpio section mismatch WARNING: drivers/mfd/built-in.o(.text+0x1706): Section mismatch in reference from the function sm501_register_gpio() to the function .devinit.text:sm501_gpio_register_chip() The function sm501_register_gpio() references the function __devinit sm501_gpio_register_chip(). This is often because sm501_register_gpio lacks a __devinit annotation or the annotation of sm501_gpio_register_chip is wrong. Signed-off-by: Rakib Mullick Signed-off-by: Samuel Ortiz --- drivers/mfd/sm501.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 6c3786f8e9a8..4c7b7962f6b8 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1050,7 +1050,7 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm, return gpiochip_add(gchip); } -static int sm501_register_gpio(struct sm501_devdata *sm) +static int __devinit sm501_register_gpio(struct sm501_devdata *sm) { struct sm501_gpio *gpio = &sm->gpio; resource_size_t iobase = sm->io_res->start + SM501_GPIO; From 8eb2dfac41c71701bb741f496f0cb7b7e4a3c3f6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 17 Feb 2009 20:00:11 +0800 Subject: [PATCH 107/263] crypto: lrw - Fix big endian support It turns out that LRW has never worked properly on big endian. This was never discussed because nobody actually used it that way. In fact, it was only discovered when Geert Uytterhoeven loaded it through tcrypt which failed the test on it. The fix is straightforward, on big endian the to find the nth bit we should be grouping them by words instead of bytes. So setbit128_bbe should xor with 128 - BITS_PER_LONG instead of 128 - BITS_PER_BYTE == 0x78. Tested-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- crypto/lrw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 8ef664e3bcd9..358f80be2bf9 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -45,7 +45,13 @@ struct priv { static inline void setbit128_bbe(void *b, int bit) { - __set_bit(bit ^ 0x78, b); + __set_bit(bit ^ (0x80 - +#ifdef __BIG_ENDIAN + BITS_PER_LONG +#else + BITS_PER_BYTE +#endif + ), b); } static int setkey(struct crypto_tfm *parent, const u8 *key, From 35cfd1d964f3c2420862f2167a0eb85ff1208999 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sun, 1 Feb 2009 16:11:04 +0100 Subject: [PATCH 108/263] HID: blacklist Powercom USB UPS For quite some time users with various UPSes from Powercom were forced to play magic with bind/unbind in /sys in order to be able to see the UPSes. The beasts does not work as HID devices, even if claims to do so. cypress_m8 driver works with the devices instead, creating a normal serial port with which normal UPS controlling software works. The manufacturer confirmed the upcoming models with proper HID support will have different device IDs. In any way, it's wrong to have two completely different modules for one device in kernel. Blacklist the device in HID (add it to hid_ignore_list) to stop this mess, finally. Signed-off-By: Michael Tokarev Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 6cad69ed21c5..0e96b1fcc14b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1605,6 +1605,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) }, + { HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) }, { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD) }, { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD2) }, { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD3) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e899f510ebeb..88511970508d 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -348,6 +348,9 @@ #define USB_VENDOR_ID_PLAYDOTCOM 0x0b43 #define USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII 0x0003 +#define USB_VENDOR_ID_POWERCOM 0x0d9f +#define USB_DEVICE_ID_POWERCOM_UPS 0x0002 + #define USB_VENDOR_ID_SAITEK 0x06a3 #define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17 From dfd395aff4cb16d2480b280064bea1b19ededef1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Feb 2009 16:35:17 +0300 Subject: [PATCH 109/263] HID: unlock properly on error paths in hidraw_ioctl() We can't return immediately because lock_kernel() is held. Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 732449628971..02b19db5442e 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -267,8 +267,10 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, default: { struct hid_device *hid = dev->hid; - if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ) - return -EINVAL; + if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ) { + ret = -EINVAL; + break; + } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) { int len; @@ -277,8 +279,9 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, len = strlen(hid->name) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); - return copy_to_user(user_arg, hid->name, len) ? + ret = copy_to_user(user_arg, hid->name, len) ? -EFAULT : len; + break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) { @@ -288,12 +291,13 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, len = strlen(hid->phys) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); - return copy_to_user(user_arg, hid->phys, len) ? + ret = copy_to_user(user_arg, hid->phys, len) ? -EFAULT : len; + break; } } - ret = -ENOTTY; + ret = -ENOTTY; } unlock_kernel(); return ret; From daedb3d6a91f9626ab4c921378ac52e44de833d5 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Sat, 14 Feb 2009 11:45:05 +0200 Subject: [PATCH 110/263] HID: move tmff and zpff devices from ignore_list to blacklist The devices handled by hid-tmff and hid-zpff were added in the hid_ignore_list[] instead of hid_blacklist[] in hid-core.c, thus disabling them completely. hid_ignore_list[] causes hid layer to skip the device, while hid_blacklist[] indicates there is a specific driver in hid bus. Re-enable the devices by moving them to the correct list. Signed-off-by: Anssi Hannula Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0e96b1fcc14b..1cc967448f4d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1300,7 +1300,13 @@ static const struct hid_device_id hid_blacklist[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) }, + { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) }, + { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) }, + { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) }, + { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) }, @@ -1613,10 +1619,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD5) }, { HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY1) }, { HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY2) }, - { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) }, - { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) }, - { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) }, - { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) }, { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LABPRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_GOTEMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) }, @@ -1627,8 +1629,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) }, { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) }, { } }; From 2b639386a2a26c84c8d26c649cf657ebd43a7bc8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 17 Feb 2009 12:38:36 +0100 Subject: [PATCH 111/263] HID: fix bus endianity in file2alias Fix endianness of bus member of hid_device_id in modpost. Signed-off-by: Jiri Slaby Reported-by: Nye Liu Cc: Jiri Kosina Signed-off-by: Jiri Kosina --- scripts/mod/file2alias.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 491b8b1b6abf..4eea60b1693e 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -210,6 +210,7 @@ static void do_usb_table(void *symval, unsigned long size, static int do_hid_entry(const char *filename, struct hid_device_id *id, char *alias) { + id->bus = TO_NATIVE(id->bus); id->vendor = TO_NATIVE(id->vendor); id->product = TO_NATIVE(id->product); From f63145e28934a0b3ad8baf31adc195ec81423351 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Sat, 24 Jan 2009 20:52:41 -0300 Subject: [PATCH 112/263] V4L/DVB (10516a): zoran: Update MAINTAINERS entry Ronald Bultje hasn't been maintaining the zoran driver for some time. Re-direct people to the mailing lists and web pages. MAINTAINERS | 6 +++--- Signed-off-by: Trent Piepho Acked-by: Jean Delvare Acked-by: Ronald S. Bultje Signed-off-by: Mauro Carvalho Chehab --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index db65b4e6d132..53b50c8d5f38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4924,11 +4924,11 @@ L: zd1211-devs@lists.sourceforge.net (subscribers-only) S: Maintained ZR36067 VIDEO FOR LINUX DRIVER -P: Ronald Bultje -M: rbultje@ronald.bitfreak.net L: mjpeg-users@lists.sourceforge.net +L: linux-media@vger.kernel.org W: http://mjpeg.sourceforge.net/driver-zoran/ -S: Maintained +T: Mercurial http://linuxtv.org/hg/v4l-dvb +S: Odd Fixes ZS DECSTATION Z85C30 SERIAL DRIVER P: Maciej W. Rozycki From ef88f2b563275d156beebc9f76ed134f3f90f210 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 13 Feb 2009 08:24:34 -0300 Subject: [PATCH 113/263] V4L/DVB (10527): tuner: fix TUV1236D analog/digital setup As reported by David Engel , ATSC115 doesn't work fine with mythtv. This software opens both analog and dvb interfaces of saa7134. What happens is that some tuner commands are going to the wrong place, as shown at the logs: Feb 12 20:37:48 opus kernel: tuner-simple 1-0061: using tuner params #0 (ntsc) Feb 12 20:37:48 opus kernel: tuner-simple 1-0061: freq = 67.25 (1076), range = 0, config = 0xce, cb = 0x01 Feb 12 20:37:48 opus kernel: tuner-simple 1-0061: Freq= 67.25 MHz, V_IF=45.75 MHz, Offset=0.00 MHz, div=1808 Feb 12 20:37:48 opus kernel: tuner 1-0061: tv freq set to 67.25 Feb 12 20:37:48 opus kernel: tuner-simple 1-000a: using tuner params #0 (ntsc) Feb 12 20:37:48 opus kernel: tuner-simple 1-000a: freq = 67.25 (1076), range = 0, config = 0xce, cb = 0x01 Feb 12 20:37:48 opus kernel: tuner-simple 1-000a: Freq= 67.25 MHz, V_IF=45.75 MHz, Offset=0.00 MHz, div=1808 Feb 12 20:37:48 opus kernel: tuner-simple 1-000a: tv 0x07 0x10 0xce 0x01 Feb 12 20:37:48 opus kernel: tuner-simple 1-0061: tv 0x07 0x10 0xce 0x01 This happens due to a hack at TUV1236D analog setup, where it replaces tuner address, at 0x61 for 0x0a, in order to save a few memory bytes. The code assumes that nobody else would try to access the tuner during that setup, but the point is that there's no lock to protect such access. So, this opens the possibility of race conditions to happen. Instead of hacking tuner address, this patch uses a temporary var with the proper tuner value to be used during the setup. This should save the issue, although we should consider to write some analog/digital lock at saa7134 driver. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/tuners/tuner-simple.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c index de7adaf5fa5b..78412c9c424a 100644 --- a/drivers/media/common/tuners/tuner-simple.c +++ b/drivers/media/common/tuners/tuner-simple.c @@ -318,7 +318,6 @@ static int simple_std_setup(struct dvb_frontend *fe, u8 *config, u8 *cb) { struct tuner_simple_priv *priv = fe->tuner_priv; - u8 tuneraddr; int rc; /* tv norm specific stuff for multi-norm tuners */ @@ -387,6 +386,7 @@ static int simple_std_setup(struct dvb_frontend *fe, case TUNER_PHILIPS_TUV1236D: { + struct tuner_i2c_props i2c = priv->i2c_props; /* 0x40 -> ATSC antenna input 1 */ /* 0x48 -> ATSC antenna input 2 */ /* 0x00 -> NTSC antenna input 1 */ @@ -398,17 +398,15 @@ static int simple_std_setup(struct dvb_frontend *fe, buffer[1] = 0x04; } /* set to the correct mode (analog or digital) */ - tuneraddr = priv->i2c_props.addr; - priv->i2c_props.addr = 0x0a; - rc = tuner_i2c_xfer_send(&priv->i2c_props, &buffer[0], 2); + i2c.addr = 0x0a; + rc = tuner_i2c_xfer_send(&i2c, &buffer[0], 2); if (2 != rc) tuner_warn("i2c i/o error: rc == %d " "(should be 2)\n", rc); - rc = tuner_i2c_xfer_send(&priv->i2c_props, &buffer[2], 2); + rc = tuner_i2c_xfer_send(&i2c, &buffer[2], 2); if (2 != rc) tuner_warn("i2c i/o error: rc == %d " "(should be 2)\n", rc); - priv->i2c_props.addr = tuneraddr; break; } } From d807dec59d3c850b332b5bf95fe33f18def00068 Mon Sep 17 00:00:00 2001 From: Tobias Lorenz Date: Thu, 12 Feb 2009 14:56:10 -0300 Subject: [PATCH 114/263] V4L/DVB (10532): Correction of Stereo detection/setting and signal strength indication Thanks to Bob Ross - correction of stereo detection/setting - correction of signal strength indicator scaling Signed-off-by: Tobias Lorenz Signed-off-by: Mauro Carvalho Chehab --- drivers/media/radio/radio-si470x.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index 67cbce82cb91..fd81a97a0fdb 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -98,6 +98,9 @@ * - blacklisted KWorld radio in hid-core.c and hid-ids.h * 2008-12-03 Mark Lord * - add support for DealExtreme USB Radio + * 2009-01-31 Bob Ross + * - correction of stereo detection/setting + * - correction of signal strength indicator scaling * * ToDo: * - add firmware download/update support @@ -1385,20 +1388,22 @@ static int si470x_vidioc_g_tuner(struct file *file, void *priv, }; /* stereo indicator == stereo (instead of mono) */ - if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 1) - tuner->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO; - else + if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 0) tuner->rxsubchans = V4L2_TUNER_SUB_MONO; + else + tuner->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO; /* mono/stereo selector */ - if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 1) - tuner->audmode = V4L2_TUNER_MODE_MONO; - else + if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 0) tuner->audmode = V4L2_TUNER_MODE_STEREO; + else + tuner->audmode = V4L2_TUNER_MODE_MONO; /* min is worst, max is best; signal:0..0xffff; rssi: 0..0xff */ - tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI) - * 0x0101; + /* measured in units of dbµV in 1 db increments (max at ~75 dbµV) */ + tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI); + /* the ideal factor is 0xffff/75 = 873,8 */ + tuner->signal = (tuner->signal * 873) + (8 * tuner->signal / 10); /* automatic frequency control: -1: freq to low, 1 freq to high */ /* AFCRL does only indicate that freq. differs, not if too low/high */ From 2f94fc465a6504443bb986ba9d36e28e2b422c6e Mon Sep 17 00:00:00 2001 From: Tobias Lorenz Date: Thu, 12 Feb 2009 14:56:19 -0300 Subject: [PATCH 115/263] V4L/DVB (10533): fix LED status output This patch closes one of my todos that was since long on my list. Some people reported clicks and glitches in the audio stream, correlated to the LED color changing cycle. Thanks to Rick Bronson . Signed-off-by: Tobias Lorenz Signed-off-by: Mauro Carvalho Chehab --- drivers/media/radio/radio-si470x.c | 34 +++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index fd81a97a0fdb..4dfed6aa2dbc 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -101,11 +101,13 @@ * 2009-01-31 Bob Ross * - correction of stereo detection/setting * - correction of signal strength indicator scaling + * 2009-01-31 Rick Bronson + * Tobias Lorenz + * - add LED status output * * ToDo: * - add firmware download/update support * - RDS support: interrupt mode, instead of polling - * - add LED status output (check if that's not already done in firmware) */ @@ -884,6 +886,30 @@ static int si470x_rds_on(struct si470x_device *radio) +/************************************************************************** + * General Driver Functions - LED_REPORT + **************************************************************************/ + +/* + * si470x_set_led_state - sets the led state + */ +static int si470x_set_led_state(struct si470x_device *radio, + unsigned char led_state) +{ + unsigned char buf[LED_REPORT_SIZE]; + int retval; + + buf[0] = LED_REPORT; + buf[1] = LED_COMMAND; + buf[2] = led_state; + + retval = si470x_set_report(radio, (void *) &buf, sizeof(buf)); + + return (retval < 0) ? -EINVAL : 0; +} + + + /************************************************************************** * RDS Driver Functions **************************************************************************/ @@ -1637,6 +1663,9 @@ static int si470x_usb_driver_probe(struct usb_interface *intf, /* set initial frequency */ si470x_set_freq(radio, 87.5 * FREQ_MUL); /* available in all regions */ + /* set led to connect state */ + si470x_set_led_state(radio, BLINK_GREEN_LED); + /* rds buffer allocation */ radio->buf_size = rds_buf * 3; radio->buffer = kmalloc(radio->buf_size, GFP_KERNEL); @@ -1720,6 +1749,9 @@ static void si470x_usb_driver_disconnect(struct usb_interface *intf) cancel_delayed_work_sync(&radio->work); usb_set_intfdata(intf, NULL); if (radio->users == 0) { + /* set led to disconnect state */ + si470x_set_led_state(radio, BLINK_ORANGE_LED); + video_unregister_device(radio->videodev); kfree(radio->buffer); kfree(radio); From 28100165c3f27f681fee8b60e4e44f64a739c454 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Feb 2009 15:27:44 -0300 Subject: [PATCH 116/263] V4L/DVB (10572): Revert commit dda06a8e4610757def753ee3a541a0b1a1feb36b On Mon, 02 Feb 2009, Hartmut wrote: This change set is wrong. The affected functions cannot be called from an interrupt context, because they may process large buffers. In this case, interrupts are disabled for a long time. Functions, like dvb_dmx_swfilter_packets(), could be called only from a tasklet. This change set does hide some strong design bugs in dm1105.c and au0828-dvb.c. Please revert this change set and do fix the bugs in dm1105.c and au0828-dvb.c (and other files). On Sun, 15 Feb 2009, Oliver Endriss wrote: This changeset _must_ be reverted! It breaks all kernels since 2.6.27 for applications which use DVB and require a low interrupt latency. It is a very bad idea to call the demuxer to process data buffers with interrupts disabled! On Mon, 16 Feb 2009, Trent Piepho wrote: I agree, this is bad. The demuxer is far too much work to be done with IRQs off. IMHO, even doing it under a spin-lock is excessive. It should be a mutex. Drivers should use a work-queue to feed the demuxer. Thank you for testing this changeset and discovering the issues on it. Cc: Trent Piepho Cc: Hartmut Cc: Oliver Endriss Cc: Andreas Oberritter Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dmxdev.c | 16 +++++++--------- drivers/media/dvb/dvb-core/dvb_demux.c | 16 ++++++---------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c index 0c733c66a441..069d847ba887 100644 --- a/drivers/media/dvb/dvb-core/dmxdev.c +++ b/drivers/media/dvb/dvb-core/dmxdev.c @@ -364,16 +364,15 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, enum dmx_success success) { struct dmxdev_filter *dmxdevfilter = filter->priv; - unsigned long flags; int ret; if (dmxdevfilter->buffer.error) { wake_up(&dmxdevfilter->buffer.queue); return 0; } - spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); + spin_lock(&dmxdevfilter->dev->lock); if (dmxdevfilter->state != DMXDEV_STATE_GO) { - spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); + spin_unlock(&dmxdevfilter->dev->lock); return 0; } del_timer(&dmxdevfilter->timer); @@ -392,7 +391,7 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, } if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; - spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); + spin_unlock(&dmxdevfilter->dev->lock); wake_up(&dmxdevfilter->buffer.queue); return 0; } @@ -404,12 +403,11 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); + spin_lock(&dmxdevfilter->dev->lock); if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) { - spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); + spin_unlock(&dmxdevfilter->dev->lock); return 0; } @@ -419,7 +417,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, else buffer = &dmxdevfilter->dev->dvr_buffer; if (buffer->error) { - spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); + spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; } @@ -430,7 +428,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, dvb_ringbuffer_flush(buffer); buffer->error = ret; } - spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); + spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; } diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c index a2c1fd5d2f67..e2eca0b1fe7c 100644 --- a/drivers/media/dvb/dvb-core/dvb_demux.c +++ b/drivers/media/dvb/dvb-core/dvb_demux.c @@ -399,9 +399,7 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, size_t count) { - unsigned long flags; - - spin_lock_irqsave(&demux->lock, flags); + spin_lock(&demux->lock); while (count--) { if (buf[0] == 0x47) @@ -409,17 +407,16 @@ void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, buf += 188; } - spin_unlock_irqrestore(&demux->lock, flags); + spin_unlock(&demux->lock); } EXPORT_SYMBOL(dvb_dmx_swfilter_packets); void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) { - unsigned long flags; int p = 0, i, j; - spin_lock_irqsave(&demux->lock, flags); + spin_lock(&demux->lock); if (demux->tsbufp) { i = demux->tsbufp; @@ -452,18 +449,17 @@ void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock_irqrestore(&demux->lock, flags); + spin_unlock(&demux->lock); } EXPORT_SYMBOL(dvb_dmx_swfilter); void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) { - unsigned long flags; int p = 0, i, j; u8 tmppack[188]; - spin_lock_irqsave(&demux->lock, flags); + spin_lock(&demux->lock); if (demux->tsbufp) { i = demux->tsbufp; @@ -504,7 +500,7 @@ void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock_irqrestore(&demux->lock, flags); + spin_unlock(&demux->lock); } EXPORT_SYMBOL(dvb_dmx_swfilter_204); From ad28127d7c7c617bca1d426f95b6ffa1fb8f700f Mon Sep 17 00:00:00 2001 From: Adam Baker Date: Wed, 4 Feb 2009 15:33:21 -0300 Subject: [PATCH 117/263] V4L/DVB (10619): gspca - main: Destroy the URBs at disconnection time. If a device using the gspca framework is unplugged while it is still streaming then the call that is used to free the URBs that have been allocated occurs after the pointer it uses becomes invalid at the end of gspca_disconnect. Make another cleanup call in gspca_disconnect while the pointer is still valid (multiple calls are OK as destroy_urbs checks for pointers already being NULL. Signed-off-by: Adam Baker Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/gspca/gspca.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c index 2ed24527ecd6..65e4901f4db7 100644 --- a/drivers/media/video/gspca/gspca.c +++ b/drivers/media/video/gspca/gspca.c @@ -422,6 +422,7 @@ static void destroy_urbs(struct gspca_dev *gspca_dev) if (urb == NULL) break; + BUG_ON(!gspca_dev->dev); gspca_dev->urb[i] = NULL; if (!gspca_dev->present) usb_kill_urb(urb); @@ -1950,8 +1951,12 @@ void gspca_disconnect(struct usb_interface *intf) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); + mutex_lock(&gspca_dev->usb_lock); gspca_dev->present = 0; + mutex_unlock(&gspca_dev->usb_lock); + destroy_urbs(gspca_dev); + gspca_dev->dev = NULL; usb_set_intfdata(intf, NULL); /* release the device */ From ac9575f75c52bcb455120f8c43376b556acba048 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 14 Feb 2009 19:58:33 -0300 Subject: [PATCH 118/263] V4L/DVB (10625): ivtv: fix decoder crash regression The video_ioctl2 conversion of ivtv in kernel 2.6.27 introduced a bug causing decoder commands to crash. The decoder commands should have been handled from the video_ioctl2 default handler, ensuring correct mapping of the argument between user and kernel space. Unfortunately they ended up before the video_ioctl2 call, causing random crashes. Thanks to hannes@linus.priv.at for testing and helping me track down the cause! Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/ivtv/ivtv-ioctl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c index f6b3ef6e691b..9be6244573e9 100644 --- a/drivers/media/video/ivtv/ivtv-ioctl.c +++ b/drivers/media/video/ivtv/ivtv-ioctl.c @@ -1748,6 +1748,18 @@ static long ivtv_default(struct file *file, void *fh, int cmd, void *arg) break; } + case IVTV_IOC_DMA_FRAME: + case VIDEO_GET_PTS: + case VIDEO_GET_FRAME_COUNT: + case VIDEO_GET_EVENT: + case VIDEO_PLAY: + case VIDEO_STOP: + case VIDEO_FREEZE: + case VIDEO_CONTINUE: + case VIDEO_COMMAND: + case VIDEO_TRY_COMMAND: + return ivtv_decoder_ioctls(file, cmd, (void *)arg); + default: return -EINVAL; } @@ -1790,18 +1802,6 @@ static long ivtv_serialized_ioctl(struct ivtv *itv, struct file *filp, ivtv_vapi(itv, CX2341X_DEC_SET_AUDIO_MODE, 2, itv->audio_bilingual_mode, itv->audio_stereo_mode); return 0; - case IVTV_IOC_DMA_FRAME: - case VIDEO_GET_PTS: - case VIDEO_GET_FRAME_COUNT: - case VIDEO_GET_EVENT: - case VIDEO_PLAY: - case VIDEO_STOP: - case VIDEO_FREEZE: - case VIDEO_CONTINUE: - case VIDEO_COMMAND: - case VIDEO_TRY_COMMAND: - return ivtv_decoder_ioctls(filp, cmd, (void *)arg); - default: break; } From 7bf432d64c47f77111fbce5d48d7774df8b48948 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Feb 2009 04:25:32 -0300 Subject: [PATCH 119/263] V4L/DVB (10626): ivtv: fix regression in get sliced vbi format The new v4l2_subdev_call used s_fmt instead of g_fmt. Thanks-to: Andy Walls Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/ivtv/ivtv-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c index 9be6244573e9..c13bd2aa0bea 100644 --- a/drivers/media/video/ivtv/ivtv-ioctl.c +++ b/drivers/media/video/ivtv/ivtv-ioctl.c @@ -393,7 +393,7 @@ static int ivtv_g_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_fo return 0; } - v4l2_subdev_call(itv->sd_video, video, s_fmt, fmt); + v4l2_subdev_call(itv->sd_video, video, g_fmt, fmt); vbifmt->service_set = ivtv_get_service_set(vbifmt); return 0; } From 5b058bcde961bf28678a70e44c079107313543b6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 17 Feb 2009 18:35:34 +0100 Subject: [PATCH 120/263] tracing/function-graph-tracer: trace the idle tasks When the function graph tracer is activated, it iterates over the task_list to allocate a stack to store the return addresses. But the per cpu idle tasks are not iterated by using do_each_thread / while_each_thread. So we have to iterate on them manually. This fixes somes weirdness in the traces and many losses of traces. Examples on two cpus: 0) Xorg-4287 | 2.906 us | } 0) Xorg-4287 | 3.965 us | } 0) Xorg-4287 | 5.302 us | } ------------------------------------------ 0) Xorg-4287 => -0 ------------------------------------------ 0) -0 | 2.861 us | } 0) -0 | 0.526 us | set_normalized_timespec(); 0) -0 | 7.201 us | } 0) -0 | 8.214 us | } 0) -0 | | clockevents_program_event() { 0) -0 | | lapic_next_event() { 0) -0 | 0.510 us | native_apic_mem_write(); 0) -0 | 1.546 us | } 0) -0 | 2.583 us | } 0) -0 | + 12.435 us | } 0) -0 | + 13.470 us | } 0) -0 | 0.608 us | _spin_unlock_irqrestore(); 0) -0 | + 23.270 us | } 0) -0 | + 24.336 us | } 0) -0 | + 25.417 us | } 0) -0 | 0.593 us | _spin_unlock(); 0) -0 | + 41.869 us | } 0) -0 | + 42.906 us | } 0) -0 | + 95.035 us | } 0) -0 | 0.540 us | menu_reflect(); 0) -0 | ! 100.404 us | } 0) -0 | 0.564 us | mce_idle_callback(); 0) -0 | | enter_idle() { 0) -0 | 0.526 us | mce_idle_callback(); 0) -0 | 1.757 us | } 0) -0 | | cpuidle_idle_call() { 0) -0 | | menu_select() { 0) -0 | 0.525 us | pm_qos_requirement(); 0) -0 | 0.518 us | tick_nohz_get_sleep_length(); 0) -0 | 2.621 us | } [...] 1) -0 | 0.518 us | touch_softlockup_watchdog(); 1) -0 | + 14.355 us | } 1) -0 | + 22.840 us | } 1) -0 | + 25.949 us | } 1) -0 | | handle_irq() { 1) -0 | 0.511 us | irq_to_desc(); 1) -0 | | handle_edge_irq() { 1) -0 | 0.638 us | _spin_lock(); 1) -0 | | ack_apic_edge() { 1) -0 | 0.510 us | irq_to_desc(); 1) -0 | | move_native_irq() { 1) -0 | 0.510 us | irq_to_desc(); 1) -0 | 1.532 us | } 1) -0 | 0.511 us | native_apic_mem_write(); ------------------------------------------ 1) -0 => cat-5073 ------------------------------------------ 1) cat-5073 | 3.731 us | } 1) cat-5073 | | run_local_timers() { 1) cat-5073 | 0.533 us | hrtimer_run_queues(); 1) cat-5073 | | raise_softirq() { 1) cat-5073 | | __raise_softirq_irqoff() { 1) cat-5073 | | /* nr: 1 */ 1) cat-5073 | 2.718 us | } 1) cat-5073 | 3.814 us | } Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9a236ffe2aa4..fdf913dfc7e8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2033,7 +2033,7 @@ free: static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; - int ret; + int ret, cpu; ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * sizeof(struct ftrace_ret_stack *), @@ -2042,6 +2042,10 @@ static int start_graph_tracing(void) if (!ret_stack_list) return -ENOMEM; + /* The cpu_boot init_task->ret_stack will never be freed */ + for_each_online_cpu(cpu) + ftrace_graph_init_task(idle_task(cpu)); + do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); From 603eaa1bdd3e0402085e815cc531bb0a32827a9e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 17 Feb 2009 19:59:54 +0100 Subject: [PATCH 121/263] hwmon: (f71882fg) Hide misleading error message If the F71882FG chip is at address 0x4e, then the probe at 0x2e will fail with the following message in the logs: f71882fg: Not a Fintek device This is misleading because there is a Fintek device, just at a different address. So I propose to degrade this message to a debug message. Signed-off-by: Jean Delvare Acked-by: Hans de Goede --- drivers/hwmon/f71882fg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 609cafff86bc..367f6cb1166c 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -1872,7 +1872,7 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address, devid = superio_inw(sioaddr, SIO_REG_MANID); if (devid != SIO_FINTEK_ID) { - printk(KERN_INFO DRVNAME ": Not a Fintek device\n"); + pr_debug(DRVNAME ": Not a Fintek device\n"); goto exit; } From 18632f84fac770125c0982dfadec6b551e82144e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Feb 2009 19:59:54 +0100 Subject: [PATCH 122/263] hwmon: Fix ACPI resource check error handling This patch fixes a number of cases where things were not properly cleaned up when acpi_check_resource_conflict() returned an error, causing oopses such as the one reported here: https://bugzilla.redhat.com/show_bug.cgi?id=483208 Signed-off-by: Hans de Goede Signed-off-by: Jean Delvare --- drivers/hwmon/f71882fg.c | 2 +- drivers/hwmon/vt1211.c | 2 +- drivers/hwmon/w83627ehf.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 367f6cb1166c..5f81ddf71508 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -1932,7 +1932,7 @@ static int __init f71882fg_device_add(unsigned short address, res.name = f71882fg_pdev->name; err = acpi_check_resource_conflict(&res); if (err) - return err; + goto exit_device_put; err = platform_device_add_resources(f71882fg_pdev, &res, 1); if (err) { diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c index b0ce37852281..73f77a9b8b18 100644 --- a/drivers/hwmon/vt1211.c +++ b/drivers/hwmon/vt1211.c @@ -1262,7 +1262,7 @@ static int __init vt1211_device_add(unsigned short address) res.name = pdev->name; err = acpi_check_resource_conflict(&res); if (err) - goto EXIT; + goto EXIT_DEV_PUT; err = platform_device_add_resources(pdev, &res, 1); if (err) { diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index cb808d015361..feae743ba991 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1548,7 +1548,7 @@ static int __init sensors_w83627ehf_init(void) err = acpi_check_resource_conflict(&res); if (err) - goto exit; + goto exit_device_put; err = platform_device_add_resources(pdev, &res, 1); if (err) { From 211e3e0e1b116b10ba707db2fec972909fa647fd Mon Sep 17 00:00:00 2001 From: Frank Seidel Date: Tue, 17 Feb 2009 19:59:54 +0100 Subject: [PATCH 123/263] MAINTAINERS: Switch hdaps to Frank Seidel As Rovert Love doesn't any more seem to be realy active on hdaps driver i'll happily take it over. Signed-off-by: Frank Seidel Cc: Robert Love Signed-off-by: Jean Delvare --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index db65b4e6d132..a654c5b9c271 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1905,10 +1905,10 @@ W: http://gigaset307x.sourceforge.net/ S: Maintained HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER -P: Robert Love -M: rlove@rlove.org -M: linux-kernel@vger.kernel.org -W: http://www.kernel.org/pub/linux/kernel/people/rml/hdaps/ +P: Frank Seidel +M: frank@f-seidel.de +L: lm-sensors@lm-sensors.org +W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/ S: Maintained GSPCA FINEPIX SUBDRIVER From bf51935f3e988e0ed6f34b55593e5912f990750a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Feb 2009 06:01:30 -0800 Subject: [PATCH 124/263] x86, rcu: fix strange load average and ksoftirqd behavior Damien Wyart reported high ksoftirqd CPU usage (20%) on an otherwise idle system. The function-graph trace Damien provided: > 799.521187 | 1) -0 | | rcu_check_callbacks() { > 799.521371 | 1) -0 | | rcu_check_callbacks() { > 799.521555 | 1) -0 | | rcu_check_callbacks() { > 799.521738 | 1) -0 | | rcu_check_callbacks() { > 799.521934 | 1) -0 | | rcu_check_callbacks() { > 799.522068 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.522208 | 1) -0 | | rcu_check_callbacks() { > 799.522392 | 1) -0 | | rcu_check_callbacks() { > 799.522575 | 1) -0 | | rcu_check_callbacks() { > 799.522759 | 1) -0 | | rcu_check_callbacks() { > 799.522956 | 1) -0 | | rcu_check_callbacks() { > 799.523074 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.523214 | 1) -0 | | rcu_check_callbacks() { > 799.523397 | 1) -0 | | rcu_check_callbacks() { > 799.523579 | 1) -0 | | rcu_check_callbacks() { > 799.523762 | 1) -0 | | rcu_check_callbacks() { > 799.523960 | 1) -0 | | rcu_check_callbacks() { > 799.524079 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.524220 | 1) -0 | | rcu_check_callbacks() { > 799.524403 | 1) -0 | | rcu_check_callbacks() { > 799.524587 | 1) -0 | | rcu_check_callbacks() { > 799.524770 | 1) -0 | | rcu_check_callbacks() { > [ . . . ] Shows rcu_check_callbacks() being invoked way too often. It should be called once per jiffy, and here it is called no less than 22 times in about 3.5 milliseconds, meaning one call every 160 microseconds or so. Why do we need to call rcu_pending() and rcu_check_callbacks() from the idle loop of 32-bit x86, especially given that no other architecture does this? The following patch removes the call to rcu_pending() and rcu_check_callbacks() from the x86 32-bit idle loop in order to reduce the softirq load on idle systems. Reported-by: Damien Wyart Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b4..bd4da2af08ae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -104,9 +104,6 @@ void cpu_idle(void) check_pgt_cache(); rmb(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, 0); - if (cpu_is_offline(cpu)) play_dead(); From 1a88b5364b535edaa321d70a566e358390ff0872 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 Feb 2009 02:38:12 +0000 Subject: [PATCH 125/263] Fix incomplete __mntput locking Getting this wrong caused WARNING: at fs/namespace.c:636 mntput_no_expire+0xac/0xf2() due to optimistically checking cpu_writer->mnt outside the spinlock. Here's what we really want: * we know that nobody will set cpu_writer->mnt to mnt from now on * all changes to that sucker are done under cpu_writer->lock * we want the laziest equivalent of spin_lock(&cpu_writer->lock); if (likely(cpu_writer->mnt != mnt)) { spin_unlock(&cpu_writer->lock); continue; } /* do stuff */ that would make sure we won't miss earlier setting of ->mnt done by another CPU. Anyway, for now we just move the spin_lock() earlier and move the test into the properly locked region. Signed-off-by: Al Viro Reported-and-tested-by: Li Zefan Signed-off-by: Linus Torvalds --- fs/namespace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 228d8c4bfd18..06f8e63f6cb1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -614,9 +614,11 @@ static inline void __mntput(struct vfsmount *mnt) */ for_each_possible_cpu(cpu) { struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); - if (cpu_writer->mnt != mnt) - continue; spin_lock(&cpu_writer->lock); + if (cpu_writer->mnt != mnt) { + spin_unlock(&cpu_writer->lock); + continue; + } atomic_add(cpu_writer->count, &mnt->__mnt_writers); cpu_writer->count = 0; /* From ca77fde8e62cecb2c0769052228d15b901367af8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 13 Feb 2009 23:18:03 +0000 Subject: [PATCH 126/263] Fix Intel IOMMU write-buffer flushing This is the cause of the DMA faults and disk corruption that people have been seeing. Some chipsets neglect to report the RWBF "capability" -- the flag which says that we need to flush the chipset write-buffer when changing the DMA page tables, to ensure that the change is visible to the IOMMU. Override that bit on the affected chipsets, and everything is happy again. Thanks to Chris and Bhavesh and others for helping to debug. Signed-off-by: David Woodhouse Tested-by: Chris Wright Reviewed-by: Bhavesh Davda Signed-off-by: Linus Torvalds --- drivers/pci/intel-iommu.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f4b7c79023ff..fa9e41626bfc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -61,6 +61,8 @@ /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; +static int rwbf_quirk = 0; + /* * 0: Present * 1-11: Reserved @@ -785,7 +787,7 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) u32 val; unsigned long flag; - if (!cap_rwbf(iommu->cap)) + if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; val = iommu->gcmd | DMA_GCMD_WBF; @@ -3137,3 +3139,13 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap_range, .iova_to_phys = intel_iommu_iova_to_phys, }; + +static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) +{ + /* Mobile 4 Series Chipset neglects to set RWBF capability, + but needs it */ + printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); + rwbf_quirk = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); From 2f6097129af5625db2fb5e601f89e5bf55cd1dcd Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 16 Feb 2009 12:49:16 +0000 Subject: [PATCH 127/263] FRV: __pte_to_swp_entry doesn't expand correctly The macro doesn't expand correctly when its parameter isn't 'pte'. Signed-off-by: Roel Kluin Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-frv/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 83c51aba534b..e16fdb1f4f4f 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -478,7 +478,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __swp_type(x) (((x).val >> 2) & 0x1f) #define __swp_offset(x) ((x).val >> 8) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 8) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte }) +#define __pte_to_swp_entry(_pte) ((swp_entry_t) { (_pte).pte }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) static inline int pte_file(pte_t pte) From fd4b9b3650076ffadbdd6e360eb198f5d61747c0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 17 Feb 2009 20:45:50 +0100 Subject: [PATCH 128/263] [ARM] 5401/1: Orion: fix edge triggered GPIO interrupt support The GPIO interrupts can be configured as either level triggered or edge triggered, with a default of level triggered. When an edge triggered interrupt is requested, the gpio_irq_set_type method is called which currently switches the given IRQ descriptor between two struct irq_chip instances: orion_gpio_irq_level_chip and orion_gpio_irq_edge_chip. This happens via __setup_irq() which also calls irq_chip_set_defaults() to assign default methods to uninitialized ones. The problem is that irq_chip_set_defaults() is called before the irq_chip reference is switched, leaving the new irq_chip (orion_gpio_irq_edge_chip in this case) with uninitialized methods such as chip->startup() causing a kernel oops. Many solutions are possible, such as making irq_chip_set_defaults() global and calling it from gpio_irq_set_type(), or calling __irq_set_trigger() before irq_chip_set_defaults() in __setup_irq(). But those require modifications to the generic IRQ code which might have adverse effect on other architectures, and that would still be a fragile arrangement. Manually copying the missing methods from within gpio_irq_set_type() would be really ugly and it would break again the day new methods with automatic defaults are added. A better solution is to have a single irq_chip instance which can deal with both edge and level triggered interrupts. It is also a good idea to switch the IRQ handler instead, as the edge IRQ handler allows for one edge IRQ event to be queued as the IRQ is actually masked only when that second IRQ is received, at which point the hardware can queue an additional IRQ event, making edge triggered interrupts a bit more reliable. Tested-by: Martin Michlmayr Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mach-kirkwood/irq.c | 2 +- arch/arm/mach-mv78xx0/irq.c | 2 +- arch/arm/mach-orion5x/irq.c | 2 +- arch/arm/plat-orion/gpio.c | 75 +++++++++---------------- arch/arm/plat-orion/include/plat/gpio.h | 3 +- 5 files changed, 30 insertions(+), 54 deletions(-) diff --git a/arch/arm/mach-kirkwood/irq.c b/arch/arm/mach-kirkwood/irq.c index efb86b700276..06083b23bb44 100644 --- a/arch/arm/mach-kirkwood/irq.c +++ b/arch/arm/mach-kirkwood/irq.c @@ -42,7 +42,7 @@ void __init kirkwood_init_irq(void) writel(0, GPIO_EDGE_CAUSE(32)); for (i = IRQ_KIRKWOOD_GPIO_START; i < NR_IRQS; i++) { - set_irq_chip(i, &orion_gpio_irq_level_chip); + set_irq_chip(i, &orion_gpio_irq_chip); set_irq_handler(i, handle_level_irq); irq_desc[i].status |= IRQ_LEVEL; set_irq_flags(i, IRQF_VALID); diff --git a/arch/arm/mach-mv78xx0/irq.c b/arch/arm/mach-mv78xx0/irq.c index e273418797b4..30b7e4bcdbc7 100644 --- a/arch/arm/mach-mv78xx0/irq.c +++ b/arch/arm/mach-mv78xx0/irq.c @@ -40,7 +40,7 @@ void __init mv78xx0_init_irq(void) writel(0, GPIO_EDGE_CAUSE(0)); for (i = IRQ_MV78XX0_GPIO_START; i < NR_IRQS; i++) { - set_irq_chip(i, &orion_gpio_irq_level_chip); + set_irq_chip(i, &orion_gpio_irq_chip); set_irq_handler(i, handle_level_irq); irq_desc[i].status |= IRQ_LEVEL; set_irq_flags(i, IRQF_VALID); diff --git a/arch/arm/mach-orion5x/irq.c b/arch/arm/mach-orion5x/irq.c index 0caae43301e5..e03f7b45cb0d 100644 --- a/arch/arm/mach-orion5x/irq.c +++ b/arch/arm/mach-orion5x/irq.c @@ -44,7 +44,7 @@ void __init orion5x_init_irq(void) * User can use set_type() if he wants to use edge types handlers. */ for (i = IRQ_ORION5X_GPIO_START; i < NR_IRQS; i++) { - set_irq_chip(i, &orion_gpio_irq_level_chip); + set_irq_chip(i, &orion_gpio_irq_chip); set_irq_handler(i, handle_level_irq); irq_desc[i].status |= IRQ_LEVEL; set_irq_flags(i, IRQF_VALID); diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 967186425ca1..0d12c2164766 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -265,51 +265,36 @@ EXPORT_SYMBOL(orion_gpio_set_blink); * polarity LEVEL mask * ****************************************************************************/ -static void gpio_irq_edge_ack(u32 irq) -{ - int pin = irq_to_gpio(irq); - writel(~(1 << (pin & 31)), GPIO_EDGE_CAUSE(pin)); +static void gpio_irq_ack(u32 irq) +{ + int type = irq_desc[irq].status & IRQ_TYPE_SENSE_MASK; + if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) { + int pin = irq_to_gpio(irq); + writel(~(1 << (pin & 31)), GPIO_EDGE_CAUSE(pin)); + } } -static void gpio_irq_edge_mask(u32 irq) +static void gpio_irq_mask(u32 irq) { int pin = irq_to_gpio(irq); - u32 u; - - u = readl(GPIO_EDGE_MASK(pin)); + int type = irq_desc[irq].status & IRQ_TYPE_SENSE_MASK; + u32 reg = (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) ? + GPIO_EDGE_MASK(pin) : GPIO_LEVEL_MASK(pin); + u32 u = readl(reg); u &= ~(1 << (pin & 31)); - writel(u, GPIO_EDGE_MASK(pin)); + writel(u, reg); } -static void gpio_irq_edge_unmask(u32 irq) +static void gpio_irq_unmask(u32 irq) { int pin = irq_to_gpio(irq); - u32 u; - - u = readl(GPIO_EDGE_MASK(pin)); + int type = irq_desc[irq].status & IRQ_TYPE_SENSE_MASK; + u32 reg = (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) ? + GPIO_EDGE_MASK(pin) : GPIO_LEVEL_MASK(pin); + u32 u = readl(reg); u |= 1 << (pin & 31); - writel(u, GPIO_EDGE_MASK(pin)); -} - -static void gpio_irq_level_mask(u32 irq) -{ - int pin = irq_to_gpio(irq); - u32 u; - - u = readl(GPIO_LEVEL_MASK(pin)); - u &= ~(1 << (pin & 31)); - writel(u, GPIO_LEVEL_MASK(pin)); -} - -static void gpio_irq_level_unmask(u32 irq) -{ - int pin = irq_to_gpio(irq); - u32 u; - - u = readl(GPIO_LEVEL_MASK(pin)); - u |= 1 << (pin & 31); - writel(u, GPIO_LEVEL_MASK(pin)); + writel(u, reg); } static int gpio_irq_set_type(u32 irq, u32 type) @@ -331,9 +316,9 @@ static int gpio_irq_set_type(u32 irq, u32 type) * Set edge/level type. */ if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) { - desc->chip = &orion_gpio_irq_edge_chip; + desc->handle_irq = handle_edge_irq; } else if (type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) { - desc->chip = &orion_gpio_irq_level_chip; + desc->handle_irq = handle_level_irq; } else { printk(KERN_ERR "failed to set irq=%d (type=%d)\n", irq, type); return -EINVAL; @@ -371,19 +356,11 @@ static int gpio_irq_set_type(u32 irq, u32 type) return 0; } -struct irq_chip orion_gpio_irq_edge_chip = { - .name = "orion_gpio_irq_edge", - .ack = gpio_irq_edge_ack, - .mask = gpio_irq_edge_mask, - .unmask = gpio_irq_edge_unmask, - .set_type = gpio_irq_set_type, -}; - -struct irq_chip orion_gpio_irq_level_chip = { - .name = "orion_gpio_irq_level", - .mask = gpio_irq_level_mask, - .mask_ack = gpio_irq_level_mask, - .unmask = gpio_irq_level_unmask, +struct irq_chip orion_gpio_irq_chip = { + .name = "orion_gpio", + .ack = gpio_irq_ack, + .mask = gpio_irq_mask, + .unmask = gpio_irq_unmask, .set_type = gpio_irq_set_type, }; diff --git a/arch/arm/plat-orion/include/plat/gpio.h b/arch/arm/plat-orion/include/plat/gpio.h index 54deaf274b52..ec743e82c876 100644 --- a/arch/arm/plat-orion/include/plat/gpio.h +++ b/arch/arm/plat-orion/include/plat/gpio.h @@ -31,8 +31,7 @@ void orion_gpio_set_blink(unsigned pin, int blink); /* * GPIO interrupt handling. */ -extern struct irq_chip orion_gpio_irq_edge_chip; -extern struct irq_chip orion_gpio_irq_level_chip; +extern struct irq_chip orion_gpio_irq_chip; void orion_gpio_irq_handler(int irqoff); From 6ec68bff3c81e776a455f6aca95c8c5f1d630198 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:26 +0100 Subject: [PATCH 129/263] x86, mce: reinitialize per cpu features on resume Impact: Bug fix This fixes a long standing bug in the machine check code. On resume the boot CPU wouldn't get its vendor specific state like thermal handling reinitialized. This means the boot cpu wouldn't ever get any thermal events reported again. Call the respective initialization functions on resume v2: Remove ancient init because they don't have a resume device anyways. Pointed out by Thomas Gleixner. v3: Now fix the Subject too to reflect v2 change Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1c838032fd37..1f184efb6bc2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable); static int mce_resume(struct sys_device *dev) { mce_init(NULL); + mce_cpu_features(¤t_cpu_data); return 0; } From 380851bc6b1b4107c61dfa2997f9095dcf779336 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:33 +0100 Subject: [PATCH 130/263] x86, mce: use force_sig_info to kill process in machine check Impact: bug fix (with tolerant == 3) do_exit cannot be called directly from the exception handler because it can sleep and the exception handler runs on the exception stack. Use force_sig() instead. Based on a earlier patch by Ying Huang who debugged the problem. Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1f184efb6bc2..25cf624eccb7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code) * If we know that the error was in user space, send a * SIGBUS. Otherwise, panic if tolerance is low. * - * do_exit() takes an awful lot of locks and has a slight + * force_sig() takes an awful lot of locks and has a slight * risk of deadlocking. */ if (user_space) { - do_exit(SIGBUS); + force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { mce_panic("Uncorrected machine check", &panicm, mcestart); From 07db1c140eb233971341396e492cc73d4280e698 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:35 +0100 Subject: [PATCH 131/263] x86, mce: fix ifdef for 64bit thermal apic vector clear on shutdown Impact: Bugfix The ifdef for the apic clear on shutdown for the 64bit intel thermal vector was incorrect and never triggered. Fix that. Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 115449f869ee..570f36e44e59 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -862,7 +862,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); From 3494252d5644993f407a45f01c3e8ad5ae38f93c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Feb 2009 23:41:12 +0100 Subject: [PATCH 132/263] USB/PCI: Fix resume breakage of controllers behind cardbus bridges If a USB PCI controller is behind a cardbus bridge, we are trying to restore its configuration registers too early, before the cardbus bridge is operational. To fix this, call pci_restore_state() from usb_hcd_pci_resume() and remove usb_hcd_pci_resume_early() which is no longer necessary (the configuration spaces of USB controllers that are not behind cardbus bridges will be restored by the PCI PM core with interrupts disabled anyway). This patch fixes the regression from 2.6.28 tracked as http://bugzilla.kernel.org/show_bug.cgi?id=12659 [ Side note: the proper long-term fix is probably to just force the unplug event at suspend time instead of doing a plug/unplug at resume time, but this patch is fine regardless - Linus ] Signed-off-by: Rafael J. Wysocki Reported-by: Miles Lane Signed-off-by: Linus Torvalds --- drivers/usb/core/hcd-pci.c | 15 ++------------- drivers/usb/core/hcd.h | 1 - drivers/usb/host/ehci-pci.c | 1 - drivers/usb/host/ohci-pci.c | 1 - drivers/usb/host/uhci-hcd.c | 1 - 5 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index c54fc40458b1..a4301dc02d27 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -297,19 +297,6 @@ int usb_hcd_pci_suspend(struct pci_dev *dev, pm_message_t message) } EXPORT_SYMBOL_GPL(usb_hcd_pci_suspend); -/** - * usb_hcd_pci_resume_early - resume a PCI-based HCD before IRQs are enabled - * @dev: USB Host Controller being resumed - * - * Store this function in the HCD's struct pci_driver as .resume_early. - */ -int usb_hcd_pci_resume_early(struct pci_dev *dev) -{ - pci_restore_state(dev); - return 0; -} -EXPORT_SYMBOL_GPL(usb_hcd_pci_resume_early); - /** * usb_hcd_pci_resume - power management resume of a PCI-based HCD * @dev: USB Host Controller being resumed @@ -333,6 +320,8 @@ int usb_hcd_pci_resume(struct pci_dev *dev) } #endif + pci_restore_state(dev); + hcd = pci_get_drvdata(dev); if (hcd->state != HC_STATE_SUSPENDED) { dev_dbg(hcd->self.controller, diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index 5b94a56bec23..f750eb1ab595 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -257,7 +257,6 @@ extern void usb_hcd_pci_remove(struct pci_dev *dev); #ifdef CONFIG_PM extern int usb_hcd_pci_suspend(struct pci_dev *dev, pm_message_t msg); -extern int usb_hcd_pci_resume_early(struct pci_dev *dev); extern int usb_hcd_pci_resume(struct pci_dev *dev); #endif /* CONFIG_PM */ diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index bb21fb0a4969..abb9a7706ec7 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -432,7 +432,6 @@ static struct pci_driver ehci_pci_driver = { #ifdef CONFIG_PM .suspend = usb_hcd_pci_suspend, - .resume_early = usb_hcd_pci_resume_early, .resume = usb_hcd_pci_resume, #endif .shutdown = usb_hcd_pci_shutdown, diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 5d625c3fd423..f9961b4c0da3 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -487,7 +487,6 @@ static struct pci_driver ohci_pci_driver = { #ifdef CONFIG_PM .suspend = usb_hcd_pci_suspend, - .resume_early = usb_hcd_pci_resume_early, .resume = usb_hcd_pci_resume, #endif diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 944f7e0ca4df..cf5e4cf7ea42 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -942,7 +942,6 @@ static struct pci_driver uhci_pci_driver = { #ifdef CONFIG_PM .suspend = usb_hcd_pci_suspend, - .resume_early = usb_hcd_pci_resume_early, .resume = usb_hcd_pci_resume, #endif /* PM */ }; From 5955c7a2cfb6a35429adea5dc480002b15ca8cfc Mon Sep 17 00:00:00 2001 From: Zlatko Calusic Date: Wed, 18 Feb 2009 01:33:34 +0100 Subject: [PATCH 133/263] Add support for VT6415 PCIE PATA IDE Host Controller Signed-off-by: Zlatko Calusic Signed-off-by: Linus Torvalds --- drivers/ata/pata_via.c | 4 +++- include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 79a6c9a0b721..ba556d3e6963 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -110,7 +110,8 @@ static const struct via_isa_bridge { { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, - { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, + { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, + { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, @@ -593,6 +594,7 @@ static int via_reinit_one(struct pci_dev *pdev) #endif static const struct pci_device_id via[] = { + { PCI_VDEVICE(VIA, 0x0415), }, { PCI_VDEVICE(VIA, 0x0571), }, { PCI_VDEVICE(VIA, 0x0581), }, { PCI_VDEVICE(VIA, 0x1571), }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 52a9fe08451c..918391b4b109 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1312,6 +1312,7 @@ #define PCI_DEVICE_ID_VIA_VT3351 0x0351 #define PCI_DEVICE_ID_VIA_VT3364 0x0364 #define PCI_DEVICE_ID_VIA_8371_0 0x0391 +#define PCI_DEVICE_ID_VIA_6415 0x0415 #define PCI_DEVICE_ID_VIA_8501_0 0x0501 #define PCI_DEVICE_ID_VIA_82C561 0x0561 #define PCI_DEVICE_ID_VIA_82C586_1 0x0571 From 444122fd58fdc83c96877a92b3f6288cafddb08d Mon Sep 17 00:00:00 2001 From: Yi Li Date: Thu, 5 Feb 2009 15:31:57 +0800 Subject: [PATCH 134/263] MMC: fix bug - SDHC card capacity not correct Signed-off-by: Yi Li Signed-off-by: Bryan Wu Signed-off-by: Pierre Ossman --- drivers/mmc/card/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 45b1f430685f..513eb09a638f 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -584,7 +584,7 @@ static int mmc_blk_probe(struct mmc_card *card) if (err) goto out; - string_get_size(get_capacity(md->disk) << 9, STRING_UNITS_2, + string_get_size((u64)get_capacity(md->disk) << 9, STRING_UNITS_2, cap_str, sizeof(cap_str)); printk(KERN_INFO "%s: %s %s %s %s\n", md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), From 86a6a8749d5b8fd5c2b544fe9fd11101e3d0550f Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 2 Feb 2009 21:13:49 +0100 Subject: [PATCH 135/263] Revert "sdhci: force high speed capability on some controllers" This reverts commit a4b76193774b463b922cab2f92450efb20d29ef0. It turned out that the controller had problem running at the higher speed, so go back to trusting the hardware capability bits. Signed-off-by: Pierre Ossman --- drivers/mmc/host/sdhci-pci.c | 3 +-- drivers/mmc/host/sdhci.c | 3 +-- drivers/mmc/host/sdhci.h | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index f07255cb17ee..8cff5f5e7f86 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -144,8 +144,7 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) SDHCI_QUIRK_32BIT_DMA_SIZE | SDHCI_QUIRK_32BIT_ADMA_SIZE | SDHCI_QUIRK_RESET_AFTER_REQUEST | - SDHCI_QUIRK_BROKEN_SMALL_PIO | - SDHCI_QUIRK_FORCE_HIGHSPEED; + SDHCI_QUIRK_BROKEN_SMALL_PIO; } /* diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6b2d1f99af67..24d7414a3315 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1636,8 +1636,7 @@ int sdhci_add_host(struct sdhci_host *host) mmc->f_max = host->max_clk; mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; - if ((caps & SDHCI_CAN_DO_HISPD) || - (host->quirks & SDHCI_QUIRK_FORCE_HIGHSPEED)) + if (caps & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED; mmc->ocr_avail = 0; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 3efba2363941..ffeab227d95b 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -208,8 +208,6 @@ struct sdhci_host { #define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12) /* Controller has an issue with buffer bits for small transfers */ #define SDHCI_QUIRK_BROKEN_SMALL_PIO (1<<13) -/* Controller supports high speed but doesn't have the caps bit set */ -#define SDHCI_QUIRK_FORCE_HIGHSPEED (1<<14) int irq; /* Device IRQ */ void __iomem * ioaddr; /* Mapped address */ From c1c201200a359cf3b6e2e36a4236cdca77a3cd8e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 3 Feb 2009 07:47:29 +0100 Subject: [PATCH 136/263] bsg: Fix sense buffer bug in SG_IO When submitting requests via SG_IO, which does a sync io, a bsg_command is not allocated. So an in-Kernel sense_buffer was not set. However when calling blk_execute_rq() with no sense buffer one is provided from the stack. Now bsg at blk_complete_sgv4_hdr_rq() would check if rq->sense_len and a sense was requested by sg_io_v4 the rq->sense was copy_user() back, but by now it is already mangled stack memory. I have fixed that by forcing a sense_buffer when calling bsg_map_hdr(). The bsg_command->sense is provided in the write/read path like before, and on-the-stack buffer is provided when doing SG_IO. I have also fixed a dprintk message to print rq->errors in hex because of the scsi bit-field use of this member. For other block devices it does not matter anyway. Signed-off-by: Boaz Harrosh Acked-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index d414bb5607e8..0ce8806dd0c1 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -244,7 +244,8 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, + u8 *sense) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; @@ -306,6 +307,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) if (ret) goto out; } + + rq->sense = sense; + rq->sense_len = 0; + return rq; out: if (rq->cmd != rq->__cmd) @@ -348,9 +353,6 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, struct bsg_command *bc, struct request *rq) { - rq->sense = bc->sense; - rq->sense_len = 0; - /* * add bc command to busy queue and submit rq for io */ @@ -419,7 +421,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, { int ret = 0; - dprintk("rq %p bio %p %u\n", rq, bio, rq->errors); + dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors); /* * fill in all the output members */ @@ -635,7 +637,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm); + rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -922,11 +924,12 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct request *rq; struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; + u8 sense[SCSI_SENSE_BUFFERSIZE]; if (copy_from_user(&hdr, uarg, sizeof(hdr))) return -EFAULT; - rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE); + rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense); if (IS_ERR(rq)) return PTR_ERR(rq); From 93dbb393503d53cd226e5e1f0088fe8f4dbaa2b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Feb 2009 10:25:40 +0100 Subject: [PATCH 137/263] block: fix bad definition of BIO_RW_SYNC We can't OR shift values, so get rid of BIO_RW_SYNC and use BIO_RW_SYNCIO and BIO_RW_UNPLUG explicitly. This brings back the behaviour from before 213d9417fec62ef4c3675621b9364a667954d4dd. Signed-off-by: Jens Axboe --- block/blktrace.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm-kcopyd.c | 2 +- drivers/md/md.c | 4 ++-- include/linux/bio.h | 2 -- include/linux/blktrace_api.h | 1 + include/linux/fs.h | 6 +++--- kernel/power/swap.c | 5 +++-- mm/page_io.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/block/blktrace.c b/block/blktrace.c index 39cc3bfe56e4..7cf9d1ff45a0 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -142,7 +142,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= ddir_act[rw & WRITE]; what |= MASK_TC_BIT(rw, BARRIER); - what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, SYNCIO); what |= MASK_TC_BIT(rw, AHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index a34338567a2a..f14813be4eff 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -328,7 +328,7 @@ static void dispatch_io(int rw, unsigned int num_regions, struct dpages old_pages = *dp; if (sync) - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); /* * For multiple regions we need to be careful to rewind diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 3073618269ea..0a225da21272 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -344,7 +344,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_rw = job->rw | (1 << BIO_RW_SYNC), + .bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG), .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = job->offset, diff --git a/drivers/md/md.c b/drivers/md/md.c index 4495104f6c9f..03b4cd0a6344 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -474,7 +474,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, * causes ENOTSUPP, we allocate a spare bio... */ struct bio *bio = bio_alloc(GFP_NOIO, 1); - int rw = (1<bi_bdev = rdev->bdev; bio->bi_sector = sector; @@ -531,7 +531,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct completion event; int ret; - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); bio->bi_bdev = bdev; bio->bi_sector = sector; diff --git a/include/linux/bio.h b/include/linux/bio.h index 2aa283ab062b..1b16108a5417 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -171,8 +171,6 @@ struct bio { #define BIO_RW_FAILFAST_TRANSPORT 8 #define BIO_RW_FAILFAST_DRIVER 9 -#define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) - #define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 25379cba2370..6e915878e88c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -15,6 +15,7 @@ enum blktrace_cat { BLK_TC_WRITE = 1 << 1, /* writes */ BLK_TC_BARRIER = 1 << 2, /* barrier */ BLK_TC_SYNC = 1 << 3, /* sync IO */ + BLK_TC_SYNCIO = BLK_TC_SYNC, BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ BLK_TC_REQUEUE = 1 << 5, /* requeueing */ BLK_TC_ISSUE = 1 << 6, /* issue */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 6022f44043f2..67857dc16365 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,10 +87,10 @@ struct inodes_stat_t { #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) +#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) +#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6da14358537c..505f319e489c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -60,6 +60,7 @@ static struct block_device *resume_bdev; static int submit(int rw, pgoff_t page_off, struct page *page, struct bio **bio_chain) { + const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); @@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, bio_get(bio); if (bio_chain == NULL) { - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); wait_on_page_locked(page); if (rw == READ) bio_set_pages_dirty(bio); @@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, get_page(page); /* These pages are freed later */ bio->bi_private = *bio_chain; *bio_chain = bio; - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); } return 0; } diff --git a/mm/page_io.c b/mm/page_io.c index dc6ce0afbded..3023c475e041 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) goto out; } if (wbc->sync_mode == WB_SYNC_ALL) - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); count_vm_event(PSWPOUT); set_page_writeback(page); unlock_page(page); From a60e78e57a17d55bbd5a96da16fe9649d364b987 Mon Sep 17 00:00:00 2001 From: Subhash Peddamallu Date: Mon, 16 Feb 2009 10:27:07 +0100 Subject: [PATCH 138/263] fs/bio: bio_alloc_bioset: pass right object ptr to mempool_free When freeing from bio pool use right ptr to account for bs->front_pad, instead of bio ptr, Signed-off-by: Subhash Peddamallu Signed-off-by: Jens Axboe --- fs/bio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 062299acbccd..72ab251cdb9c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -302,9 +302,10 @@ void bio_init(struct bio *bio) struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = NULL; + void *p; if (bs) { - void *p = mempool_alloc(bs->bio_pool, gfp_mask); + p = mempool_alloc(bs->bio_pool, gfp_mask); if (p) bio = p + bs->front_pad; @@ -329,7 +330,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) } if (unlikely(!bvl)) { if (bs) - mempool_free(bio, bs->bio_pool); + mempool_free(p, bs->bio_pool); else kfree(bio); bio = NULL; From c8cbec6bdf6329279fd14696020f6b59d1d3124d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 16 Feb 2009 13:11:55 +0100 Subject: [PATCH 139/263] paride/pg.c: xs(): &&/|| confusion &&/|| confusion Signed-off-by: Roel Kluin Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/paride/pg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 9dfa27163001..c397b3ddba9b 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -422,7 +422,7 @@ static void xs(char *buf, char *targ, int len) for (k = 0; k < len; k++) { char c = *buf++; - if (c != ' ' || c != l) + if (c != ' ' && c != l) l = *targ++ = c; } if (l == ' ') From 82eb03cfd862a65363fa2826de0dbd5474cfe5e2 Mon Sep 17 00:00:00 2001 From: Chip Coldwell Date: Mon, 16 Feb 2009 13:11:56 +0100 Subject: [PATCH 140/263] cciss: PCI power management reset for kexec The kexec kernel resets the CCISS hardware in three steps: 1. Use PCI power management states to reset the controller in the kexec kernel. 2. Clear the MSI/MSI-X bits in PCI configuration space so that MSI initialization in the kexec kernel doesn't fail. 3. Use the CCISS "No-op" message to determine when the controller firmware has recovered from the PCI PM reset. [akpm@linux-foundation.org: cleanups] Signed-off-by: Mike Miller Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 215 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 01e69383d9c0..d2cb67b61176 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3390,6 +3390,203 @@ static void free_hba(int i) kfree(p); } +/* Send a message CDB to the firmware. */ +static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type) +{ + typedef struct { + CommandListHeader_struct CommandHeader; + RequestBlock_struct Request; + ErrDescriptor_struct ErrorDescriptor; + } Command; + static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct); + Command *cmd; + dma_addr_t paddr64; + uint32_t paddr32, tag; + void __iomem *vaddr; + int i, err; + + vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); + if (vaddr == NULL) + return -ENOMEM; + + /* The Inbound Post Queue only accepts 32-bit physical addresses for the + CCISS commands, so they must be allocated from the lower 4GiB of + memory. */ + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + iounmap(vaddr); + return -ENOMEM; + } + + cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64); + if (cmd == NULL) { + iounmap(vaddr); + return -ENOMEM; + } + + /* This must fit, because of the 32-bit consistent DMA mask. Also, + although there's no guarantee, we assume that the address is at + least 4-byte aligned (most likely, it's page-aligned). */ + paddr32 = paddr64; + + cmd->CommandHeader.ReplyQueue = 0; + cmd->CommandHeader.SGList = 0; + cmd->CommandHeader.SGTotal = 0; + cmd->CommandHeader.Tag.lower = paddr32; + cmd->CommandHeader.Tag.upper = 0; + memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8); + + cmd->Request.CDBLen = 16; + cmd->Request.Type.Type = TYPE_MSG; + cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE; + cmd->Request.Type.Direction = XFER_NONE; + cmd->Request.Timeout = 0; /* Don't time out */ + cmd->Request.CDB[0] = opcode; + cmd->Request.CDB[1] = type; + memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */ + + cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command); + cmd->ErrorDescriptor.Addr.upper = 0; + cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct); + + writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET); + + for (i = 0; i < 10; i++) { + tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); + if ((tag & ~3) == paddr32) + break; + schedule_timeout_uninterruptible(HZ); + } + + iounmap(vaddr); + + /* we leak the DMA buffer here ... no choice since the controller could + still complete the command. */ + if (i == 10) { + printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n", + opcode, type); + return -ETIMEDOUT; + } + + pci_free_consistent(pdev, cmd_sz, cmd, paddr64); + + if (tag & 2) { + printk(KERN_ERR "cciss: controller message %02x:%02x failed\n", + opcode, type); + return -EIO; + } + + printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n", + opcode, type); + return 0; +} + +#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) +#define cciss_noop(p) cciss_message(p, 3, 0) + +static __devinit int cciss_reset_msi(struct pci_dev *pdev) +{ +/* the #defines are stolen from drivers/pci/msi.h. */ +#define msi_control_reg(base) (base + PCI_MSI_FLAGS) +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) + + int pos; + u16 control = 0; + + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + if (pos) { + pci_read_config_word(pdev, msi_control_reg(pos), &control); + if (control & PCI_MSI_FLAGS_ENABLE) { + printk(KERN_INFO "cciss: resetting MSI\n"); + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE); + } + } + + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + if (pos) { + pci_read_config_word(pdev, msi_control_reg(pos), &control); + if (control & PCI_MSIX_FLAGS_ENABLE) { + printk(KERN_INFO "cciss: resetting MSI-X\n"); + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE); + } + } + + return 0; +} + +/* This does a hard reset of the controller using PCI power management + * states. */ +static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev) +{ + u16 pmcsr, saved_config_space[32]; + int i, pos; + + printk(KERN_INFO "cciss: using PCI PM to reset controller\n"); + + /* This is very nearly the same thing as + + pci_save_state(pci_dev); + pci_set_power_state(pci_dev, PCI_D3hot); + pci_set_power_state(pci_dev, PCI_D0); + pci_restore_state(pci_dev); + + but we can't use these nice canned kernel routines on + kexec, because they also check the MSI/MSI-X state in PCI + configuration space and do the wrong thing when it is + set/cleared. Also, the pci_save/restore_state functions + violate the ordering requirements for restoring the + configuration space from the CCISS document (see the + comment below). So we roll our own .... */ + + for (i = 0; i < 32; i++) + pci_read_config_word(pdev, 2*i, &saved_config_space[i]); + + pos = pci_find_capability(pdev, PCI_CAP_ID_PM); + if (pos == 0) { + printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n"); + return -ENODEV; + } + + /* Quoting from the Open CISS Specification: "The Power + * Management Control/Status Register (CSR) controls the power + * state of the device. The normal operating state is D0, + * CSR=00h. The software off state is D3, CSR=03h. To reset + * the controller, place the interface device in D3 then to + * D0, this causes a secondary PCI reset which will reset the + * controller." */ + + /* enter the D3hot power management state */ + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + pmcsr |= PCI_D3hot; + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); + + schedule_timeout_uninterruptible(HZ >> 1); + + /* enter the D0 power management state */ + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + pmcsr |= PCI_D0; + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); + + schedule_timeout_uninterruptible(HZ >> 1); + + /* Restore the PCI configuration space. The Open CISS + * Specification says, "Restore the PCI Configuration + * Registers, offsets 00h through 60h. It is important to + * restore the command register, 16-bits at offset 04h, + * last. Do not restore the configuration status register, + * 16-bits at offset 06h." Note that the offset is 2*i. */ + for (i = 0; i < 32; i++) { + if (i == 2 || i == 3) + continue; + pci_write_config_word(pdev, 2*i, saved_config_space[i]); + } + wmb(); + pci_write_config_word(pdev, 4, saved_config_space[2]); + + return 0; +} + /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -3404,6 +3601,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, int dac, return_code; InquiryData_struct *inq_buff = NULL; + if (reset_devices) { + /* Reset the controller with a PCI power-cycle */ + if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev)) + return -ENODEV; + + /* Some devices (notably the HP Smart Array 5i Controller) + need a little pause here */ + schedule_timeout_uninterruptible(30*HZ); + + /* Now try to get the controller to respond to a no-op */ + for (i=0; i<12; i++) { + if (cciss_noop(pdev) == 0) + break; + else + printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); + } + } + i = alloc_cciss_hba(); if (i < 0) return -1; From 78f707bfc723552e8309b7c38a8d0cc51012e813 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Feb 2009 13:59:08 +0100 Subject: [PATCH 141/263] block: revert part of 18ce3751ccd488c78d3827e9f6bf54e6322676fb The above commit added WRITE_SYNC and switched various places to using that for committing writes that will be waited upon immediately after submission. However, this causes a performance regression with AS and CFQ for ext3 at least, since sync_dirty_buffer() will submit some writes with WRITE_SYNC while ext3 has sumitted others dependent writes without the sync flag set. This causes excessive anticipation/idling in the IO scheduler because sync and async writes get interleaved, causing a big performance regression for the below test case (which is meant to simulate sqlite like behaviour). ---- test case ---- int main(int argc, char **argv) { int fdes, i; FILE *fp; struct timeval start; struct timeval end; struct timeval res; gettimeofday(&start, NULL); for (i=0; i --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index 665d446b25bc..62b57e330b69 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3108,7 +3108,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(WRITE, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); From 41b8c853a495438208faa5be03bbb0050859163b Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 18 Feb 2009 10:33:59 +0100 Subject: [PATCH 142/263] block: fix booting from partitioned md array Hi Tejun, it looks like your commit: block: don't depend on consecutive minor space f331c0296f2a9fee0d396a70598b954062603015 broke a particular case for booting from partitioned md/raid devices. That is the second time this has been broken recently. The previous time was fixed by block: do_mounts - accept root= 30f2f0eb4bd2c43d10a8b0d872c6e5ad8f31c9a0 Because the data isn't available when an md device is first created (we add disks and set it up after creation), the initial partition scan finds nothing. It is not until the device is opened that another partition scan happens and finds something. So at the point where the kernel parameter "root=/dev/md_d0p1" is being parsed, md_d0 exists, but md_d0p1 does not. However if we let blk_lookup_devt return the correct device number even though the device doesn't exist, then the attempt to mount it will successfully find the partition. I have tried in the past to find a way to get the partition table to be read as soon as the array is assembled but that proved impossible (at the time). I don't remember the details, and could possibly revisit it. However it would be really nice if blk_lookup_devt could be adjusted to again accept non existant partitions. Signed-off-by: Jens Axboe --- block/genhd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 397960cf26af..e1eadcc9546a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1087,6 +1087,14 @@ dev_t blk_lookup_devt(const char *name, int partno) if (strcmp(dev_name(dev), name)) continue; + if (partno < disk->minors) { + /* We need to return the right devno, even + * if the partition doesn't exist yet. + */ + devt = MKDEV(MAJOR(dev->devt), + MINOR(dev->devt) + partno); + break; + } part = disk_get_part(disk, partno); if (part) { devt = part_devt(part); From be987fdb55a4726e2fcbab7501f89276bdb57288 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 18 Feb 2009 10:30:15 +0100 Subject: [PATCH 143/263] block: fix deadlock in blk_abort_queue() for drivers that readd to timeout list blk_abort_queue() iterates the timeout list and aborts each request on the list, but if the driver error handling readds a request to the timeout list during this processing, we could be looping forever. Fix this by splicing current entries to a local list and run over that list instead. Signed-off-by: Jens Axboe --- block/blk-timeout.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index a09535377a94..bbbdc4b8ccf2 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -209,12 +209,19 @@ void blk_abort_queue(struct request_queue *q) { unsigned long flags; struct request *rq, *tmp; + LIST_HEAD(list); spin_lock_irqsave(q->queue_lock, flags); elv_abort_queue(q); - list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) + /* + * Splice entries to local list, to avoid deadlocking if entries + * get readded to the timeout list by error handling + */ + list_splice_init(&q->timeout_list, &list); + + list_for_each_entry_safe(rq, tmp, &list, timeout_list) blk_abort_request(rq); spin_unlock_irqrestore(q->queue_lock, flags); From 2678f60d2bc05a12580b93eb36f089f0e55693e0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 18 Feb 2009 16:46:27 +0100 Subject: [PATCH 144/263] ALSA: jack - Use card->shortname for input name Currently the jack layer refers to card->longname as a part of its input device name string. However, longname is often really long and way too ugly as an identifier, such as, "HDA Intel at 0xf8400000 irq 21". This patch changes the code to use card->shortname instead. The shortname string contains usually the h/w vendor and product names but without messy I/O port or IRQ numbers. Signed-off-by: Takashi Iwai --- sound/core/jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/jack.c b/sound/core/jack.c index dd4a12dc09aa..077a85262c1c 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -47,7 +47,7 @@ static int snd_jack_dev_register(struct snd_device *device) int err; snprintf(jack->name, sizeof(jack->name), "%s %s", - card->longname, jack->id); + card->shortname, jack->id); jack->input_dev->name = jack->name; /* Default to the sound card device. */ From 994244883739e4044bef76d4e5d7a9b66dc6c7b6 Mon Sep 17 00:00:00 2001 From: Yauhen Kharuzhy Date: Wed, 11 Feb 2009 13:25:52 -0800 Subject: [PATCH 145/263] s3cmci: Fix hangup in do_pio_write() This commit fixes the regression what was added by commit 088a78af978d0c8e339071a9b2bca1f4cb368f30 "s3cmci: Support transfers which are not multiple of 32 bits." fifo_free() now returns amount of available space in FIFO buffer in bytes. But do_pio_write() writes to FIFO 32-bit words. Condition for return from cycle is (fifo_free() == 0), but when fifo has 1..3 bytes of free space then this condition will never be true and system hangs. This patch changes condition in the while() to (fifo_free() > 3). Signed-off-by: Yauhen Kharuzhy Signed-off-by: Andrew Morton Signed-off-by: Pierre Ossman --- drivers/mmc/host/s3cmci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 35a98eec7414..f4a67c65d301 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -329,7 +329,7 @@ static void do_pio_write(struct s3cmci_host *host) to_ptr = host->base + host->sdidata; - while ((fifo = fifo_free(host))) { + while ((fifo = fifo_free(host)) > 3) { if (!host->pio_bytes) { res = get_data_buffer(host, &host->pio_bytes, &host->pio_ptr); From 58a5dd3e0e77029d3db1f8fa75d0b54b38169d5d Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 13 Feb 2009 22:55:26 +0530 Subject: [PATCH 146/263] mmc_test: fix basic read test Due to a typo in the Basic Read test, it's currently identical to the Basic Write test. Fix this. Signed-off-by: Rabin Vincent Signed-off-by: Pierre Ossman --- drivers/mmc/card/mmc_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index b92b172074ee..b9f1e84897cc 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -494,7 +494,7 @@ static int mmc_test_basic_read(struct mmc_test_card *test) sg_init_one(&sg, test->buffer, 512); - ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1); + ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 0); if (ret) return ret; From 5dbace0c9ba110c1a3810a89fa6bf12b7574b5a3 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Sat, 14 Feb 2009 16:22:39 +0100 Subject: [PATCH 147/263] sdhci: fix led naming Fix the led device naming for the sdhci driver. The led class documentation defines the led name to have the form "devicename:colour:function" while not applicable sections should be left blank. To comply with the documentation the led device name is changed from "mmc*" to "mmc*::". Signed-off-by: Helmut Schaa Signed-off-by: Pierre Ossman --- drivers/mmc/host/sdhci.c | 4 +++- drivers/mmc/host/sdhci.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 24d7414a3315..f52f3053ed92 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1722,7 +1722,9 @@ int sdhci_add_host(struct sdhci_host *host) #endif #ifdef SDHCI_USE_LEDS_CLASS - host->led.name = mmc_hostname(mmc); + snprintf(host->led_name, sizeof(host->led_name), + "%s::", mmc_hostname(mmc)); + host->led.name = host->led_name; host->led.brightness = LED_OFF; host->led.default_trigger = mmc_hostname(mmc); host->led.brightness_set = sdhci_led_control; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index ffeab227d95b..ebb83657e27a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -220,6 +220,7 @@ struct sdhci_host { #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) struct led_classdev led; /* LED control */ + char led_name[32]; #endif spinlock_t lock; /* Mutex */ From 249d0fa9d59b6165ecc224720d9ce9b7267cf1b8 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 4 Feb 2009 14:42:03 -0800 Subject: [PATCH 148/263] omap_hsmmc: card detect irq bugfix Work around lockdep issue when card detect IRQ handlers run in thread context ... it forces IRQF_DISABLED, which prevents all access to twl4030 card detect signals. Signed-off-by: David Brownell Acked-by: Tony Lindgren Signed-off-by: Pierre Ossman --- drivers/mmc/host/omap_hsmmc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index db37490f67ec..4dba48642e60 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -517,6 +517,9 @@ static void mmc_omap_detect(struct work_struct *work) { struct mmc_omap_host *host = container_of(work, struct mmc_omap_host, mmc_carddetect_work); + struct omap_mmc_slot_data *slot = &mmc_slot(host); + + host->carddetect = slot->card_detect(slot->card_detect_irq); sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch"); if (host->carddetect) { @@ -538,7 +541,6 @@ static irqreturn_t omap_mmc_cd_handler(int irq, void *dev_id) { struct mmc_omap_host *host = (struct mmc_omap_host *)dev_id; - host->carddetect = mmc_slot(host).card_detect(irq); schedule_work(&host->mmc_carddetect_work); return IRQ_HANDLED; From eb25082657be3e7639e349fc926afdcbb0a4dc65 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 17 Feb 2009 14:49:01 -0800 Subject: [PATCH 149/263] omap_hsmmc: only MMC1 allows HCTL.SDVS != 1.8V Based on a patch from Tony Lindgren ... after initialization, never change HCTL.SDVS except for MMC1. The other controller instances only support 1.8V in that field, although they can suport other card/SDIO/eMMC/... voltages with level shifting solutions such as external transceivers. MMC2 behavior sanity tested on Overo/WLAN, OMAP3430 SDP, and custom hardware. MMC1 also sanity tested on those platforms plus Beagle. This also fixes a bug preventing MMC2 (and also presumably MMC3) from powering down when requested. Signed-off-by: David Brownell Acked-by: Tony Lindgren Signed-off-by: Pierre Ossman --- drivers/mmc/host/omap_hsmmc.c | 43 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 4dba48642e60..3bfd0facb794 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -55,6 +55,7 @@ #define VS30 (1 << 25) #define SDVS18 (0x5 << 9) #define SDVS30 (0x6 << 9) +#define SDVS33 (0x7 << 9) #define SDVSCLR 0xFFFFF1FF #define SDVSDET 0x00000400 #define AUTOIDLE 0x1 @@ -456,13 +457,20 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) } /* - * Switch MMC operating voltage + * Switch MMC interface voltage ... only relevant for MMC1. + * + * MMC2 and MMC3 use fixed 1.8V levels, and maybe a transceiver. + * The MMC2 transceiver controls are used instead of DAT4..DAT7. + * Some chips, like eMMC ones, use internal transceivers. */ static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd) { u32 reg_val = 0; int ret; + if (host->id != OMAP_MMC1_DEVID) + return 0; + /* Disable the clocks */ clk_disable(host->fclk); clk_disable(host->iclk); @@ -485,19 +493,26 @@ static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd) OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & SDVSCLR); reg_val = OMAP_HSMMC_READ(host->base, HCTL); + /* * If a MMC dual voltage card is detected, the set_ios fn calls * this fn with VDD bit set for 1.8V. Upon card removal from the * slot, omap_mmc_set_ios sets the VDD back to 3V on MMC_POWER_OFF. * - * Only MMC1 supports 3.0V. MMC2 will not function if SDVS30 is - * set in HCTL. + * Cope with a bit of slop in the range ... per data sheets: + * - "1.8V" for vdds_mmc1/vdds_mmc1a can be up to 2.45V max, + * but recommended values are 1.71V to 1.89V + * - "3.0V" for vdds_mmc1/vdds_mmc1a can be up to 3.5V max, + * but recommended values are 2.7V to 3.3V + * + * Board setup code shouldn't permit anything very out-of-range. + * TWL4030-family VMMC1 and VSIM regulators are fine (avoiding the + * middle range) but VSIM can't power DAT4..DAT7 at more than 3V. */ - if (host->id == OMAP_MMC1_DEVID && (((1 << vdd) == MMC_VDD_32_33) || - ((1 << vdd) == MMC_VDD_33_34))) - reg_val |= SDVS30; - if ((1 << vdd) == MMC_VDD_165_195) + if ((1 << vdd) <= MMC_VDD_23_24) reg_val |= SDVS18; + else + reg_val |= SDVS30; OMAP_HSMMC_WRITE(host->base, HCTL, reg_val); @@ -759,10 +774,14 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_OFF: mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0); /* - * Reset bus voltage to 3V if it got set to 1.8V earlier. + * Reset interface voltage to 3V if it's 1.8V now; + * only relevant on MMC-1, the others always use 1.8V. + * * REVISIT: If we are able to detect cards after unplugging * a 1.8V card, this code should not be needed. */ + if (host->id != OMAP_MMC1_DEVID) + break; if (!(OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET)) { int vdd = fls(host->mmc->ocr_avail) - 1; if (omap_mmc_switch_opcond(host, vdd) != 0) @@ -786,7 +805,9 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } if (host->id == OMAP_MMC1_DEVID) { - /* Only MMC1 can operate at 3V/1.8V */ + /* Only MMC1 can interface at 3V without some flavor + * of external transceiver; but they all handle 1.8V. + */ if ((OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET) && (ios->vdd == DUAL_VOLT_OCR_BIT)) { /* @@ -1139,7 +1160,9 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state) " level suspend\n"); } - if (!(OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET)) { + if (host->id == OMAP_MMC1_DEVID + && !(OMAP_HSMMC_READ(host->base, HCTL) + & SDVSDET)) { OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & SDVSCLR); From c232f457e409b34417166596ea3daf298ace95c9 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Wed, 11 Feb 2009 13:11:39 -0800 Subject: [PATCH 150/263] omap_hsmmc: recover from transfer failures Timeouts during a command that has a data phase can result in the next command issued after the command that failed not being processed, i.e. no interrupt ever occurs to indicate the command has completed. This failure can result in a deadlock. This patch resets the data state machine to clear the error in case of a command timeout. Tested on OMAP3430 chip and intensive MMC/SD device removal while transferring data. Signed-off-by: Andy Lowe Signed-off-by: Jean Pihet Signed-off-by: Adrian Hunter Signed-off-by: Andrew Morton Acked-by: Tony Lindgren Signed-off-by: Pierre Ossman --- drivers/mmc/host/omap_hsmmc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3bfd0facb794..8d21a07f63de 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -417,8 +417,15 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) } end_cmd = 1; } - if (host->data) + if (host->data) { mmc_dma_cleanup(host); + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, + SYSCTL) | SRD); + while (OMAP_HSMMC_READ(host->base, + SYSCTL) & SRD) + ; + } } if ((status & DATA_TIMEOUT) || (status & DATA_CRC)) { From 3ebf74b1de9f94da4291db3ea1ae11c5bedb5784 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Fri, 6 Feb 2009 16:42:51 +0100 Subject: [PATCH 151/263] omap_hsmmc: Change while(); loops with finite version Replace the infinite 'while() ;' loops with a finite loop version. Signed-off-by: Jean Pihet Acked-by: Tony Lindgren Signed-off-by: Pierre Ossman --- drivers/mmc/host/omap_hsmmc.c | 54 +++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 8d21a07f63de..a631c81dce12 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -376,6 +376,32 @@ static void mmc_omap_report_irq(struct mmc_omap_host *host, u32 status) } #endif /* CONFIG_MMC_DEBUG */ +/* + * MMC controller internal state machines reset + * + * Used to reset command or data internal state machines, using respectively + * SRC or SRD bit of SYSCTL register + * Can be called from interrupt context + */ +static inline void mmc_omap_reset_controller_fsm(struct mmc_omap_host *host, + unsigned long bit) +{ + unsigned long i = 0; + unsigned long limit = (loops_per_jiffy * + msecs_to_jiffies(MMC_TIMEOUT_MS)); + + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | bit); + + while ((OMAP_HSMMC_READ(host->base, SYSCTL) & bit) && + (i++ < limit)) + cpu_relax(); + + if (OMAP_HSMMC_READ(host->base, SYSCTL) & bit) + dev_err(mmc_dev(host->mmc), + "Timeout waiting on controller reset in %s\n", + __func__); +} /* * MMC controller IRQ handler @@ -404,13 +430,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) (status & CMD_CRC)) { if (host->cmd) { if (status & CMD_TIMEOUT) { - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, - SYSCTL) | SRC); - while (OMAP_HSMMC_READ(host->base, - SYSCTL) & SRC) - ; - + mmc_omap_reset_controller_fsm(host, SRC); host->cmd->error = -ETIMEDOUT; } else { host->cmd->error = -EILSEQ; @@ -419,12 +439,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) } if (host->data) { mmc_dma_cleanup(host); - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, - SYSCTL) | SRD); - while (OMAP_HSMMC_READ(host->base, - SYSCTL) & SRD) - ; + mmc_omap_reset_controller_fsm(host, SRD); } } if ((status & DATA_TIMEOUT) || @@ -434,12 +449,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) mmc_dma_cleanup(host); else host->data->error = -EILSEQ; - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, - SYSCTL) | SRD); - while (OMAP_HSMMC_READ(host->base, - SYSCTL) & SRD) - ; + mmc_omap_reset_controller_fsm(host, SRD); end_trans = 1; } } @@ -547,11 +557,7 @@ static void mmc_omap_detect(struct work_struct *work) if (host->carddetect) { mmc_detect_change(host->mmc, (HZ * 200) / 1000); } else { - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | SRD); - while (OMAP_HSMMC_READ(host->base, SYSCTL) & SRD) - ; - + mmc_omap_reset_controller_fsm(host, SRD); mmc_detect_change(host->mmc, (HZ * 50) / 1000); } } From c296861291669f305deef19b78042330d7135017 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 18 Feb 2009 14:48:12 -0800 Subject: [PATCH 152/263] vmalloc: add __get_vm_area_caller() We have get_vm_area_caller() and __get_vm_area() but not __get_vm_area_caller() On powerpc, I use __get_vm_area() to separate the ranges of addresses given to vmalloc vs. ioremap (various good reasons for that) so in order to be able to implement the new caller tracking in /proc/vmallocinfo, I need a "_caller" variant of it. (akpm: needed for ongoing powerpc development, so merge it early) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Benjamin Herrenschmidt Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a986..9c0890c7a06a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -84,6 +84,10 @@ extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, void *caller); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); +extern struct vm_struct *__get_vm_area_caller(unsigned long size, + unsigned long flags, + unsigned long start, unsigned long end, + void *caller); extern struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node, gfp_t gfp_mask); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 75f49d312e8c..4dd2636d0b92 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1106,6 +1106,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, } EXPORT_SYMBOL_GPL(__get_vm_area); +struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end, + void *caller) +{ + return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, + caller); +} + /** * get_vm_area - reserve a contiguous kernel virtual area * @size: size of the area From b6d6c5175809934e04a606d9193ef04924a7a7d9 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 18 Feb 2009 14:48:13 -0800 Subject: [PATCH 153/263] aoe: ignore vendor extension AoE responses The Welland ME-747K-SI AoE target generates unsolicited AoE responses that are marked as vendor extensions. Instead of ignoring these packets, the aoe driver was generating kernel messages for each unrecognized response received. This patch corrects the behavior. Signed-off-by: Ed Cashin Reported-by: Tested-by: Cc: Cc: Alex Buell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/aoe/aoe.h | 1 + drivers/block/aoe/aoenet.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index c237527b1aa5..5e41e6dd657b 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -18,6 +18,7 @@ enum { AOECMD_ATA, AOECMD_CFG, + AOECMD_VEND_MIN = 0xf0, AOEFL_RSP = (1<<3), AOEFL_ERR = (1<<2), diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 30de5b1c647e..c6099ba9a4b8 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -142,6 +142,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, aoecmd_cfg_rsp(skb); break; default: + if (h->cmd >= AOECMD_VEND_MIN) + break; /* don't complain about vendor commands */ printk(KERN_INFO "aoe: unknown cmd %d\n", h->cmd); } exit: From b851ee7921fabdd7dfc96ffc4e9609f5062bd12b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 18 Feb 2009 14:48:14 -0800 Subject: [PATCH 154/263] cgroups: update documentation about css_set hash table The css_set hash table was introduced in 2.6.26, so update the documentation accordingly. Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index d9e5d6f41b92..93feb8444489 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -252,10 +252,8 @@ cgroup file system directories. When a task is moved from one cgroup to another, it gets a new css_set pointer - if there's an already existing css_set with the desired collection of cgroups then that group is reused, else a new -css_set is allocated. Note that the current implementation uses a -linear search to locate an appropriate existing css_set, so isn't -very efficient. A future version will use a hash table for better -performance. +css_set is allocated. The appropriate existing css_set is located by +looking into a hash table. To allow access from a cgroup to the css_sets (and hence tasks) that comprise it, a set of cg_cgroup_link objects form a lattice; From 55ec82176eca52e4e0530a82a0eb59160a1a95a1 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Wed, 18 Feb 2009 14:48:15 -0800 Subject: [PATCH 155/263] vfs: separate FMODE_PREAD/FMODE_PWRITE into separate flags Separate FMODE_PREAD and FMODE_PWRITE into separate flags to reflect the reality that the read and write paths may have independent restrictions. A git grep verifies that these flags are always cleared together so this new behavior will only apply to interfaces that change to clear flags individually. This is required for "seq_file: properly cope with pread", a post-2.6.25 regression fix. [akpm@linux-foundation.org: add comment] Signed-off-by: Paul Turner Cc: Eric Biederman Cc: Alexey Dobriyan Cc: Al Viro Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6022f44043f2..5852bd6afbe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -54,24 +54,30 @@ struct inodes_stat_t { #define MAY_ACCESS 16 #define MAY_OPEN 32 +/* + * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond + * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() + */ + /* file is open for reading */ #define FMODE_READ ((__force fmode_t)1) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)2) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)4) -/* file can be accessed using pread/pwrite */ +/* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)8) -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* file can be accessed using pwrite */ +#define FMODE_PWRITE ((__force fmode_t)16) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)16) +#define FMODE_EXEC ((__force fmode_t)32) /* File is opened with O_NDELAY (only set for block devices) */ -#define FMODE_NDELAY ((__force fmode_t)32) +#define FMODE_NDELAY ((__force fmode_t)64) /* File is opened with O_EXCL (only set for block devices) */ -#define FMODE_EXCL ((__force fmode_t)64) +#define FMODE_EXCL ((__force fmode_t)128) /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ -#define FMODE_WRITE_IOCTL ((__force fmode_t)128) +#define FMODE_WRITE_IOCTL ((__force fmode_t)256) /* * Don't update ctime and mtime. From 8f19d472935c83d823fa4cf02bcc0a7b9952db30 Mon Sep 17 00:00:00 2001 From: Eric Biederman Date: Wed, 18 Feb 2009 14:48:16 -0800 Subject: [PATCH 156/263] seq_file: properly cope with pread Currently seq_read assumes that the offset passed to it is always the offset it passed to user space. In the case pread this assumption is broken and we do the wrong thing when presented with pread. To solve this I introduce an offset cache inside of struct seq_file so we know where our logical file position is. Then in seq_read if we try to read from another offset we reset our data structures and attempt to go to the offset user space wanted. [akpm@linux-foundation.org: restore FMODE_PWRITE] [pjt@google.com: seq_open needs its fmode opened up to take advantage of this] Signed-off-by: Eric Biederman Cc: Alexey Dobriyan Cc: Al Viro Cc: Paul Turner Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 36 ++++++++++++++++++++++++++++++++---- include/linux/seq_file.h | 1 + 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 5267098532bf..a1a4cfe19210 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -48,8 +48,16 @@ int seq_open(struct file *file, const struct seq_operations *op) */ file->f_version = 0; - /* SEQ files support lseek, but not pread/pwrite */ - file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); + /* + * seq_files support lseek() and pread(). They do not implement + * write() at all, but we clear FMODE_PWRITE here for historical + * reasons. + * + * If a client of seq_files a) implements file.write() and b) wishes to + * support pwrite() then that client will need to implement its own + * file.open() which calls seq_open() and then sets FMODE_PWRITE. + */ + file->f_mode &= ~FMODE_PWRITE; return 0; } EXPORT_SYMBOL(seq_open); @@ -131,6 +139,22 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) int err = 0; mutex_lock(&m->lock); + + /* Don't assume *ppos is where we left it */ + if (unlikely(*ppos != m->read_pos)) { + m->read_pos = *ppos; + while ((err = traverse(m, *ppos)) == -EAGAIN) + ; + if (err) { + /* With prejudice... */ + m->read_pos = 0; + m->version = 0; + m->index = 0; + m->count = 0; + goto Done; + } + } + /* * seq_file->op->..m_start/m_stop/m_next may do special actions * or optimisations based on the file->f_version, so we want to @@ -230,8 +254,10 @@ Fill: Done: if (!copied) copied = err; - else + else { *ppos += copied; + m->read_pos += copied; + } file->f_version = m->version; mutex_unlock(&m->lock); return copied; @@ -266,16 +292,18 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) if (offset < 0) break; retval = offset; - if (offset != file->f_pos) { + if (offset != m->read_pos) { while ((retval=traverse(m, offset)) == -EAGAIN) ; if (retval) { /* with extreme prejudice... */ file->f_pos = 0; + m->read_pos = 0; m->version = 0; m->index = 0; m->count = 0; } else { + m->read_pos = offset; retval = file->f_pos = offset; } } diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 40ea5058c2ec..f616f31576d7 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -19,6 +19,7 @@ struct seq_file { size_t from; size_t count; loff_t index; + loff_t read_pos; u64 version; struct mutex lock; const struct seq_operations *op; From ef35ce231b3cb2a4b1808e826da263bf37ccb38a Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 18 Feb 2009 14:48:16 -0800 Subject: [PATCH 157/263] Pavel has moved My @suse.cz address will stop working some day, so put working one into MAINTAINERS/CREDITS. It would be cool to get this to 2.6.29... it should not really break anything. Signed-off-by: Pavel Machek Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 1 - MAINTAINERS | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CREDITS b/CREDITS index 2b39168c06aa..5e0736722afd 100644 --- a/CREDITS +++ b/CREDITS @@ -2166,7 +2166,6 @@ D: Initial implementation of VC's, pty's and select() N: Pavel Machek E: pavel@ucw.cz -E: pavel@suse.cz D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, D: work on suspend-to-ram/disk, killing duplicates from ioctl32 diff --git a/MAINTAINERS b/MAINTAINERS index e784b358b335..a2008bd587c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2001,7 +2001,7 @@ S: Maintained HIBERNATION (aka Software Suspend, aka swsusp) P: Pavel Machek -M: pavel@suse.cz +M: pavel@ucw.cz P: Rafael J. Wysocki M: rjw@sisk.pl L: linux-pm@lists.linux-foundation.org @@ -4172,7 +4172,7 @@ SUSPEND TO RAM P: Len Brown M: len.brown@intel.com P: Pavel Machek -M: pavel@suse.cz +M: pavel@ucw.cz P: Rafael J. Wysocki M: rjw@sisk.pl L: linux-pm@lists.linux-foundation.org From 610d18f4128ebbd88845d0fc60cce67b49af881e Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Wed, 18 Feb 2009 14:48:18 -0800 Subject: [PATCH 158/263] timerfd: add flags check As requested by Michael, add a missing check for valid flags in timerfd_settime(), and make it return EINVAL in case some extra bits are set. Michael said: If this is to be any use to userland apps that want to check flag support (perhaps it is too late already), then the sooner we get it into the kernel the better: 2.6.29 would be good; earlier stables as well would be even better. [akpm@linux-foundation.org: remove unused TFD_FLAGS_SET] Acked-by: Michael Kerrisk Signed-off-by: Davide Libenzi Cc: [2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/timerfd.c | 12 ++++++------ include/linux/timerfd.h | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 6a123b8ff3f5..b042bd7034b1 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -186,10 +186,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); - if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) - return -EINVAL; - if (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME) + if ((flags & ~TFD_CREATE_FLAGS) || + (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -201,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - flags & (O_CLOEXEC | O_NONBLOCK)); + flags & TFD_SHARED_FCNTL_FLAGS); if (ufd < 0) kfree(ctx); @@ -219,7 +218,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; - if (!timespec_valid(&ktmr.it_value) || + if ((flags & ~TFD_SETTIME_FLAGS) || + !timespec_valid(&ktmr.it_value) || !timespec_valid(&ktmr.it_interval)) return -EINVAL; diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 86cb0501d3e2..2d0792983f8c 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -11,13 +11,21 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include -/* Flags for timerfd_settime. */ +/* + * CAREFUL: Check include/asm-generic/fcntl.h when defining + * new flags, since they might collide with O_* ones. We want + * to re-use O_* flags that couldn't possibly have a meaning + * from eventfd, in order to leave a free define-space for + * shared O_* flags. + */ #define TFD_TIMER_ABSTIME (1 << 0) - -/* Flags for timerfd_create. */ #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK +#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK) +/* Flags for timerfd_create. */ +#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS +/* Flags for timerfd_settime. */ +#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME #endif /* _LINUX_TIMERFD_H */ - From 1cf6e7d83bf334cc5916137862c920a97aabc018 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Feb 2009 14:48:18 -0800 Subject: [PATCH 159/263] mm: task dirty accounting fix YAMAMOTO-san noticed that task_dirty_inc doesn't seem to be called properly for cases where set_page_dirty is not used to dirty a page (eg. mark_buffer_dirty). Additionally, there is some inconsistency about when task_dirty_inc is called. It is used for dirty balancing, however it even gets called for __set_page_dirty_no_writeback. So rather than increment it in a set_page_dirty wrapper, move it down to exactly where the dirty page accounting stats are incremented. Cc: YAMAMOTO Takashi Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 1 + include/linux/mm.h | 1 + mm/page-writeback.c | 13 +++---------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 665d446b25bc..ff4d1cdd779b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page, __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7dc04ff5ab89..10074212a35b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1159,6 +1159,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); +void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ #define VM_MAX_READAHEAD 128 /* kbytes */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c84128596ba..74dc57c74349 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) } EXPORT_SYMBOL_GPL(bdi_writeout_inc); -static inline void task_dirty_inc(struct task_struct *tsk) +void task_dirty_inc(struct task_struct *tsk) { prop_inc_single(&vm_dirties, &tsk->dirties); } @@ -1230,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page) __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, @@ -1262,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage); * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */ -static int __set_page_dirty(struct page *page) +int set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -1280,14 +1281,6 @@ static int __set_page_dirty(struct page *page) } return 0; } - -int set_page_dirty(struct page *page) -{ - int ret = __set_page_dirty(page); - if (ret) - task_dirty_inc(current); - return ret; -} EXPORT_SYMBOL(set_page_dirty); /* From 67e055d144c5b2acdc1c63811fde031263bf92c5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 18 Feb 2009 14:48:20 -0800 Subject: [PATCH 160/263] cgroups: fix possible use after free In cgroup_kill_sb(), root is freed before sb is detached from the list, so another sget() may find this sb and call cgroup_test_super(), which will access the root that has been freed. Reported-by: Al Viro Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e14db9c089b9..9edb5c4b79b4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_mutex); - kfree(root); kill_litter_super(sb); + kfree(root); } static struct file_system_type cgroup_fs_type = { From 42f5e039c3f6512271636928ddc4e7f7a0371672 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Feb 2009 14:48:21 -0800 Subject: [PATCH 161/263] pm: fix build for CONFIG_PM unset Compilation of kprobes.c with CONFIG_PM unset is broken due to some broken config dependncies. Fix that. Signed-off-by: Rafael J. Wysocki Reported-by: Ingo Molnar Tested-by: Masami Hiramatsu Cc: Len Brown Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 1 + kernel/power/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 170a9213c1b6..e4791b3ba55d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ +obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o diff --git a/kernel/power/Makefile b/kernel/power/Makefile index d7a10167a25b..720ea4f781bd 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o +obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o From 3a5093ee6728c8cbe9c039e685fc1fca8f965048 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 18 Feb 2009 14:48:22 -0800 Subject: [PATCH 162/263] eeepc: should depend on INPUT Otherwise with INPUT=m, EEEPC_LAPTOP=y one gets drivers/built-in.o: In function `input_sync': eeepc-laptop.c:(.text+0x18ce51): undefined reference to `input_event' drivers/built-in.o: In function `input_report_key': eeepc-laptop.c:(.text+0x18ce73): undefined reference to `input_event' drivers/built-in.o: In function `eeepc_hotk_check': eeepc-laptop.c:(.text+0x18d05f): undefined reference to `input_allocate_device' eeepc-laptop.c:(.text+0x18d10f): undefined reference to `input_register_device' eeepc-laptop.c:(.text+0x18d131): undefined reference to `input_free_device' drivers/built-in.o: In function `eeepc_backlight_exit': eeepc-laptop.c:(.text+0x18d546): undefined reference to `input_unregister_device' Signed-off-by: Alexey Dobriyan Cc: Len Brown Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 94363115a42a..6bce56c22623 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -301,6 +301,7 @@ config INTEL_MENLOW config EEEPC_LAPTOP tristate "Eee PC Hotkey Driver (EXPERIMENTAL)" depends on ACPI + depends on INPUT depends on EXPERIMENTAL select BACKLIGHT_CLASS_DEVICE select HWMON From ef2cfc790bf5f0ff189b01eabc0f4feb5e8524df Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 18 Feb 2009 14:48:23 -0800 Subject: [PATCH 163/263] hp accelerometer: add freefall detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds freefall handling to hp_accel driver. According to HP, it should just work, without us having to set the chip up by hand. hpfall.c is example .c program that parks the disk when accelerometer detects free fall. It should work; for now, it uses fixed 20seconds protection period. Signed-off-by: Pavel Machek Cc: Thomas Renninger Cc: Éric Piel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/hpfall.c | 101 ++++++++++++++++++++ Documentation/hwmon/lis3lv02d | 8 ++ drivers/hwmon/hp_accel.c | 27 ++++++ drivers/hwmon/lis3lv02d.c | 172 ++++++++++++++++++++++++++++++---- drivers/hwmon/lis3lv02d.h | 5 + 5 files changed, 296 insertions(+), 17 deletions(-) create mode 100644 Documentation/hwmon/hpfall.c diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c new file mode 100644 index 000000000000..bbea1ccfd46a --- /dev/null +++ b/Documentation/hwmon/hpfall.c @@ -0,0 +1,101 @@ +/* Disk protection for HP machines. + * + * Copyright 2008 Eric Piel + * Copyright 2009 Pavel Machek + * + * GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void write_int(char *path, int i) +{ + char buf[1024]; + int fd = open(path, O_RDWR); + if (fd < 0) { + perror("open"); + exit(1); + } + sprintf(buf, "%d", i); + if (write(fd, buf, strlen(buf)) != strlen(buf)) { + perror("write"); + exit(1); + } + close(fd); +} + +void set_led(int on) +{ + write_int("/sys/class/leds/hp::hddprotect/brightness", on); +} + +void protect(int seconds) +{ + write_int("/sys/block/sda/device/unload_heads", seconds*1000); +} + +int on_ac(void) +{ +// /sys/class/power_supply/AC0/online +} + +int lid_open(void) +{ +// /proc/acpi/button/lid/LID/state +} + +void ignore_me(void) +{ + protect(0); + set_led(0); + +} + +int main(int argc, char* argv[]) +{ + int fd, ret; + + fd = open("/dev/freefall", O_RDONLY); + if (fd < 0) { + perror("open"); + return EXIT_FAILURE; + } + + signal(SIGALRM, ignore_me); + + for (;;) { + unsigned char count; + + ret = read(fd, &count, sizeof(count)); + alarm(0); + if ((ret == -1) && (errno == EINTR)) { + /* Alarm expired, time to unpark the heads */ + continue; + } + + if (ret != sizeof(count)) { + perror("read"); + break; + } + + protect(21); + set_led(1); + if (1 || on_ac() || lid_open()) { + alarm(2); + } else { + alarm(20); + } + } + + close(fd); + return EXIT_SUCCESS; +} diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d index 0fcfc4a7ccdc..287f8c902656 100644 --- a/Documentation/hwmon/lis3lv02d +++ b/Documentation/hwmon/lis3lv02d @@ -33,6 +33,14 @@ rate - reports the sampling rate of the accelerometer device in HZ This driver also provides an absolute input class device, allowing the laptop to act as a pinball machine-esque joystick. +Another feature of the driver is misc device called "freefall" that +acts similar to /dev/rtc and reacts on free-fall interrupts received +from the device. It supports blocking operations, poll/select and +fasync operation modes. You must read 1 bytes from the device. The +result is number of free-fall interrupts since the last successful +read (or 255 if number of interrupts would not fit). + + Axes orientation ---------------- diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index abf4dfc8ec22..dcefe1d2adbd 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -213,6 +213,30 @@ static struct delayed_led_classdev hpled_led = { .set_brightness = hpled_set, }; +static acpi_status +lis3lv02d_get_resource(struct acpi_resource *resource, void *context) +{ + if (resource->type == ACPI_RESOURCE_TYPE_EXTENDED_IRQ) { + struct acpi_resource_extended_irq *irq; + u32 *device_irq = context; + + irq = &resource->data.extended_irq; + *device_irq = irq->interrupts[0]; + } + + return AE_OK; +} + +static void lis3lv02d_enum_resources(struct acpi_device *device) +{ + acpi_status status; + + status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, + lis3lv02d_get_resource, &adev.irq); + if (ACPI_FAILURE(status)) + printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n"); +} + static int lis3lv02d_add(struct acpi_device *device) { u8 val; @@ -247,6 +271,9 @@ static int lis3lv02d_add(struct acpi_device *device) if (ret) return ret; + /* obtain IRQ number of our device from ACPI */ + lis3lv02d_enum_resources(adev.device); + ret = lis3lv02d_init_device(&adev); if (ret) { flush_work(&hpled_led.work); diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c index 219d2d0d5a62..3afa3afc77f3 100644 --- a/drivers/hwmon/lis3lv02d.c +++ b/drivers/hwmon/lis3lv02d.c @@ -3,7 +3,7 @@ * * Copyright (C) 2007-2008 Yan Burman * Copyright (C) 2008 Eric Piel - * Copyright (C) 2008 Pavel Machek + * Copyright (C) 2008-2009 Pavel Machek * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include "lis3lv02d.h" @@ -55,7 +56,10 @@ /* Maximum value our axis may get for the input device (signed 12 bits) */ #define MDPS_MAX_VAL 2048 -struct acpi_lis3lv02d adev; +struct acpi_lis3lv02d adev = { + .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(adev.misc_wait), +}; + EXPORT_SYMBOL_GPL(adev); static int lis3lv02d_add_fs(struct acpi_device *device); @@ -110,26 +114,13 @@ static void lis3lv02d_get_xyz(acpi_handle handle, int *x, int *y, int *z) void lis3lv02d_poweroff(acpi_handle handle) { adev.is_on = 0; - /* disable X,Y,Z axis and power down */ - adev.write(handle, CTRL_REG1, 0x00); } EXPORT_SYMBOL_GPL(lis3lv02d_poweroff); void lis3lv02d_poweron(acpi_handle handle) { - u8 val; - adev.is_on = 1; adev.init(handle); - adev.write(handle, FF_WU_CFG, 0); - /* - * BDU: LSB and MSB values are not updated until both have been read. - * So the value read will always be correct. - * IEN: Interrupt for free-fall and DD, not for data-ready. - */ - adev.read(handle, CTRL_REG2, &val); - val |= CTRL2_BDU | CTRL2_IEN; - adev.write(handle, CTRL_REG2, val); } EXPORT_SYMBOL_GPL(lis3lv02d_poweron); @@ -162,6 +153,140 @@ static void lis3lv02d_decrease_use(struct acpi_lis3lv02d *dev) mutex_unlock(&dev->lock); } +static irqreturn_t lis302dl_interrupt(int irq, void *dummy) +{ + /* + * Be careful: on some HP laptops the bios force DD when on battery and + * the lid is closed. This leads to interrupts as soon as a little move + * is done. + */ + atomic_inc(&adev.count); + + wake_up_interruptible(&adev.misc_wait); + kill_fasync(&adev.async_queue, SIGIO, POLL_IN); + return IRQ_HANDLED; +} + +static int lis3lv02d_misc_open(struct inode *inode, struct file *file) +{ + int ret; + + if (test_and_set_bit(0, &adev.misc_opened)) + return -EBUSY; /* already open */ + + atomic_set(&adev.count, 0); + + /* + * The sensor can generate interrupts for free-fall and direction + * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep + * the things simple and _fast_ we activate it only for free-fall, so + * no need to read register (very slow with ACPI). For the same reason, + * we forbid shared interrupts. + * + * IRQF_TRIGGER_RISING seems pointless on HP laptops because the + * io-apic is not configurable (and generates a warning) but I keep it + * in case of support for other hardware. + */ + ret = request_irq(adev.irq, lis302dl_interrupt, IRQF_TRIGGER_RISING, + DRIVER_NAME, &adev); + + if (ret) { + clear_bit(0, &adev.misc_opened); + printk(KERN_ERR DRIVER_NAME ": IRQ%d allocation failed\n", adev.irq); + return -EBUSY; + } + lis3lv02d_increase_use(&adev); + printk("lis3: registered interrupt %d\n", adev.irq); + return 0; +} + +static int lis3lv02d_misc_release(struct inode *inode, struct file *file) +{ + fasync_helper(-1, file, 0, &adev.async_queue); + lis3lv02d_decrease_use(&adev); + free_irq(adev.irq, &adev); + clear_bit(0, &adev.misc_opened); /* release the device */ + return 0; +} + +static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + DECLARE_WAITQUEUE(wait, current); + u32 data; + unsigned char byte_data; + ssize_t retval = 1; + + if (count < 1) + return -EINVAL; + + add_wait_queue(&adev.misc_wait, &wait); + while (true) { + set_current_state(TASK_INTERRUPTIBLE); + data = atomic_xchg(&adev.count, 0); + if (data) + break; + + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } + + if (data < 255) + byte_data = data; + else + byte_data = 255; + + /* make sure we are not going into copy_to_user() with + * TASK_INTERRUPTIBLE state */ + set_current_state(TASK_RUNNING); + if (copy_to_user(buf, &byte_data, sizeof(byte_data))) + retval = -EFAULT; + +out: + __set_current_state(TASK_RUNNING); + remove_wait_queue(&adev.misc_wait, &wait); + + return retval; +} + +static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &adev.misc_wait, wait); + if (atomic_read(&adev.count)) + return POLLIN | POLLRDNORM; + return 0; +} + +static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) +{ + return fasync_helper(fd, file, on, &adev.async_queue); +} + +static const struct file_operations lis3lv02d_misc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = lis3lv02d_misc_read, + .open = lis3lv02d_misc_open, + .release = lis3lv02d_misc_release, + .poll = lis3lv02d_misc_poll, + .fasync = lis3lv02d_misc_fasync, +}; + +static struct miscdevice lis3lv02d_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "freefall", + .fops = &lis3lv02d_misc_fops, +}; + /** * lis3lv02d_joystick_kthread - Kthread polling function * @data: unused - here to conform to threadfn prototype @@ -203,7 +328,6 @@ static void lis3lv02d_joystick_close(struct input_dev *input) lis3lv02d_decrease_use(&adev); } - static inline void lis3lv02d_calibrate_joystick(void) { lis3lv02d_get_xyz(adev.device->handle, &adev.xcalib, &adev.ycalib, &adev.zcalib); @@ -250,6 +374,7 @@ void lis3lv02d_joystick_disable(void) if (!adev.idev) return; + misc_deregister(&lis3lv02d_misc_device); input_unregister_device(adev.idev); adev.idev = NULL; } @@ -268,6 +393,19 @@ int lis3lv02d_init_device(struct acpi_lis3lv02d *dev) if (lis3lv02d_joystick_enable()) printk(KERN_ERR DRIVER_NAME ": joystick initialization failed\n"); + printk("lis3_init_device: irq %d\n", dev->irq); + + /* if we did not get an IRQ from ACPI - we have nothing more to do */ + if (!dev->irq) { + printk(KERN_ERR DRIVER_NAME + ": No IRQ in ACPI. Disabling /dev/freefall\n"); + goto out; + } + + printk("lis3: registering device\n"); + if (misc_register(&lis3lv02d_misc_device)) + printk(KERN_ERR DRIVER_NAME ": misc_register failed\n"); +out: lis3lv02d_decrease_use(dev); return 0; } @@ -351,6 +489,6 @@ int lis3lv02d_remove_fs(void) EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver"); -MODULE_AUTHOR("Yan Burman and Eric Piel"); +MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h index 223f1c0763bb..2e7597c42d80 100644 --- a/drivers/hwmon/lis3lv02d.h +++ b/drivers/hwmon/lis3lv02d.h @@ -170,6 +170,11 @@ struct acpi_lis3lv02d { unsigned char is_on; /* whether the device is on or off */ unsigned char usage; /* usage counter */ struct axis_conversion ac; /* hw -> logical axis */ + + u32 irq; /* IRQ number */ + struct fasync_struct *async_queue; /* queue for the misc device */ + wait_queue_head_t misc_wait; /* Wait queue for the misc device */ + unsigned long misc_opened; /* bit0: whether the device is open */ }; int lis3lv02d_init_device(struct acpi_lis3lv02d *dev); From 137bad32342a613586347341d1307c2b9812ef44 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Wed, 18 Feb 2009 14:48:24 -0800 Subject: [PATCH 164/263] lis3lv02d: support both one- and two-byte sensors Sensors responding with 0x3B to WHO_AM_I only have one data register per direction, thus returning a signed byte from the position which is occupied by the MSB in sensors responding with 0x3A. Since multiple sensors share the reply to WHO_AM_I, we rename the defines to better indicate what they identify (family of single and double precision sensors). We support both kind of sensors by checking for the sensor type on init and defining appropriate data-access routines and sensor limits (for the joystick) depending on what we find. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Giuseppe Bilotta Acked-by: Eric Piel Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/hp_accel.c | 36 ++++++++++++++++++++++++++++++++---- drivers/hwmon/lis3lv02d.c | 25 ++++++------------------- drivers/hwmon/lis3lv02d.h | 16 +++++++++++++--- 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index dcefe1d2adbd..6b16566c4e6c 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -237,9 +237,25 @@ static void lis3lv02d_enum_resources(struct acpi_device *device) printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n"); } +static s16 lis3lv02d_read_16(acpi_handle handle, int reg) +{ + u8 lo, hi; + + adev.read(handle, reg - 1, &lo); + adev.read(handle, reg, &hi); + /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ + return (s16)((hi << 8) | lo); +} + +static s16 lis3lv02d_read_8(acpi_handle handle, int reg) +{ + s8 lo; + adev.read(handle, reg, &lo); + return lo; +} + static int lis3lv02d_add(struct acpi_device *device) { - u8 val; int ret; if (!device) @@ -253,10 +269,22 @@ static int lis3lv02d_add(struct acpi_device *device) strcpy(acpi_device_class(device), ACPI_MDPS_CLASS); device->driver_data = &adev; - lis3lv02d_acpi_read(device->handle, WHO_AM_I, &val); - if ((val != LIS3LV02DL_ID) && (val != LIS302DL_ID)) { + lis3lv02d_acpi_read(device->handle, WHO_AM_I, &adev.whoami); + switch (adev.whoami) { + case LIS_DOUBLE_ID: + printk(KERN_INFO DRIVER_NAME ": 2-byte sensor found\n"); + adev.read_data = lis3lv02d_read_16; + adev.mdps_max_val = 2048; + break; + case LIS_SINGLE_ID: + printk(KERN_INFO DRIVER_NAME ": 1-byte sensor found\n"); + adev.read_data = lis3lv02d_read_8; + adev.mdps_max_val = 128; + break; + default: printk(KERN_ERR DRIVER_NAME - ": Accelerometer chip not LIS3LV02D{L,Q}\n"); + ": unknown sensor type 0x%X\n", adev.whoami); + return -EINVAL; } /* If possible use a "standard" axes order */ diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c index 3afa3afc77f3..8bb2158f0453 100644 --- a/drivers/hwmon/lis3lv02d.c +++ b/drivers/hwmon/lis3lv02d.c @@ -53,9 +53,6 @@ * joystick. */ -/* Maximum value our axis may get for the input device (signed 12 bits) */ -#define MDPS_MAX_VAL 2048 - struct acpi_lis3lv02d adev = { .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(adev.misc_wait), }; @@ -64,16 +61,6 @@ EXPORT_SYMBOL_GPL(adev); static int lis3lv02d_add_fs(struct acpi_device *device); -static s16 lis3lv02d_read_16(acpi_handle handle, int reg) -{ - u8 lo, hi; - - adev.read(handle, reg, &lo); - adev.read(handle, reg + 1, &hi); - /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ - return (s16)((hi << 8) | lo); -} - /** * lis3lv02d_get_axis - For the given axis, give the value converted * @axis: 1,2,3 - can also be negative @@ -102,9 +89,9 @@ static void lis3lv02d_get_xyz(acpi_handle handle, int *x, int *y, int *z) { int position[3]; - position[0] = lis3lv02d_read_16(handle, OUTX_L); - position[1] = lis3lv02d_read_16(handle, OUTY_L); - position[2] = lis3lv02d_read_16(handle, OUTZ_L); + position[0] = adev.read_data(handle, OUTX); + position[1] = adev.read_data(handle, OUTY); + position[2] = adev.read_data(handle, OUTZ); *x = lis3lv02d_get_axis(adev.ac.x, position); *y = lis3lv02d_get_axis(adev.ac.y, position); @@ -355,9 +342,9 @@ int lis3lv02d_joystick_enable(void) adev.idev->close = lis3lv02d_joystick_close; set_bit(EV_ABS, adev.idev->evbit); - input_set_abs_params(adev.idev, ABS_X, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3); - input_set_abs_params(adev.idev, ABS_Y, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3); - input_set_abs_params(adev.idev, ABS_Z, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3); + input_set_abs_params(adev.idev, ABS_X, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); + input_set_abs_params(adev.idev, ABS_Y, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); + input_set_abs_params(adev.idev, ABS_Z, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); err = input_register_device(adev.idev); if (err) { diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h index 2e7597c42d80..75972bf372ff 100644 --- a/drivers/hwmon/lis3lv02d.h +++ b/drivers/hwmon/lis3lv02d.h @@ -22,12 +22,15 @@ /* * The actual chip is STMicroelectronics LIS3LV02DL or LIS3LV02DQ that seems to * be connected via SPI. There exists also several similar chips (such as LIS302DL or - * LIS3L02DQ) but not in the HP laptops and they have slightly different registers. + * LIS3L02DQ) and they have slightly different registers, but we can provide a + * common interface for all of them. * They can also be connected via I²C. */ -#define LIS3LV02DL_ID 0x3A /* Also the LIS3LV02DQ */ -#define LIS302DL_ID 0x3B /* Also the LIS202DL! */ +/* 2-byte registers */ +#define LIS_DOUBLE_ID 0x3A /* LIS3LV02D[LQ] */ +/* 1-byte registers */ +#define LIS_SINGLE_ID 0x3B /* LIS[32]02DL and others */ enum lis3lv02d_reg { WHO_AM_I = 0x0F, @@ -44,10 +47,13 @@ enum lis3lv02d_reg { STATUS_REG = 0x27, OUTX_L = 0x28, OUTX_H = 0x29, + OUTX = 0x29, OUTY_L = 0x2A, OUTY_H = 0x2B, + OUTY = 0x2B, OUTZ_L = 0x2C, OUTZ_H = 0x2D, + OUTZ = 0x2D, FF_WU_CFG = 0x30, FF_WU_SRC = 0x31, FF_WU_ACK = 0x32, @@ -159,6 +165,10 @@ struct acpi_lis3lv02d { acpi_status (*write) (acpi_handle handle, int reg, u8 val); acpi_status (*read) (acpi_handle handle, int reg, u8 *ret); + u8 whoami; /* 3Ah: 2-byte registries, 3Bh: 1-byte registries */ + s16 (*read_data) (acpi_handle handle, int reg); + int mdps_max_val; + struct input_dev *idev; /* input device */ struct task_struct *kthread; /* kthread for input */ struct mutex lock; From 9ccf3b5e8409927835c4d38cb2f380c9e4349e76 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Wed, 18 Feb 2009 14:48:25 -0800 Subject: [PATCH 165/263] lis3lv02d: add axes knowledge of HP Pavilion dv5 models Add support for HP Pavilion dv5. Since Intel-based models have an inverted x axis, while AMD-based models have an inverted y axis, we introduce a new macro that special-cases axis orientation based on two DMI entries: HP dv5 axis configuration is then based on both the PRODUCT and BOARD name. Signed-off-by: Giuseppe Bilotta Cc: Eric Piel Cc: Pavel Machek Tested-by: Palatis Tseng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/hp_accel.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 6b16566c4e6c..29c83b5b9697 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -166,6 +166,18 @@ static struct axis_conversion lis3lv02d_axis_xy_swap_yz_inverted = {2, -1, -3}; }, \ .driver_data = &lis3lv02d_axis_##_axis \ } + +#define AXIS_DMI_MATCH2(_ident, _class1, _name1, \ + _class2, _name2, \ + _axis) { \ + .ident = _ident, \ + .callback = lis3lv02d_dmi_matched, \ + .matches = { \ + DMI_MATCH(DMI_##_class1, _name1), \ + DMI_MATCH(DMI_##_class2, _name2), \ + }, \ + .driver_data = &lis3lv02d_axis_##_axis \ +} static struct dmi_system_id lis3lv02d_dmi_ids[] = { /* product names are truncated to match all kinds of a same model */ AXIS_DMI_MATCH("NC64x0", "HP Compaq nc64", x_inverted), @@ -179,6 +191,16 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("NC673x", "HP Compaq 673", xy_rotated_left_usd), AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right), AXIS_DMI_MATCH("NC671xx", "HP Compaq 671", xy_swap_yz_inverted), + /* Intel-based HP Pavilion dv5 */ + AXIS_DMI_MATCH2("HPDV5_I", + PRODUCT_NAME, "HP Pavilion dv5", + BOARD_NAME, "3603", + x_inverted), + /* AMD-based HP Pavilion dv5 */ + AXIS_DMI_MATCH2("HPDV5_A", + PRODUCT_NAME, "HP Pavilion dv5", + BOARD_NAME, "3600", + y_inverted), { NULL, } /* Laptop models without axis info (yet): * "NC6910" "HP Compaq 6910" From 287d859222e0adbc67666a6154aaf42d7d5bbb54 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 Feb 2009 14:48:26 -0800 Subject: [PATCH 166/263] atmel-mci: fix initialization of dma slave data The conversion of atmel-mci to dma_request_channel missed the initialization of the channel dma_slave information. The filter_fn passed to dma_request_channel is responsible for initializing the channel's private data. This implementation has the additional benefit of enabling a generic client-channel data passing mechanism. Reviewed-by: Atsushi Nemoto Signed-off-by: Dan Williams Acked-by: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/dmaengine.c | 2 ++ drivers/dma/dw_dmac.c | 5 ++--- drivers/dma/dw_dmac_regs.h | 2 -- drivers/mmc/host/atmel-mci.c | 5 +++-- include/linux/dmaengine.h | 2 ++ 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index a58993011edb..280a9d263eb3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -518,6 +518,7 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan), err); else break; + chan->private = NULL; chan = NULL; } } @@ -536,6 +537,7 @@ void dma_release_channel(struct dma_chan *chan) WARN_ONCE(chan->client_count != 1, "chan reference count %d != 1\n", chan->client_count); dma_chan_put(chan); + chan->private = NULL; mutex_unlock(&dma_list_mutex); } EXPORT_SYMBOL_GPL(dma_release_channel); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 6b702cc46b3d..a97c07eef7ec 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -560,7 +560,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned long flags) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma_slave *dws = dwc->dws; + struct dw_dma_slave *dws = chan->private; struct dw_desc *prev; struct dw_desc *first; u32 ctllo; @@ -790,7 +790,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) cfghi = DWC_CFGH_FIFO_MODE; cfglo = 0; - dws = dwc->dws; + dws = chan->private; if (dws) { /* * We need controller-specific data to set up slave @@ -866,7 +866,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&dwc->lock); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; - dwc->dws = NULL; /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 00fdd187bb0c..b252b202c5cf 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -139,8 +139,6 @@ struct dw_dma_chan { struct list_head queue; struct list_head free_list; - struct dw_dma_slave *dws; - unsigned int descs_allocated; }; diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 76bfe16c09b1..2b1196e6142c 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1548,9 +1548,10 @@ static bool filter(struct dma_chan *chan, void *slave) { struct dw_dma_slave *dws = slave; - if (dws->dma_dev == chan->device->dev) + if (dws->dma_dev == chan->device->dev) { + chan->private = dws; return true; - else + } else return false; } #endif diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3e68469c1885..f0413845f20e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -121,6 +121,7 @@ struct dma_chan_percpu { * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel * @table_count: number of appearances in the mem-to-mem allocation table + * @private: private data for certain client-channel associations */ struct dma_chan { struct dma_device *device; @@ -134,6 +135,7 @@ struct dma_chan { struct dma_chan_percpu *local; int client_count; int table_count; + void *private; }; /** From 27c0c8e511fa9e2389503926840fac606d90a049 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 18 Feb 2009 14:48:28 -0800 Subject: [PATCH 167/263] atmel_serial might lose modem status change I found a problem of handling of modem status of atmel_serial driver. With the commit 1ecc26 ("atmel_serial: split the interrupt handler"), handling of modem status signal was splitted into two parts. The atmel_tasklet_func() compares new status with irq_status_prev, but irq_status_prev is not correct if signal status was changed while the port is closed. Here is a sequence to cause problem: 1. Remote side sets CTS (and DSR). 2. Local side close the port. 3. Local side clears RTS and DTR. 4. Remote side clears CTS and DSR. 5. Local side reopen the port. hw_stopped becomes 1. 6. Local side sets RTS and DTR. 7. Remote side sets CTS and DSR. Then CTS change interrupt can be received, but since CTS bit in irq_status_prev and new status is same, uart_handle_cts_change() will not be called (so hw_stopped will not be cleared, i.e. cannot send any data). I suppose irq_status_prev should be initialized at somewhere in open sequence. Itai Levi pointed out that we need to initialize atmel_port->irq_status as well here. His analysis is as follows: > Regarding the second part of the patch (which resets irq_status_prev), > it turns out that both versions of the patch (mine and Atsushi's) > still leave enough room for faulty behavior when opening the port. > > This is because we are not resetting both irq_status_prev and > irq_status in atmel_startup() to CSR, which leads faulty behavior in > the following sequences: > > First case: > 1. closing the port while CTS line = 1 (TX not allowed) > 2. setting CTS line = 0 (TX allowed) > 3. opening the port > 4. transmitting one char > 5. Cannot transmit more chars, although CTS line is 0 > > Second case: > 1. closing the port while CTS line = 0 (TX allowed) > 2. setting CTS line = 1 (TX not allowed) > 3. opening the port > 4. receiving some chars > 5. Now we can transmit, although CTS line is 1 > > This reason for this is that the tasklet is scheduled as a result of > TX or RX interrupts (not a status change!), in steps 4 above. Inside > the tasklet, the atmel_port->irq_status (which holds the value from > the previous session) is compared to atmel_port->irq_status_prev. > Hence, a status-change of the CTS line is faultily detected. > > Both cases were verified on 9260 hardware. [haavard.skinnemoen@atmel.com: folded with patch from Itai Levi] Signed-off-by: Atsushi Nemoto Signed-off-by: Haavard Skinnemoen Cc: Remy Bohmer Cc: Marc Pignat Cc: Itai Levi Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/atmel_serial.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c index 89362d733d62..8f58f7ff0dd7 100644 --- a/drivers/serial/atmel_serial.c +++ b/drivers/serial/atmel_serial.c @@ -877,6 +877,10 @@ static int atmel_startup(struct uart_port *port) } } + /* Save current CSR for comparison in atmel_tasklet_func() */ + atmel_port->irq_status_prev = UART_GET_CSR(port); + atmel_port->irq_status = atmel_port->irq_status_prev; + /* * Finally, enable the serial port */ From ada723dcd681e2dffd7d73345cc8fda0eb0df9bd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Feb 2009 14:48:30 -0800 Subject: [PATCH 168/263] fs/super.c: add lockdep annotation to s_umount Li Zefan said: Thread 1: for ((; ;)) { mount -t cpuset xxx /mnt > /dev/null 2>&1 cat /mnt/cpus > /dev/null 2>&1 umount /mnt > /dev/null 2>&1 } Thread 2: for ((; ;)) { mount -t cpuset xxx /mnt > /dev/null 2>&1 umount /mnt > /dev/null 2>&1 } (Note: It is irrelevant which cgroup subsys is used.) After a while a lockdep warning showed up: ============================================= [ INFO: possible recursive locking detected ] 2.6.28 #479 --------------------------------------------- mount/13554 is trying to acquire lock: (&type->s_umount_key#19){--..}, at: [] sget+0x5e/0x321 but task is already holding lock: (&type->s_umount_key#19){--..}, at: [] sget+0x1e2/0x321 other info that might help us debug this: 1 lock held by mount/13554: #0: (&type->s_umount_key#19){--..}, at: [] sget+0x1e2/0x321 stack backtrace: Pid: 13554, comm: mount Not tainted 2.6.28-mc #479 Call Trace: [] validate_chain+0x4c6/0xbbd [] __lock_acquire+0x676/0x700 [] lock_acquire+0x5d/0x7a [] ? sget+0x5e/0x321 [] down_write+0x34/0x50 [] ? sget+0x5e/0x321 [] sget+0x5e/0x321 [] ? cgroup_set_super+0x0/0x3e [] ? cgroup_test_super+0x0/0x2f [] cgroup_get_sb+0x98/0x2e7 [] cpuset_get_sb+0x4a/0x5f [] vfs_kern_mount+0x40/0x7b [] do_kern_mount+0x37/0xbf [] do_mount+0x5c3/0x61a [] ? copy_mount_options+0x2c/0x111 [] sys_mount+0x69/0xa0 [] sysenter_do_call+0x12/0x31 The cause is after alloc_super() and then retry, an old entry in list fs_supers is found, so grab_super(old) is called, but both functions hold s_umount lock: struct super_block *sget(...) { ... retry: spin_lock(&sb_lock); if (test) { list_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (!grab_super(old)) <--- 2nd: down_write(&old->s_umount); goto retry; if (s) destroy_super(s); return old; } } if (!s) { spin_unlock(&sb_lock); s = alloc_super(type); <--- 1th: down_write(&s->s_umount) if (!s) return ERR_PTR(-ENOMEM); goto retry; } ... } It seems like a false positive, and seems like VFS but not cgroup needs to be fixed. Peter said: We can simply put the new s_umount instance in a but lockdep doesn't particularly cares about subclass order. If there's any issue with the callers of sget() assuming the s_umount lock being of sublcass 0, then there is another annotation we can use to fix that, but lets not bother with that if this is sufficient. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12673 Signed-off-by: Peter Zijlstra Tested-by: Li Zefan Reported-by: Li Zefan Cc: Al Viro Cc: Paul Menage Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index 61dce001dd57..8349ed6b1412 100644 --- a/fs/super.c +++ b/fs/super.c @@ -82,7 +82,22 @@ static struct super_block *alloc_super(struct file_system_type *type) * lock ordering than usbfs: */ lockdep_set_class(&s->s_lock, &type->s_lock_key); - down_write(&s->s_umount); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:32 -0800 Subject: [PATCH 169/263] mm: clean up for early_pfn_to_nid() What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. This patch moves all declaration to include/linux/mm.h After this, if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use static definition in include/linux/mm.h else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use generic definition in mm/page_alloc.c else -> per-arch back end function will be called. Signed-off-by: KAMEZAWA Hiroyuki Tested-by: KOSAKI Motohiro Reported-by: David Miller Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/mmzone.h | 4 ---- arch/ia64/mm/numa.c | 2 +- arch/x86/include/asm/mmzone_32.h | 2 -- arch/x86/include/asm/mmzone_64.h | 2 -- arch/x86/mm/numa_64.c | 2 +- include/linux/mm.h | 19 ++++++++++++++++--- mm/page_alloc.c | 8 +++++++- 7 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h index 34efe88eb849..f2ca32069b3f 100644 --- a/arch/ia64/include/asm/mmzone.h +++ b/arch/ia64/include/asm/mmzone.h @@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif - #ifdef CONFIG_IA64_DIG /* DIG systems are small */ # define MAX_PHYSNODE_ID 8 # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index b73bf1838e57..5061c3fb6796 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr) * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where * the section resides. */ -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca5..105fb90a0635 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9e..a29f48c2a322 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89e..f3516da035d1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, return shift; } -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { return phys_to_nid(pfn << PAGE_SHIFT); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 10074212a35b..065cdf8c09fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid, typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + +#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ + !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) +static inline int __early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} +#else +/* please see mm/page_alloc.c */ +extern int __meminit early_pfn_to_nid(unsigned long pfn); +#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +/* there is a per-arch backend function. */ +extern int __meminit __early_pfn_to_nid(unsigned long pfn); +#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +#endif + extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5675b3073854..c5dd74602efc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) * was used and there are no special requirements, this is a convenient * alternative */ -int __meminit early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i; @@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn) } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +int __meminit early_pfn_to_nid(unsigned long pfn) +{ + return __early_pfn_to_nid(pfn); +} + + /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ for (i = first_active_region_index_in_nid(nid); i != -1; \ From cc2559bccc72767cb446f79b071d96c30c26439b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:33 -0800 Subject: [PATCH 170/263] mm: fix memmap init for handling memory hole Now, early_pfn_in_nid(PFN, NID) may returns false if PFN is a hole. and memmap initialization was not done. This was a trouble for sparc boot. To fix this, the PFN should be initialized and marked as PG_reserved. This patch changes early_pfn_in_nid() return true if PFN is a hole. Signed-off-by: KAMEZAWA Hiroyuki Reported-by: David Miller Tested-by: KOSAKI Motohiro Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/numa.c | 2 +- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 23 ++++++++++++++++++++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5061c3fb6796..3efea7d0a351 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -70,7 +70,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) return node_memblk[i].nid; } - return 0; + return -1; } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 09c14e213b63..1aca6cebbb78 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1071,7 +1071,7 @@ void sparse_init(void); #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_NODES_SPAN_OTHER_NODES -#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +bool early_pfn_in_nid(unsigned long pfn, int nid); #else #define early_pfn_in_nid(pfn, nid) (1) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c5dd74602efc..5c44ed49ca93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3000,16 +3000,33 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) if (start_pfn <= pfn && pfn < end_pfn) return early_node_map[i].nid; } - - return 0; + /* This is a memory hole */ + return -1; } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ int __meminit early_pfn_to_nid(unsigned long pfn) { - return __early_pfn_to_nid(pfn); + int nid; + + nid = __early_pfn_to_nid(pfn); + if (nid >= 0) + return nid; + /* just returns 0 */ + return 0; } +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +bool __meminit early_pfn_in_nid(unsigned long pfn, int node) +{ + int nid; + + nid = __early_pfn_to_nid(pfn); + if (nid >= 0 && nid != node) + return false; + return true; +} +#endif /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ From ffa7525c13eb3db0fd19a3e1cffe2ce6f561f5f3 Mon Sep 17 00:00:00 2001 From: Adam Lackorzynski Date: Wed, 18 Feb 2009 14:48:34 -0800 Subject: [PATCH 171/263] jsm: additional device support I have a Digi Neo 8 PCI card (114f:00b1) Serial controller: Digi International Digi Neo 8 (rev 05) that works with the jsm driver after using the following patch. Signed-off-by: Adam Lackorzynski Cc: Scott H Kilau Cc: Wendy Xiong Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/jsm/jsm_driver.c | 3 +++ include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/serial/jsm/jsm_driver.c b/drivers/serial/jsm/jsm_driver.c index 92187e28608a..ac79cbe4c2cf 100644 --- a/drivers/serial/jsm/jsm_driver.c +++ b/drivers/serial/jsm/jsm_driver.c @@ -84,6 +84,8 @@ static int jsm_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) brd->pci_dev = pdev; if (pdev->device == PCIE_DEVICE_ID_NEO_4_IBM) brd->maxports = 4; + else if (pdev->device == PCI_DEVICE_ID_DIGI_NEO_8) + brd->maxports = 8; else brd->maxports = 2; @@ -212,6 +214,7 @@ static struct pci_device_id jsm_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45), 0, 0, 2 }, { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45PRI), 0, 0, 3 }, { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_4_IBM), 0, 0, 4 }, + { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_NEO_8), 0, 0, 5 }, { 0, } }; MODULE_DEVICE_TABLE(pci, jsm_pci_tbl); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 918391b4b109..114b8192eab9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1445,6 +1445,7 @@ #define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071 #define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072 #define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073 +#define PCI_DEVICE_ID_DIGI_NEO_8 0x00B1 #define PCI_DEVICE_ID_NEO_2DB9 0x00C8 #define PCI_DEVICE_ID_NEO_2DB9PRI 0x00C9 #define PCI_DEVICE_ID_NEO_2RJ45 0x00CA From 5a74db06cc8d36a325913aa4968ae169f997a466 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Wed, 18 Feb 2009 14:48:36 -0800 Subject: [PATCH 172/263] floppy: request and release only the ports we actually use The floppy driver requests an I/O port it doesn't need, and sometimes this causes a conflict with a motherboard device reported by PNPBIOS. This patch makes the floppy driver request and release only the ports it actually uses. It also factors out the request/release stuff and the io-ports list so they're all in one place now. The current floppy driver uses only these ports: 0x3f2 (FD_DOR) 0x3f4 (FD_STATUS) 0x3f5 (FD_DATA) 0x3f7 (FD_DCR/FD_DIR) but it requests 0x3f2-0x3f5 and 0x3f7, which includes the unused port 0x3f3. Some BIOSes report 0x3f3 as a motherboard resource. The PNP system driver reserves that, which causes a conflict when the floppy driver requests 0x3f2-0x3f5 later. Philippe reported that this conflict broke the floppy driver between 2.6.11 and 2.6.22. His PNPBIOS reports these devices: $ cat 00:07/id 00:07/resources # motherboard device PNP0c02 state = active io 0x80-0x80 io 0x10-0x1f io 0x22-0x3f io 0x44-0x5f io 0x90-0x9f io 0xa2-0xbf io 0x3f0-0x3f1 io 0x3f3-0x3f3 $ cat 00:03/id 00:03/resources # floppy device PNP0700 state = active io 0x3f4-0x3f5 io 0x3f2-0x3f2 Reference: http://lkml.org/lkml/2009/1/31/162 Signed-off-by: Bjorn Helgaas Signed-off-by: Philippe De Muyter Reported-by: Philippe De Muyter Tested-by: Philippe De Muyter Cc: Adam M Belay Cc: Robert Hancock Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/floppy.c | 79 +++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index cf29cc4e6ab7..83d8ed39433d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -558,6 +558,8 @@ static void process_fd_request(void); static void recalibrate_floppy(void); static void floppy_shutdown(unsigned long); +static int floppy_request_regions(int); +static void floppy_release_regions(int); static int floppy_grab_irq_and_dma(void); static void floppy_release_irq_and_dma(void); @@ -4274,8 +4276,7 @@ static int __init floppy_init(void) FDCS->rawcmd = 2; if (user_reset_fdc(-1, FD_RESET_ALWAYS, 0)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - release_region(FDCS->address + 2, 4); - release_region(FDCS->address + 7, 1); + floppy_release_regions(fdc); FDCS->address = -1; FDCS->version = FDC_NONE; continue; @@ -4284,8 +4285,7 @@ static int __init floppy_init(void) FDCS->version = get_fdc_version(); if (FDCS->version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - release_region(FDCS->address + 2, 4); - release_region(FDCS->address + 7, 1); + floppy_release_regions(fdc); FDCS->address = -1; continue; } @@ -4358,6 +4358,47 @@ out_put_disk: static DEFINE_SPINLOCK(floppy_usage_lock); +static const struct io_region { + int offset; + int size; +} io_regions[] = { + { 2, 1 }, + /* address + 3 is sometimes reserved by pnp bios for motherboard */ + { 4, 2 }, + /* address + 6 is reserved, and may be taken by IDE. + * Unfortunately, Adaptec doesn't know this :-(, */ + { 7, 1 }, +}; + +static void floppy_release_allocated_regions(int fdc, const struct io_region *p) +{ + while (p != io_regions) { + p--; + release_region(FDCS->address + p->offset, p->size); + } +} + +#define ARRAY_END(X) (&((X)[ARRAY_SIZE(X)])) + +static int floppy_request_regions(int fdc) +{ + const struct io_region *p; + + for (p = io_regions; p < ARRAY_END(io_regions); p++) { + if (!request_region(FDCS->address + p->offset, p->size, "floppy")) { + DPRINT("Floppy io-port 0x%04lx in use\n", FDCS->address + p->offset); + floppy_release_allocated_regions(fdc, p); + return -EBUSY; + } + } + return 0; +} + +static void floppy_release_regions(int fdc) +{ + floppy_release_allocated_regions(fdc, ARRAY_END(io_regions)); +} + static int floppy_grab_irq_and_dma(void) { unsigned long flags; @@ -4399,18 +4440,8 @@ static int floppy_grab_irq_and_dma(void) for (fdc = 0; fdc < N_FDC; fdc++) { if (FDCS->address != -1) { - if (!request_region(FDCS->address + 2, 4, "floppy")) { - DPRINT("Floppy io-port 0x%04lx in use\n", - FDCS->address + 2); - goto cleanup1; - } - if (!request_region(FDCS->address + 7, 1, "floppy DIR")) { - DPRINT("Floppy io-port 0x%04lx in use\n", - FDCS->address + 7); - goto cleanup2; - } - /* address + 6 is reserved, and may be taken by IDE. - * Unfortunately, Adaptec doesn't know this :-(, */ + if (floppy_request_regions(fdc)) + goto cleanup; } } for (fdc = 0; fdc < N_FDC; fdc++) { @@ -4432,15 +4463,11 @@ static int floppy_grab_irq_and_dma(void) fdc = 0; irqdma_allocated = 1; return 0; -cleanup2: - release_region(FDCS->address + 2, 4); -cleanup1: +cleanup: fd_free_irq(); fd_free_dma(); - while (--fdc >= 0) { - release_region(FDCS->address + 2, 4); - release_region(FDCS->address + 7, 1); - } + while (--fdc >= 0) + floppy_release_regions(fdc); spin_lock_irqsave(&floppy_usage_lock, flags); usage_count--; spin_unlock_irqrestore(&floppy_usage_lock, flags); @@ -4501,10 +4528,8 @@ static void floppy_release_irq_and_dma(void) #endif old_fdc = fdc; for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) { - release_region(FDCS->address + 2, 4); - release_region(FDCS->address + 7, 1); - } + if (FDCS->address != -1) + floppy_release_regions(fdc); fdc = old_fdc; } From a1a5c3b9237662f326cc730e167e7524b5d05a36 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 18 Feb 2009 14:48:38 -0800 Subject: [PATCH 173/263] fbdev/drm: fix Kconfig submenu mess in "Graphics support" Submenus of the graphics support "Support for frame buffer devices" and "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" are broken in half after latest changes for Intel 915 mode setting support. The DRM subsection is broken because one option is put outside the choice section it depends on. The frame buffers part is broken then due to circular dependency. Fix this by make Intel frame buffers depend on CONFIG_INTEL_AGP. Kconfigs are broken by d2f59357700487a8b944f4f7777d1e97cf5ea2ed ("drm/i915: select framebuffer support automatically"). This is probably not only way to fix this. Signed-off-by: Krzysztof Helt Cc: Ingo Molnar Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/Kconfig | 13 ++++++------- drivers/video/Kconfig | 10 ++-------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 4be3acbaaf9a..3a22eb9be378 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -80,18 +80,17 @@ config DRM_I915 XFree86 4.4 and above. If unsure, build this and i830 as modules and the X server will load the correct one. -endchoice - config DRM_I915_KMS bool "Enable modesetting on intel by default" depends on DRM_I915 help - Choose this option if you want kernel modesetting enabled by default, - and you have a new enough userspace to support this. Running old - userspaces with this enabled will cause pain. Note that this causes - the driver to bind to PCI devices, which precludes loading things - like intelfb. + Choose this option if you want kernel modesetting enabled by default, + and you have a new enough userspace to support this. Running old + userspaces with this enabled will cause pain. Note that this causes + the driver to bind to PCI devices, which precludes loading things + like intelfb. +endchoice config DRM_MGA tristate "Matrox g200/g400" diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index bf0af660df8a..fb19803060cf 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1054,10 +1054,7 @@ config FB_RIVA_BACKLIGHT config FB_I810 tristate "Intel 810/815 support (EXPERIMENTAL)" - depends on EXPERIMENTAL && PCI && X86_32 - select AGP - select AGP_INTEL - select FB + depends on EXPERIMENTAL && FB && PCI && X86_32 && AGP_INTEL select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA @@ -1120,10 +1117,7 @@ config FB_CARILLO_RANCH config FB_INTEL tristate "Intel 830M/845G/852GM/855GM/865G/915G/945G/945GM/965G/965GM support (EXPERIMENTAL)" - depends on EXPERIMENTAL && PCI && X86 - select FB - select AGP - select AGP_INTEL + depends on EXPERIMENTAL && FB && PCI && X86 && AGP_INTEL select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA From 2db69a9340da12a4db44edb7506dd68799aeff55 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 18 Feb 2009 14:48:39 -0800 Subject: [PATCH 174/263] vt: Declare PIO_CMAP/GIO_CMAP as compatbile ioctls. Otherwise, these don't work when called from 32-bit userspace on 64-bit kernels. Cc: Jiri Kosina Cc: Alan Cox Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9c6d815dd191..39bd4d38e889 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1938,6 +1938,8 @@ ULONG_IOCTL(SET_BITMAP_FILE) /* Big K */ COMPATIBLE_IOCTL(PIO_FONT) COMPATIBLE_IOCTL(GIO_FONT) +COMPATIBLE_IOCTL(PIO_CMAP) +COMPATIBLE_IOCTL(GIO_CMAP) ULONG_IOCTL(KDSIGACCEPT) COMPATIBLE_IOCTL(KDGETKEYCODE) COMPATIBLE_IOCTL(KDSETKEYCODE) From 310d8c93f9f07499cd7ca82d7997774a89de00e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 18 Feb 2009 14:48:39 -0800 Subject: [PATCH 175/263] x86: dell-laptop: depends on POWER_SUPPLY Build breaks when DELL_LAPTOP=y and POWER_SUPPLY=m. DELL_LAPTOP needs to depend on POWER_SUPPLY. dell-laptop.c:(.text+0x1ef3c4): undefined reference to `power_supply_is_system_supplied' dell-laptop.c:(.text+0x1ef45e): undefined reference to `power_supply_is_system_supplied' Signed-off-by: Randy Dunlap Cc: Matthew Garrett Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 6bce56c22623..b3866ad50227 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -62,6 +62,7 @@ config DELL_LAPTOP depends on EXPERIMENTAL depends on BACKLIGHT_CLASS_DEVICE depends on RFKILL + depends on POWER_SUPPLY default n ---help--- This driver adds support for rfkill and backlight control to Dell From 97bef7dd05563807539122c488a5dd93ed327722 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 18 Feb 2009 14:48:40 -0800 Subject: [PATCH 176/263] Bernhard has moved Since I don't work for SUSE any more and the bwalle@suse.de address is invalid, correct it in the copyright headers and documentation. Signed-off-by: Bernhard Walle Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-firmware-memmap | 2 +- drivers/firmware/memmap.c | 2 +- include/linux/firmware-map.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap index 0d99ee6ae02e..eca0d65087dc 100644 --- a/Documentation/ABI/testing/sysfs-firmware-memmap +++ b/Documentation/ABI/testing/sysfs-firmware-memmap @@ -1,6 +1,6 @@ What: /sys/firmware/memmap/ Date: June 2008 -Contact: Bernhard Walle +Contact: Bernhard Walle Description: On all platforms, the firmware provides a memory map which the kernel reads. The resources from that memory map are registered diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 261b9aa3f248..05aa2d406ac6 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -1,7 +1,7 @@ /* * linux/drivers/firmware/memmap.c * Copyright (C) 2008 SUSE LINUX Products GmbH - * by Bernhard Walle + * by Bernhard Walle * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License v2.0 as published by diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h index 6e199c8dfacc..cca686b39123 100644 --- a/include/linux/firmware-map.h +++ b/include/linux/firmware-map.h @@ -1,7 +1,7 @@ /* * include/linux/firmware-map.h: * Copyright (C) 2008 SUSE LINUX Products GmbH - * by Bernhard Walle + * by Bernhard Walle * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License v2.0 as published by From be50344e604f956891fc0013f1ba78823a758124 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 18 Feb 2009 14:48:41 -0800 Subject: [PATCH 177/263] spi-gpio: sanitize MISO bitvalue gpio_get_value() returns 0 or nonzero, but getmiso() expects 0 or 1. Sanitize the value to a 0/1 boolean. Signed-off-by: Michael Buesch Acked-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi_gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c index 49698cabc30d..f5ed9721aabb 100644 --- a/drivers/spi/spi_gpio.c +++ b/drivers/spi/spi_gpio.c @@ -114,7 +114,7 @@ static inline void setmosi(const struct spi_device *spi, int is_on) static inline int getmiso(const struct spi_device *spi) { - return gpio_get_value(SPI_MISO_GPIO); + return !!gpio_get_value(SPI_MISO_GPIO); } #undef pdata From f04b30de3c82528f1ab4c58b3dd4c975f5341901 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 18 Feb 2009 14:48:43 -0800 Subject: [PATCH 178/263] inotify: fix GFP_KERNEL related deadlock Enhanced lockdep coverage of __GFP_NOFS turned up this new lockdep assert: [ 1093.677775] [ 1093.677781] ================================= [ 1093.680031] [ INFO: inconsistent lock state ] [ 1093.680031] 2.6.29-rc5-tip-01504-gb49eca1-dirty #1 [ 1093.680031] --------------------------------- [ 1093.680031] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. [ 1093.680031] kswapd0/308 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 1093.680031] (&inode->inotify_mutex){+.+.?.}, at: [] inotify_inode_is_dead+0x20/0x80 [ 1093.680031] {RECLAIM_FS-ON-W} state was registered at: [ 1093.680031] [] mark_held_locks+0x43/0x5b [ 1093.680031] [] lockdep_trace_alloc+0x6c/0x6e [ 1093.680031] [] kmem_cache_alloc+0x20/0x150 [ 1093.680031] [] idr_pre_get+0x27/0x6c [ 1093.680031] [] inotify_handle_get_wd+0x25/0xad [ 1093.680031] [] inotify_add_watch+0x7a/0x129 [ 1093.680031] [] sys_inotify_add_watch+0x20f/0x250 [ 1093.680031] [] sysenter_do_call+0x12/0x35 [ 1093.680031] [] 0xffffffff [ 1093.680031] irq event stamp: 60417 [ 1093.680031] hardirqs last enabled at (60417): [] call_rcu+0x53/0x59 [ 1093.680031] hardirqs last disabled at (60416): [] call_rcu+0x17/0x59 [ 1093.680031] softirqs last enabled at (59656): [] __do_softirq+0x157/0x16b [ 1093.680031] softirqs last disabled at (59651): [] do_softirq+0x74/0x15d [ 1093.680031] [ 1093.680031] other info that might help us debug this: [ 1093.680031] 2 locks held by kswapd0/308: [ 1093.680031] #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x36/0x189 [ 1093.680031] #1: (&type->s_umount_key#4){+++++.}, at: [] shrink_dcache_memory+0x110/0x1fb [ 1093.680031] [ 1093.680031] stack backtrace: [ 1093.680031] Pid: 308, comm: kswapd0 Not tainted 2.6.29-rc5-tip-01504-gb49eca1-dirty #1 [ 1093.680031] Call Trace: [ 1093.680031] [] valid_state+0x12a/0x13d [ 1093.680031] [] mark_lock+0xc1/0x1e9 [ 1093.680031] [] ? check_usage_forwards+0x0/0x3f [ 1093.680031] [] __lock_acquire+0x2c6/0xac8 [ 1093.680031] [] ? register_lock_class+0x17/0x228 [ 1093.680031] [] lock_acquire+0x5d/0x7a [ 1093.680031] [] ? inotify_inode_is_dead+0x20/0x80 [ 1093.680031] [] __mutex_lock_common+0x3a/0x4cb [ 1093.680031] [] ? inotify_inode_is_dead+0x20/0x80 [ 1093.680031] [] mutex_lock_nested+0x2e/0x36 [ 1093.680031] [] ? inotify_inode_is_dead+0x20/0x80 [ 1093.680031] [] inotify_inode_is_dead+0x20/0x80 [ 1093.680031] [] dentry_iput+0x90/0xc2 [ 1093.680031] [] d_kill+0x21/0x45 [ 1093.680031] [] __shrink_dcache_sb+0x27f/0x355 [ 1093.680031] [] shrink_dcache_memory+0x15e/0x1fb [ 1093.680031] [] shrink_slab+0x121/0x189 [ 1093.680031] [] kswapd+0x39f/0x561 [ 1093.680031] [] ? isolate_pages_global+0x0/0x233 [ 1093.680031] [] ? autoremove_wake_function+0x0/0x43 [ 1093.680031] [] ? kswapd+0x0/0x561 [ 1093.680031] [] kthread+0x41/0x82 [ 1093.680031] [] ? kthread+0x0/0x82 [ 1093.680031] [] kernel_thread_helper+0x7/0x10 inotify_handle_get_wd() does idr_pre_get() which does a kmem_cache_alloc() without __GFP_FS - and is hence deadlockable under extreme MM pressure. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: MinChan Kim Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index dae3f28f30d4..331f2e88e284 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -156,7 +156,7 @@ static int inotify_handle_get_wd(struct inotify_handle *ih, int ret; do { - if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) + if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS))) return -ENOSPC; ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); } while (ret == -EAGAIN); From 0c5119c1e655e0719a69601b1049acdd5ec1c125 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Feb 2009 18:33:57 -0500 Subject: [PATCH 179/263] tracing: disable tracing while testing ring buffer Impact: fix to prevent hard lockup on self tests If one of the tracers are broken and is constantly filling the ring buffer while the test of the ring buffer is running, it will hang the box. The reason is that the test is a consumer that will not stop till the ring buffer is empty. But if the tracer is broken and is constantly producing input to the buffer, this test will never end. The result is a lockup of the box. This happened when KALLSYMS was not defined and the dynamic ftrace test constantly filled the ring buffer, because the filter failed and all functions were being traced. Something was being called that constantly filled the buffer. Signed-off-by: Steven Rostedt --- kernel/trace/trace_selftest.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 88c8eb70f54a..a7e0ef662f9f 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -57,11 +57,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) cnt = ring_buffer_entries(tr->buffer); + /* + * The trace_test_buffer_cpu runs a while loop to consume all data. + * If the calling tracer is broken, and is constantly filling + * the buffer, this will run forever, and hard lock the box. + * We disable the ring buffer while we do this test to prevent + * a hard lock up. + */ + tracing_off(); for_each_possible_cpu(cpu) { ret = trace_test_buffer_cpu(tr, cpu); if (ret) break; } + tracing_on(); __raw_spin_unlock(&ftrace_max_lock); local_irq_restore(flags); From 4d7a077c0c7bfdba04cf0aa0b79053cf4ebaacf8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Feb 2009 22:06:18 -0500 Subject: [PATCH 180/263] tracing: have function trace select kallsyms Impact: fix output of function tracer to be useful The function tracer is pretty useless if KALLSYMS is not configured. Unless you are good at reading hex values, the function tracer should select the KALLSYMS configuration. Also, the dynamic function tracer will fail its self test if KALLSYMS is not selected. Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 58a93fbd68aa..34e707e5ab87 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,7 @@ config FUNCTION_TRACER depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL select FRAME_POINTER + select KALLSYMS select TRACING select CONTEXT_SWITCH_TRACER help @@ -238,6 +239,7 @@ config STACK_TRACER depends on DEBUG_KERNEL select FUNCTION_TRACER select STACKTRACE + select KALLSYMS help This special tracer records the maximum stack footprint of the kernel and displays it in debugfs/tracing/stack_trace. From 4b3e3d228429c75d398f1aa24532e468d3220c49 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Feb 2009 22:50:01 -0500 Subject: [PATCH 181/263] tracing: limit the number of loops the ring buffer self test can make Impact: prevent deadlock if ring buffer gets corrupted This patch adds a paranoid check to make sure the ring buffer consumer does not go into an infinite loop. Since the ring buffer has been set to read only, the consumer should not loop for more than the ring buffer size. A check is added to make sure the consumer does not loop more than the ring buffer size. Signed-off-by: Steven Rostedt --- kernel/trace/trace_selftest.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a7e0ef662f9f..bc8e80a86bca 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; + unsigned int loops = 0; while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { entry = ring_buffer_event_data(event); + /* + * The ring buffer is a size of trace_buf_size, if + * we loop more than the size, there's something wrong + * with the ring buffer. + */ + if (loops++ > trace_buf_size) { + printk(KERN_CONT ".. bad ring buffer "); + goto failed; + } if (!trace_valid_entry(entry)) { printk(KERN_CONT ".. invalid entry %d ", entry->type); From 6ce6c473a7fd742fdb0db95841e2c4c6b37337c5 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 17 Feb 2009 09:50:30 +0100 Subject: [PATCH 182/263] sound: virtuoso: revert "do not overwrite EEPROM on Xonar D2/D2X" This reverts commit 7e86c0e6850504ec9516b953f316a47277825e33 ("do not overwrite EEPROM on Xonar D2/D2X") because it did not actually help with the problem. More user reports show that the overwriting of the EEPROM is not triggered by using this driver but by installing Linux, and that the installation of any other operating system (even one without any CMI8788 driver) has the same effect. In other words, the presence of this driver does not have any effect on the occurrence of the error. (So far, the available evidence seems to point to a BIOS bug.) Furthermore, it turns out that the EEPROM chip is protected against stray write commands by the command format and by requiring a separate write-enable command, so the error scenario in the previous commit (that SPI writes can be misinterpreted as an EEPROM write command) is not even theoretically possible. The mixer control that was removed as a consequence of the previous commit can only be partially emulated in userspace, which also means it cannot be seen be the in-kernel OSS API emulation, so it is better to revert that change. Signed-off-by: Clemens Ladisch Cc: Signed-off-by: Takashi Iwai --- sound/pci/oxygen/virtuoso.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 18c7c91786bc..6c870c12a177 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -26,7 +26,7 @@ * SPI 0 -> 1st PCM1796 (front) * SPI 1 -> 2nd PCM1796 (surround) * SPI 2 -> 3rd PCM1796 (center/LFE) - * SPI 4 -> 4th PCM1796 (back) and EEPROM self-destruct (do not use!) + * SPI 4 -> 4th PCM1796 (back) * * GPIO 2 -> M0 of CS5381 * GPIO 3 -> M1 of CS5381 @@ -207,12 +207,6 @@ static void xonar_gpio_changed(struct oxygen *chip); static inline void pcm1796_write_spi(struct oxygen *chip, unsigned int codec, u8 reg, u8 value) { - /* - * We don't want to do writes on SPI 4 because the EEPROM, which shares - * the same pin, might get confused and broken. We'd better take care - * that the driver works with the default register values ... - */ -#if 0 /* maps ALSA channel pair number to SPI output */ static const u8 codec_map[4] = { 0, 1, 2, 4 @@ -223,7 +217,6 @@ static inline void pcm1796_write_spi(struct oxygen *chip, unsigned int codec, (codec_map[codec] << OXYGEN_SPI_CODEC_SHIFT) | OXYGEN_SPI_CEN_LATCH_CLOCK_HI, (reg << 8) | value); -#endif } static inline void pcm1796_write_i2c(struct oxygen *chip, unsigned int codec, @@ -757,9 +750,6 @@ static const DECLARE_TLV_DB_SCALE(cs4362a_db_scale, -12700, 100, 0); static int xonar_d2_control_filter(struct snd_kcontrol_new *template) { - if (!strncmp(template->name, "Master Playback ", 16)) - /* disable volume/mute because they would require SPI writes */ - return 1; if (!strncmp(template->name, "CD Capture ", 11)) /* CD in is actually connected to the video in pin */ template->private_value ^= AC97_CD ^ AC97_VIDEO; @@ -850,8 +840,9 @@ static const struct oxygen_model model_xonar_d2 = { .dac_volume_min = 0x0f, .dac_volume_max = 0xff, .misc_flags = OXYGEN_MISC_MIDI, - .function_flags = OXYGEN_FUNCTION_SPI, - .dac_i2s_format = OXYGEN_I2S_FORMAT_I2S, + .function_flags = OXYGEN_FUNCTION_SPI | + OXYGEN_FUNCTION_ENABLE_SPI_4_5, + .dac_i2s_format = OXYGEN_I2S_FORMAT_LJUST, .adc_i2s_format = OXYGEN_I2S_FORMAT_LJUST, }; From 3fd9825c42c784a59b3b90bdf073f49d4bb42a8d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 18 Feb 2009 22:29:22 +0100 Subject: [PATCH 183/263] [ARM] 5402/1: fix a case of wrap-around in sanity_check_meminfo() In the non highmem case, if two memory banks of 1GB each are provided, the second bank would evade suppression since its virtual base would be 0. Fix this by disallowing any memory bank which virtual base address is found to be lower than PAGE_OFFSET. Reported-by: Lennert Buytenhek Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9b36c5cb5e9f..d4d082c5c2d4 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -693,7 +693,8 @@ static void __init sanity_check_meminfo(void) * Check whether this memory bank would entirely overlap * the vmalloc area. */ - if (__va(bank->start) >= VMALLOC_MIN) { + if (__va(bank->start) >= VMALLOC_MIN || + __va(bank->start) < PAGE_OFFSET) { printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " "(vmalloc region overlap).\n", bank->start, bank->start + bank->size - 1); From 22eb36f49e24e922ca6594a99157a3fcb92d3824 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 19 Feb 2009 11:57:46 +0100 Subject: [PATCH 184/263] [ARM] 5403/1: pxa25x_ep_fifo_flush() *ep->reg_udccs always set to 0 *ep->reg_udccs is always set to 0. Signed-off-by: Roel Kluin Acked-by: Eric Miao Signed-off-by: Russell King --- drivers/usb/gadget/pxa25x_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index 9b36205c5759..0ce4e2819847 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -904,8 +904,8 @@ static void pxa25x_ep_fifo_flush(struct usb_ep *_ep) /* most IN status is the same, but ISO can't stall */ *ep->reg_udccs = UDCCS_BI_TPC|UDCCS_BI_FTF|UDCCS_BI_TUR - | (ep->bmAttributes == USB_ENDPOINT_XFER_ISOC) - ? 0 : UDCCS_BI_SST; + | (ep->bmAttributes == USB_ENDPOINT_XFER_ISOC + ? 0 : UDCCS_BI_SST); } From 41f3103fcfffff096c34f5267d7c9a26b44d89d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Feb 2009 13:25:16 +0000 Subject: [PATCH 185/263] [ARM] omap: fix clock reparenting in omap2_clk_set_parent() When changing the parent of a clock, it is necessary to keep the clock use counts balanced otherwise things the parent state will get corrupted. Since we already disable and re-enable the clock, we might as well use the recursive versions instead. Signed-off-by: Russell King --- arch/arm/mach-omap2/clock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index e64549e4e326..ce4d46a4a838 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -746,7 +746,7 @@ int omap2_clk_set_parent(struct clk *clk, struct clk *new_parent) return -EINVAL; if (clk->usecount > 0) - _omap2_clk_disable(clk); + omap2_clk_disable(clk); /* Set new source value (previous dividers if any in effect) */ reg_val = __raw_readl(src_addr) & ~field_mask; @@ -759,11 +759,11 @@ int omap2_clk_set_parent(struct clk *clk, struct clk *new_parent) wmb(); } - if (clk->usecount > 0) - _omap2_clk_enable(clk); - clk->parent = new_parent; + if (clk->usecount > 0) + omap2_clk_enable(clk); + /* CLKSEL clocks follow their parents' rates, divided by a divisor */ clk->rate = new_parent->rate; From e2e5a0f2b100a5204d27def8bbf73333d1710be2 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 19 Feb 2009 15:18:59 +0100 Subject: [PATCH 186/263] [S390] sclp: handle empty event buffers Handle a malformed hardware response which some versions of the Support Element (SE) may present during SE restart and which otherwise would result in an endless loop in function sclp_dispatch_evbufs. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 1fd8f2193ed8..4377e93a43d7 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -280,8 +280,11 @@ sclp_dispatch_evbufs(struct sccb_header *sccb) rc = 0; for (offset = sizeof(struct sccb_header); offset < sccb->length; offset += evbuf->length) { - /* Search for event handler */ evbuf = (struct evbuf_header *) ((addr_t) sccb + offset); + /* Check for malformed hardware response */ + if (evbuf->length == 0) + break; + /* Search for event handler */ reg = NULL; list_for_each(l, &sclp_reg_list) { reg = list_entry(l, struct sclp_register, list); From d5cd0343d2878b66e25e044f644563c6bf708833 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 19 Feb 2009 15:19:00 +0100 Subject: [PATCH 187/263] [S390] Fix timeval regression on s390 commit aa5e97ce4bbc9d5daeec16b1d15bb3f6b7b4f4d4 [PATCH] improve precision of process accounting. Introduced a timing regression: -bash-3.2# time ls real 0m0.006s user 0m1.754s sys 0m1.094s The problem was introduced by an error in cputime_to_timeval. Cputime is now 1/4096 microsecond, therefore, we have to divide the remainder with 4096 to get the microseconds. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 521726430afa..95b0f7db3c69 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -145,7 +145,7 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = cputime % 4096000000ULL; + value->tv_usec = (cputime % 4096000000ULL) / 4096; value->tv_sec = cputime / 4096000000ULL; #endif } From 23d75d9cadd79bc9fd6553857d57c679cf18d4cb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Feb 2009 15:19:01 +0100 Subject: [PATCH 188/263] [S390] fix "mem=" handling in case of standby memory Standby memory detected with the sclp interface gets always registered with add_memory calls without considering the limitationt that the "mem=" kernel paramater implies. So fix this and only register standby memory that is below the specified limit. This fixes zfcpdump since it uses "mem=32M". In case there is appr. 2GB standby memory present all of usable memory would be used for the struct pages needed for standby memory. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 2 ++ arch/s390/kernel/setup.c | 9 +++++++-- drivers/s390/char/sclp_cmd.c | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 2bd9faeb3919..e8bd6ac22c99 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -43,6 +43,8 @@ struct mem_chunk { extern struct mem_chunk memory_chunk[]; extern unsigned long real_memory_size; +extern int memory_end_set; +extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d825f4950e4e..c5cfb6185eac 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -82,7 +82,9 @@ char elf_platform[ELF_PLATFORM_SIZE]; struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ -static unsigned long __initdata memory_end; + +int __initdata memory_end_set; +unsigned long __initdata memory_end; /* * This is set up by the setup-routine at boot-time @@ -281,6 +283,7 @@ void (*pm_power_off)(void) = machine_power_off; static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); + memory_end_set = 1; return 0; } early_param("mem", early_parse_mem); @@ -508,8 +511,10 @@ static void __init setup_memory_end(void) int i; #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) - if (ipl_info.type == IPL_TYPE_FCP_DUMP) + if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; + memory_end_set = 1; + } #endif memory_size = 0; memory_end &= PAGE_MASK; diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 506390496416..77ab6e34a100 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "sclp.h" @@ -474,6 +475,10 @@ static void __init add_memory_merged(u16 rn) goto skip_add; if (start + size > VMEM_MAX_PHYS) size = VMEM_MAX_PHYS - start; + if (memory_end_set && (start >= memory_end)) + goto skip_add; + if (memory_end_set && (start + size > memory_end)) + size = memory_end - start; add_memory(0, start, size); skip_add: first_rn = rn; From 9da616fb9946c8d65387052e5a538b8f54ddb292 Mon Sep 17 00:00:00 2001 From: Makito SHIOKAWA Date: Thu, 19 Feb 2009 15:34:59 +0100 Subject: [PATCH 189/263] [ARM] 5404/1: Fix condition in arm_elf_read_implies_exec() to set READ_IMPLIES_EXEC READ_IMPLIES_EXEC must be set when: o binary _is_ an executable stack (i.e. not EXSTACK_DISABLE_X) o processor architecture is _under_ ARMv6 (XN bit is supported from ARMv6) Signed-off-by: Makito SHIOKAWA Signed-off-by: Russell King --- arch/arm/kernel/elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 84849098c8e8..d4a0da1e48f4 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -74,9 +74,9 @@ EXPORT_SYMBOL(elf_set_personality); */ int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack) { - if (executable_stack != EXSTACK_ENABLE_X) + if (executable_stack != EXSTACK_DISABLE_X) return 1; - if (cpu_architecture() <= CPU_ARCH_ARMv6) + if (cpu_architecture() < CPU_ARCH_ARMv6) return 1; return 0; } From 9dd446f657ebebb209274878be5d01103fcfe988 Mon Sep 17 00:00:00 2001 From: Hartley Sweeten Date: Thu, 19 Feb 2009 17:09:04 +0100 Subject: [PATCH 190/263] [ARM] 5405/1: ep93xx: remove unused gesbc9312.h header Remove the gesbc9312.h header since it is unused. Signed-off-by: H Hartley Sweeten Signed-off-by: Russell King --- arch/arm/mach-ep93xx/include/mach/gesbc9312.h | 3 --- arch/arm/mach-ep93xx/include/mach/hardware.h | 1 - 2 files changed, 4 deletions(-) delete mode 100644 arch/arm/mach-ep93xx/include/mach/gesbc9312.h diff --git a/arch/arm/mach-ep93xx/include/mach/gesbc9312.h b/arch/arm/mach-ep93xx/include/mach/gesbc9312.h deleted file mode 100644 index 21fe2b922aa5..000000000000 --- a/arch/arm/mach-ep93xx/include/mach/gesbc9312.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * arch/arm/mach-ep93xx/include/mach/gesbc9312.h - */ diff --git a/arch/arm/mach-ep93xx/include/mach/hardware.h b/arch/arm/mach-ep93xx/include/mach/hardware.h index 529807d182bf..2866297310b7 100644 --- a/arch/arm/mach-ep93xx/include/mach/hardware.h +++ b/arch/arm/mach-ep93xx/include/mach/hardware.h @@ -10,7 +10,6 @@ #include "platform.h" -#include "gesbc9312.h" #include "ts72xx.h" #endif From 7fdf582447aa01658b624adc0a51a31e4278b68c Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Wed, 18 Feb 2009 15:41:28 -0600 Subject: [PATCH 191/263] Revert "[XFS] use scalable vmap API" This reverts commit 95f8e302c04c0b0c6de35ab399a5551605eeb006. This commit caused regression. We'll try to fix use of new vmap API for next release. Signed-off-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index d71dc44e21ed..0b2177a9fbdc 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -264,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); + vunmap(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,8 +386,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); + bp->b_addr = vmap(bp->b_pages, bp->b_page_count, + VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) return -ENOMEM; bp->b_addr += bp->b_offset; From 27e88bf6af7d42adf790f7b2ed7d65475f191cf2 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Wed, 18 Feb 2009 15:56:51 -0600 Subject: [PATCH 192/263] Revert "[XFS] remove old vmap cache" This reverts commit d2859751cd0bf586941ffa7308635a293f943c17. This commit caused regression. We'll try to fix use of new vmap API for next release. Signed-off-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 75 +++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0b2177a9fbdc..cb329edc925b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -165,6 +165,75 @@ test_page_region( return (mask && (page_private(page) & mask) == mask); } +/* + * Mapping of multi-page buffers into contiguous virtual space + */ + +typedef struct a_list { + void *vm_addr; + struct a_list *next; +} a_list_t; + +static a_list_t *as_free_head; +static int as_list_len; +static DEFINE_SPINLOCK(as_lock); + +/* + * Try to batch vunmaps because they are costly. + */ +STATIC void +free_address( + void *addr) +{ + a_list_t *aentry; + +#ifdef CONFIG_XEN + /* + * Xen needs to be able to make sure it can get an exclusive + * RO mapping of pages it wants to turn into a pagetable. If + * a newly allocated page is also still being vmap()ed by xfs, + * it will cause pagetable construction to fail. This is a + * quick workaround to always eagerly unmap pages so that Xen + * is happy. + */ + vunmap(addr); + return; +#endif + + aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); + if (likely(aentry)) { + spin_lock(&as_lock); + aentry->next = as_free_head; + aentry->vm_addr = addr; + as_free_head = aentry; + as_list_len++; + spin_unlock(&as_lock); + } else { + vunmap(addr); + } +} + +STATIC void +purge_addresses(void) +{ + a_list_t *aentry, *old; + + if (as_free_head == NULL) + return; + + spin_lock(&as_lock); + aentry = as_free_head; + as_free_head = NULL; + as_list_len = 0; + spin_unlock(&as_lock); + + while ((old = aentry) != NULL) { + vunmap(aentry->vm_addr); + aentry = aentry->next; + kfree(old); + } +} + /* * Internal xfs_buf_t object manipulation */ @@ -264,7 +333,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vunmap(bp->b_addr - bp->b_offset); + free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,6 +455,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { + if (as_list_len > 64) + purge_addresses(); bp->b_addr = vmap(bp->b_pages, bp->b_page_count, VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) @@ -1672,6 +1743,8 @@ xfsbufd( count++; } + if (as_list_len > 0) + purge_addresses(); if (count) blk_run_address_space(target->bt_mapping); From 334f85b647bc46ff4d27ace55aa65f44d6a2f4db Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 19 Feb 2009 11:22:36 -0800 Subject: [PATCH 193/263] [IA64] Build fix for __early_pfn_to_nid() undefined link error ia64 only defines __early_pfn_to_nid() for SPARSEMEM && NUMA configurations, so the recent: commit: f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 mm: clean up for early_pfn_to_nid() ends up with some link problems for certain configuration files. Fix arch/ia64/Kconfig to only define HAVE_ARCH_EARLY_PFN_TO_NID in the cases where we do provide this function. Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6183aeccecf1..20d6d28ca735 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -479,8 +479,7 @@ config HOLES_IN_ZONE default y if VIRTUAL_MEM_MAP config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool y - depends on NEED_MULTIPLE_NODES + def_bool NUMA && SPARSEMEM config HAVE_ARCH_NODEDATA_EXTENSION def_bool y From 39d481cba27809598e755e184bc0d8ae1d22423e Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 3 Feb 2009 18:40:59 -0600 Subject: [PATCH 194/263] [IA64] bte_copy of BTE_MAX_XFER trips BUG_ON. BTE_MAX_XFER is wrong. It is one greater than the number of cache lines the BTE is actually able to transfer. If you request a transfer of exactly BTE_MAX_XFER size, you trip a very cryptic BUG_ON() which should certainly be made more clear. This patch fixes that constant and also cleans up the BUG_ON()s in arch/ia64/sn/kernel/bte.c to test one condition per line. Signed-off-by: Robin Holt Signed-off-by: Tony Luck --- arch/ia64/include/asm/sn/bte.h | 2 +- arch/ia64/sn/kernel/bte.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/ia64/include/asm/sn/bte.h b/arch/ia64/include/asm/sn/bte.h index 5efecf06c9a4..96798d2da7c2 100644 --- a/arch/ia64/include/asm/sn/bte.h +++ b/arch/ia64/include/asm/sn/bte.h @@ -39,7 +39,7 @@ /* BTE status register only supports 16 bits for length field */ #define BTE_LEN_BITS (16) #define BTE_LEN_MASK ((1 << BTE_LEN_BITS) - 1) -#define BTE_MAX_XFER ((1 << BTE_LEN_BITS) * L1_CACHE_BYTES) +#define BTE_MAX_XFER (BTE_LEN_MASK << L1_CACHE_SHIFT) /* Define hardware */ diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 9456d4034024..c6d6b62db66c 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -97,9 +97,10 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) return BTE_SUCCESS; } - BUG_ON((len & L1_CACHE_MASK) || - (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); - BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); + BUG_ON(len & L1_CACHE_MASK); + BUG_ON(src & L1_CACHE_MASK); + BUG_ON(dest & L1_CACHE_MASK); + BUG_ON(len > BTE_MAX_XFER); /* * Start with interface corresponding to cpu number From 66db2e6331612bbec193a358885854330596a92a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 9 Feb 2009 11:16:16 -0700 Subject: [PATCH 195/263] [IA64] Revert "prevent ia64 from invoking irq handlers on offline CPUs" This reverts commit e7b140365b86aaf94374214c6f4e6decbee2eb0a. Commit e7b14036 removes the targetted disabled CPU from the cpu_online_map after calls to migrate_platform_irqs and fixup_irqs. Paul McKenney states that the reasoning behind the patch was to prevent irq handlers from running on CPUs marked offline because: RCU happily ignores CPUs that don't have their bits set in cpu_online_map, so if there are RCU read-side critical sections in the irq handlers being run, RCU will ignore them. If the other CPUs were running, they might sequence through the RCU state machine, which could result in data structures being yanked out from under those irq handlers, which in turn could result in oopses or worse. Unfortunately, both ia64 functions above look at cpu_online_map to find a new CPU to migrate interrupts onto. This means we can potentially migrate an interrupt off ourself back to... ourself. Uh oh. This causes an oops when we finally try to process pending interrupts on the CPU we want to disable. The oops results from calling __do_IRQ with a NULL pt_regs: Unable to handle kernel NULL pointer dereference (address 0000000000000040) Call Trace: [] show_stack+0x50/0xa0 sp=e0000009c922fa00 bsp=e0000009c92214d0 [] show_regs+0x820/0x860 sp=e0000009c922fbd0 bsp=e0000009c9221478 [] die+0x1a0/0x2e0 sp=e0000009c922fbd0 bsp=e0000009c9221438 [] ia64_do_page_fault+0x950/0xa80 sp=e0000009c922fbd0 bsp=e0000009c92213d8 [] ia64_native_leave_kernel+0x0/0x270 sp=e0000009c922fc60 bsp=e0000009c92213d8 [] profile_tick+0xd0/0x1c0 sp=e0000009c922fe30 bsp=e0000009c9221398 [] timer_interrupt+0x170/0x3e0 sp=e0000009c922fe30 bsp=e0000009c9221330 [] handle_IRQ_event+0x80/0x120 sp=e0000009c922fe30 bsp=e0000009c92212f8 [] __do_IRQ+0x160/0x4a0 sp=e0000009c922fe30 bsp=e0000009c9221290 [] ia64_process_pending_intr+0x2b0/0x360 sp=e0000009c922fe30 bsp=e0000009c9221208 [] fixup_irqs+0xf0/0x2a0 sp=e0000009c922fe30 bsp=e0000009c92211a8 [] __cpu_disable+0x140/0x240 sp=e0000009c922fe30 bsp=e0000009c9221168 [] take_cpu_down+0x50/0xa0 sp=e0000009c922fe30 bsp=e0000009c9221148 [] stop_cpu+0xd0/0x200 sp=e0000009c922fe30 bsp=e0000009c92210f0 [] kthread+0xc0/0x140 sp=e0000009c922fe30 bsp=e0000009c92210c8 [] kernel_thread_helper+0xd0/0x100 sp=e0000009c922fe30 bsp=e0000009c92210a0 [] start_kernel_thread+0x20/0x40 sp=e0000009c922fe30 bsp=e0000009c92210a0 I don't like this revert because it is fragile. ia64 is getting lucky because we seem to only ever process timer interrupts in this path, but if we ever race with an IPI here, we definitely use RCU and have the potential of hitting an oops that Paul describes above. Patching ia64's timer_interrupt() to check for NULL pt_regs is insufficient though, as we still hit the above oops. As a short term solution, I do think that this revert is the right answer. The revert hold up under repeated testing (24+ hour test runs) with this setup: - 8-way rx6600 - randomly toggling CPU online/offline state every 2 seconds - running CPU exercisers, memory hog, disk exercisers, and network stressors - average system load around ~160 In the long term, we really need to figure out why we set pt_regs = NULL in ia64_process_pending_intr(). If it turns out that it is unnecessary to do so, then we could safely re-introduce e7b14036 (along with some other logic to be smarter about migrating interrupts). One final note: x86 also removes the disabled CPU from cpu_online_map and then re-enables interrupts for 1ms, presumably to handle any pending interrupts: arch/x86/kernel/irq_32.c (and irq_64.c): cpu_disable_common: [remove cpu from cpu_online_map] fixup_irqs(): for_each_irq: [break CPU affinities] local_irq_enable(); mdelay(1); local_irq_disable(); So they are doing implicitly what ia64 is doing explicitly. Signed-off-by: Alex Chiang Signed-off-by: Tony Luck --- arch/ia64/kernel/smpboot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 11463994a7d5..2ec5bbff461e 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -736,14 +736,16 @@ int __cpu_disable(void) return -EBUSY; } + cpu_clear(cpu, cpu_online_map); + if (migrate_platform_irqs(cpu)) { cpu_set(cpu, cpu_online_map); return (-EBUSY); } remove_siblinginfo(cpu); - fixup_irqs(); cpu_clear(cpu, cpu_online_map); + fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); return 0; From c0acdea21437eff134cc3486bdc52907d9086af1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 9 Feb 2009 11:16:57 -0700 Subject: [PATCH 196/263] [IA64] Remove redundant cpu_clear() in __cpu_disable path The second call to cpu_clear() is redundant, as we've already removed the CPU from cpu_online_map before calling migrate_platform_irqs(). Signed-off-by: Alex Chiang Signed-off-by: Tony Luck --- arch/ia64/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 2ec5bbff461e..52290547c85b 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -740,11 +740,10 @@ int __cpu_disable(void) if (migrate_platform_irqs(cpu)) { cpu_set(cpu, cpu_online_map); - return (-EBUSY); + return -EBUSY; } remove_siblinginfo(cpu); - cpu_clear(cpu, cpu_online_map); fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); From 1d5b20f490f61f36a58e6ecf1713a49a43620666 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 16 Jan 2009 12:17:30 +0900 Subject: [PATCH 197/263] [IA64] fixes configs and add default config for ia64 xen domU This patch fixes xen related Kconfigs and add default config file for ia64 xen domU. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 + arch/ia64/configs/xen_domu_defconfig | 1601 ++++++++++++++++++++++++++ arch/ia64/xen/Kconfig | 3 +- 3 files changed, 1606 insertions(+), 2 deletions(-) create mode 100644 arch/ia64/configs/xen_domu_defconfig diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 20d6d28ca735..4eb45c012498 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -221,7 +221,11 @@ config IA64_HP_SIM config IA64_XEN_GUEST bool "Xen guest" + select SWIOTLB depends on XEN + help + Build a kernel that runs on Xen guest domain. At this moment only + 16KB page size in supported. endchoice diff --git a/arch/ia64/configs/xen_domu_defconfig b/arch/ia64/configs/xen_domu_defconfig new file mode 100644 index 000000000000..0bb0714dc19d --- /dev/null +++ b/arch/ia64/configs/xen_domu_defconfig @@ -0,0 +1,1601 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.29-rc1 +# Fri Jan 16 11:49:59 2009 +# +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y +# CONFIG_GROUP_SCHED is not set + +# +# Control Group support +# +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_STRIP_GENERATED=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_COMPAT_BRK=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_DMA_ATTRS=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +CONFIG_FREEZER=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_MMU=y +CONFIG_SWIOTLB=y +CONFIG_IOMMU_HELPER=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_HUGETLB_PAGE_SIZE_VARIABLE=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_DMI=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y +CONFIG_AUDIT_ARCH=y +CONFIG_PARAVIRT_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_XEN=y +CONFIG_XEN_XENCOMM=y +CONFIG_NO_IDLE_HZ=y +# CONFIG_IA64_GENERIC is not set +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_DIG_VTD is not set +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_HP_ZX1_SWIOTLB is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_SGI_UV is not set +# CONFIG_IA64_HP_SIM is not set +CONFIG_IA64_XEN_GUEST=y +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_PGTABLE_3=y +# CONFIG_PGTABLE_4 is not set +CONFIG_HZ=250 +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +# CONFIG_SCHED_HRTICK is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +CONFIG_IA64_CYCLONE=y +CONFIG_IOSAPIC=y +CONFIG_FORCE_MAX_ZONEORDER=17 +# CONFIG_VIRT_CPU_ACCOUNTING is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +# CONFIG_SCHED_SMT is not set +CONFIG_PERMIT_BSP_REMOVE=y +CONFIG_FORCE_CPEI_RETARGET=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y +CONFIG_PHYS_ADDR_T_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_VIRTUAL_MEM_MAP=y +CONFIG_HOLES_IN_ZONE=y +# CONFIG_IA32_SUPPORT is not set +# CONFIG_COMPAT_FOR_U64_ALIGNMENT is not set +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +# CONFIG_IA64_MC_ERR_INJECT is not set +# CONFIG_IA64_ESI is not set +# CONFIG_IA64_HP_AML_NFW is not set +CONFIG_KEXEC=y +# CONFIG_CRASH_DUMP is not set + +# +# Firmware Drivers +# +# CONFIG_FIRMWARE_MEMMAP is not set +CONFIG_EFI_VARS=y +CONFIG_EFI_PCDP=y +CONFIG_DMIID=y +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set +CONFIG_BINFMT_MISC=m + +# +# Power management and ACPI options +# +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_THERMAL=m +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +# CONFIG_ACPI_PCI_SLOT is not set +CONFIG_ACPI_SYSTEM=y +CONFIG_ACPI_CONTAINER=m + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCI_SYSCALL=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +CONFIG_HOTPLUG_PCI_ACPI=m +# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set +# CONFIG_PCCARD is not set +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NET_NS is not set +CONFIG_COMPAT_NET_DEV_OPS=y +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_PNP=y +CONFIG_PNP_DEBUG_MESSAGES=y + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_XEN_BLKDEV_FRONTEND=y +# CONFIG_BLK_DEV_HD is not set +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +# CONFIG_C2PORT is not set +CONFIG_HAVE_IDE=y +CONFIG_IDE=y + +# +# Please see Documentation/ide/ide.txt for help/info on IDE drives +# +CONFIG_IDE_TIMINGS=y +CONFIG_IDE_ATAPI=y +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEACPI is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +# CONFIG_IDE_GENERIC is not set +# CONFIG_BLK_DEV_PLATFORM is not set +# CONFIG_BLK_DEV_IDEPNP is not set +CONFIG_BLK_DEV_IDEDMA_SFF=y + +# +# PCI IDE chipsets support +# +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_CMD64X=y +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_IT8172 is not set +# CONFIG_BLK_DEV_IT8213 is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_BLK_DEV_TC86C001 is not set +CONFIG_BLK_DEV_IDEDMA=y + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_FCOE is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_STEX is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +CONFIG_SCSI_SYM53C8XX_MMIO=y +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_DH is not set +# CONFIG_ATA is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +CONFIG_MD_MULTIPATH=m +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=m +# CONFIG_DM_DEBUG is not set +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set +# CONFIG_DM_UEVENT is not set +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=y +# CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_CTL=y +# CONFIG_FUSION_LOGGING is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# Enable only one of the two stacks, unless you know what you are doing +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_NET_SB1000 is not set +# CONFIG_ARCNET is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set +# CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +CONFIG_E100=m +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_R6040 is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y +# CONFIG_BNX2 is not set +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_JME is not set +CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y +# CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set +# CONFIG_IXGBE is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_QLGE is not set +# CONFIG_SFC is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +# CONFIG_WAN is not set +CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_BCM5974 is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +CONFIG_GAMEPORT=m +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_FM801 is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_PNP=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_HVC_DRIVER=y +CONFIG_HVC_IRQ=y +CONFIG_HVC_XEN=y +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +CONFIG_HPET_MMAP=y +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +CONFIG_I2C=m +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set +CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=m + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Graphics adapter I2C/DDC channel drivers +# +# CONFIG_I2C_VOODOO3 is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_AT24 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_SPI is not set +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_HWMON_DEBUG_CHIP is not set +CONFIG_THERMAL=m +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +CONFIG_DAB=y +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +CONFIG_AGP=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +CONFIG_DUMMY_CONSOLE=y +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set +# CONFIG_HIDRAW is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_HID_PID is not set +# CONFIG_USB_HIDDEV is not set + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GYRATION=y +CONFIG_HID_LOGITECH=y +# CONFIG_LOGITECH_FF is not set +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y +# CONFIG_PANTHERLORD_FF is not set +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y +# CONFIG_THRUSTMASTER_FF is not set +# CONFIG_ZEROPLUS_FF is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +CONFIG_USB_OHCI_HCD=m +# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set +# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# + +# +# see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB port drivers +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_BERRY_CHARGE is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGET is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set +# CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_UIO is not set +CONFIG_XEN_BALLOON=y +CONFIG_XEN_SCRUB_PAGES=y +CONFIG_XENFS=y +CONFIG_XEN_COMPAT_XENFS=y +# CONFIG_STAGING is not set +# CONFIG_MSPEC is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4_FS is not set +CONFIG_JBD=y +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y +CONFIG_XFS_FS=y +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_POSIX_ACL is not set +# CONFIG_XFS_RT is not set +# CONFIG_XFS_DEBUG is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +CONFIG_DNOTIFY=y +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +CONFIG_NFSD_V4=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +# CONFIG_SUNRPC_REGISTER_V4 is not set +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp437" +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=2048 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +CONFIG_DEBUG_MUTEXES=y +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +CONFIG_DEBUG_MEMORY_INIT=y +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set + +# +# Tracers +# +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_SAMPLES is not set +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=m +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=m +# CONFIG_CRYPTO_LRW is not set +CONFIG_CRYPTO_PCBC=m +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=m +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_IRQ_PER_CPU=y +# CONFIG_IOMMU_API is not set diff --git a/arch/ia64/xen/Kconfig b/arch/ia64/xen/Kconfig index f1683a20275b..515e0826803a 100644 --- a/arch/ia64/xen/Kconfig +++ b/arch/ia64/xen/Kconfig @@ -8,8 +8,7 @@ config XEN depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL select XEN_XENCOMM select NO_IDLE_HZ - - # those are required to save/restore. + # followings are required to save/restore. select ARCH_SUSPEND_POSSIBLE select SUSPEND select PM_SLEEP From ec8148de85a73a3be397a59b6d8f4f32cf2dd254 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 19 Feb 2009 12:05:00 -0800 Subject: [PATCH 198/263] [IA64] xen_domu build fix arch/ia64/xen/xen_pv_ops.c:156: error: xen_init_ops causes a section type conflict arch/ia64/xen/xen_pv_ops.c:340: error: xen_iosapic_ops causes a section type conflict Signed-off-by: Tony Luck --- arch/ia64/xen/xen_pv_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 04cd12350455..936cff3c96e0 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -153,7 +153,7 @@ xen_post_smp_prepare_boot_cpu(void) xen_setup_vcpu_info_placement(); } -static const struct pv_init_ops xen_init_ops __initdata = { +static const struct pv_init_ops xen_init_ops __initconst = { .banner = xen_banner, .reserve_memory = xen_reserve_memory, @@ -337,7 +337,7 @@ xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); } -static const struct pv_iosapic_ops xen_iosapic_ops __initdata = { +static const struct pv_iosapic_ops xen_iosapic_ops __initconst = { .pcat_compat_init = xen_pcat_compat_init, .__get_irq_chip = xen_iosapic_get_irq_chip, From 005568be363b90c9333c3bcbc1e7a53922816322 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 9 Feb 2009 22:02:42 +0100 Subject: [PATCH 199/263] drm/i915: Storage class should be before const qualifier The C99 specification states in section 6.11.5: The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature. Signed-off-by: Tobias Klauser Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_sdvo.c | 2 +- drivers/gpu/drm/i915/intel_tv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index a30508b639ba..fbe6f3931b1b 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -193,7 +193,7 @@ static bool intel_sdvo_write_byte(struct intel_output *intel_output, int addr, #define SDVO_CMD_NAME_ENTRY(cmd) {cmd, #cmd} /** Mapping of command numbers to names, for debug output */ -const static struct _sdvo_cmd_name { +static const struct _sdvo_cmd_name { u8 cmd; char *name; } sdvo_cmd_names[] = { diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index fbb35dc56f5c..56485d67369b 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -411,7 +411,7 @@ struct tv_mode { * These values account for -1s required. */ -const static struct tv_mode tv_modes[] = { +static const struct tv_mode tv_modes[] = { { .name = "NTSC-M", .clock = 107520, From ad45aa9e6e010283bbd8cf0c6309866233e113f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Feb 2009 11:31:41 +0000 Subject: [PATCH 200/263] drm: Potential use-after-free on error path. Remove the member from the hash table before we free the structure! Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6915fb82d0b0..308fe1e207f5 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -104,8 +104,8 @@ drm_gem_init(struct drm_device *dev) if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START, DRM_FILE_PAGE_OFFSET_SIZE)) { - drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM); drm_ht_remove(&mm->offset_hash); + drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM); return -ENOMEM; } From 3e49c4f4cf786b70bbc369b99e590de4bebac1b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Feb 2009 11:31:41 +0000 Subject: [PATCH 201/263] drm: Free the object ref on error. Ensure that the object is unreferenced if we fail to allocate during drm_gem_flink_ioctl(). Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 308fe1e207f5..e5a8ebf9a662 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -295,8 +295,10 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data, return -EBADF; again: - if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) - return -ENOMEM; + if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) { + ret = -ENOMEM; + goto err; + } spin_lock(&dev->object_name_lock); if (obj->name) { @@ -310,12 +312,8 @@ again: if (ret == -EAGAIN) goto again; - if (ret != 0) { - mutex_lock(&dev->struct_mutex); - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; - } + if (ret != 0) + goto err; /* * Leave the reference from the lookup around as the @@ -324,6 +322,12 @@ again: args->name = (uint64_t) obj->name; return 0; + +err: + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return ret; } /** From a198bc80ae59cf7c6da93bc8bd017b2198148ed7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Feb 2009 16:55:20 +0000 Subject: [PATCH 202/263] drm/i915: Cleanup trivial leak on execbuffer error path. Also spotted by Owain Ainsworth. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 818576654092..b79ced8f3c61 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2480,13 +2480,15 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, if (dev_priv->mm.wedged) { DRM_ERROR("Execbuf while wedged\n"); mutex_unlock(&dev->struct_mutex); - return -EIO; + ret = -EIO; + goto pre_mutex_err; } if (dev_priv->mm.suspended) { DRM_ERROR("Execbuf while VT-switched.\n"); mutex_unlock(&dev->struct_mutex); - return -EBUSY; + ret = -EBUSY; + goto pre_mutex_err; } /* Look up object handles */ From d6873102fd36c577f88174d8bd50f1d51645fc51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 8 Feb 2009 19:07:51 +0000 Subject: [PATCH 203/263] drm/i915: hold mutex for unreference() in i915_gem_tiling.c Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem_tiling.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fa1685cba840..7fb4191ef934 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -299,9 +299,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj_priv->stride = args->stride; - mutex_unlock(&dev->struct_mutex); - drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); return 0; } @@ -340,9 +339,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, DRM_ERROR("unknown tiling mode\n"); } - mutex_unlock(&dev->struct_mutex); - drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); return 0; } From 96dec61d563fb8dff2c8427fdf85327a95b65c74 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 8 Feb 2009 19:08:04 +0000 Subject: [PATCH 204/263] drm/i915: refleak along pin() error path. A missing unreference if the user calls pin() a second time on a pinned buffer. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b79ced8f3c61..55f4c060fa01 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2755,6 +2755,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", args->handle); + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return -EINVAL; } From a35f2e2b83a789e189a501ebd722bc9a1310eb05 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 6 Feb 2009 17:48:09 -0800 Subject: [PATCH 205/263] drm/i915: Fix potential AB-BA deadlock in i915_gem_execbuffer() Lockdep warns that i915_gem_execbuffer() can trigger a page fault (which takes mmap_sem) while holding dev->struct_mutex, while drm_vm_open() (which is called with mmap_sem already held) takes dev->struct_mutex. So this is a potential AB-BA deadlock. The way that i915_gem_execbuffer() triggers a page fault is by doing copy_to_user() when returning new buffer offsets back to userspace; however there is no reason to hold the struct_mutex when doing this copy, since what is being copied is the contents of an array private to i915_gem_execbuffer() anyway. So we can fix the potential deadlock (and get rid of the lockdep warning) by simply moving the copy_to_user() outside of where struct_mutex is held. This fixes . Reported-by: Jesse Brandeburg Tested-by: Jesse Brandeburg Signed-off-by: Roland Dreier Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 55f4c060fa01..ff0d94dd3550 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2634,15 +2634,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, i915_verify_inactive(dev, __FILE__, __LINE__); - /* Copy the new buffer offsets back to the user's exec list. */ - ret = copy_to_user((struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) - DRM_ERROR("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); err: for (i = 0; i < pinned; i++) i915_gem_object_unpin(object_list[i]); @@ -2652,6 +2643,18 @@ err: mutex_unlock(&dev->struct_mutex); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret) + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + pre_mutex_err: drm_free(object_list, sizeof(*object_list) * args->buffer_count, DRM_MEM_DRIVER); From 8d59bae5d9aae10ab230561519bfb97962509bcb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:28 +0000 Subject: [PATCH 206/263] drm: Do not leak a new reference for flink() on an existing name The name table should only hold a single reference, so avoid leaking additional references for secondary calls to flink(). Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index e5a8ebf9a662..5dad6b9d0dec 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -301,27 +301,25 @@ again: } spin_lock(&dev->object_name_lock); - if (obj->name) { - args->name = obj->name; + if (!obj->name) { + ret = idr_get_new_above(&dev->object_name_idr, obj, 1, + &obj->name); + args->name = (uint64_t) obj->name; spin_unlock(&dev->object_name_lock); - return 0; + + if (ret == -EAGAIN) + goto again; + + if (ret != 0) + goto err; + + /* Allocate a reference for the name table. */ + drm_gem_object_reference(obj); + } else { + args->name = (uint64_t) obj->name; + spin_unlock(&dev->object_name_lock); + ret = 0; } - ret = idr_get_new_above(&dev->object_name_idr, obj, 1, - &obj->name); - spin_unlock(&dev->object_name_lock); - if (ret == -EAGAIN) - goto again; - - if (ret != 0) - goto err; - - /* - * Leave the reference from the lookup around as the - * name table now holds one - */ - args->name = (uint64_t) obj->name; - - return 0; err: mutex_lock(&dev->struct_mutex); @@ -452,6 +450,7 @@ drm_gem_object_handle_free(struct kref *kref) spin_lock(&dev->object_name_lock); if (obj->name) { idr_remove(&dev->object_name_idr, obj->name); + obj->name = 0; spin_unlock(&dev->object_name_lock); /* * The object name held a reference to this object, drop From 2ebed176a7ee126448d34fc336afb2ea0238c280 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:30 +0000 Subject: [PATCH 207/263] drm/i915: Set framebuffer alignment based upon the fence constraints. Set the request alignment to 0, and leave it up to i915_gem_object_pin() to set the appropriate alignment to match the fence covering the object. Eric Anholt mentioned that the pinning code is meant to choose the maximum of the request alignment and that of the fence covering the object... However currently, the pinning code will only apply the fence constraints if the supplied alignment is 0. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bbdd72909a11..a440d0db5ccc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -377,10 +377,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, alignment = 64 * 1024; break; case I915_TILING_X: - if (IS_I9XX(dev)) - alignment = 1024 * 1024; - else - alignment = 512 * 1024; + /* pin() will align the object as required by fence */ + alignment = 0; break; case I915_TILING_Y: /* FIXME: Is this true? */ From 13af10627676879d1b20ee3cdba9a28f0906dd98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:31 +0000 Subject: [PATCH 208/263] drm/i915: Release and unlock on mmap_gtt error path. We failed to unlock the mutex after failing to create the mmap offset. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ff0d94dd3550..f565b6afe414 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -758,8 +758,11 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, if (!obj_priv->mmap_offset) { ret = i915_gem_create_mmap_offset(obj); - if (ret) + if (ret) { + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); return ret; + } } args->offset = obj_priv->mmap_offset; From 491152b8778d7d290579c989e8607892accde920 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:32 +0000 Subject: [PATCH 209/263] drm/i915: unpin for an invalid memory domain. A missing unreference and unpin after rejecting the relocation for an invalid memory domain. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f565b6afe414..0bec4e6d3369 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2254,6 +2254,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, (int) reloc.offset, reloc.read_domains, reloc.write_domain); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); return -EINVAL; } From 47ed185a777632063d2748f59d14ec6fdeb26f67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:33 +0000 Subject: [PATCH 210/263] drm/i915: Unpin the ringbuffer if we fail to ioremap it. A missing unpin on the error path. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0bec4e6d3369..e5fc664337f3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3159,6 +3159,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev) if (ring->map.handle == NULL) { DRM_ERROR("Failed to map ringbuffer.\n"); memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); + i915_gem_object_unpin(obj); drm_gem_object_unreference(obj); return -EINVAL; } From 3eb2ee77b0b6b7b2c10308d7b46d2a459fb5be10 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:34 +0000 Subject: [PATCH 211/263] drm/i915: Unpin the hws if we fail to kmap. A missing unpin on the error path. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5fc664337f3..fd4b4e268a22 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3106,6 +3106,7 @@ i915_gem_init_hws(struct drm_device *dev) if (dev_priv->hw_status_page == NULL) { DRM_ERROR("Failed to map status page.\n"); memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); + i915_gem_object_unpin(obj); drm_gem_object_unreference(obj); return -EINVAL; } From b4476f52e43fadcb9402723a1a55ba1308757525 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:26:36 +0000 Subject: [PATCH 212/263] drm/i915: Unpin the fb on error during construction. If we fail whilst constructing the fb, then we need to unpin it as well. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_fb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index afd1217b8a02..b7f0ebe9f810 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -473,7 +473,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, ret = intel_framebuffer_create(dev, &mode_cmd, &fb, fbo); if (ret) { DRM_ERROR("failed to allocate fb.\n"); - goto out_unref; + goto out_unpin; } list_add(&fb->filp_head, &dev->mode_config.fb_kernel_list); @@ -484,7 +484,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, info = framebuffer_alloc(sizeof(struct intelfb_par), device); if (!info) { ret = -ENOMEM; - goto out_unref; + goto out_unpin; } par = info->par; @@ -513,7 +513,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, size); if (!info->screen_base) { ret = -ENOSPC; - goto out_unref; + goto out_unpin; } info->screen_size = size; @@ -608,6 +608,8 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, mutex_unlock(&dev->struct_mutex); return 0; +out_unpin: + i915_gem_object_unpin(fbo); out_unref: drm_gem_object_unreference(fbo); mutex_unlock(&dev->struct_mutex); From ea39f835168f60b01e59d0f348da25d297e7cf94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Thu, 12 Feb 2009 14:37:56 -0500 Subject: [PATCH 213/263] drm: Release user fbs in drm_release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids leaking fbs and associated buffers on release. Signed-off-by: Kristian Høgsberg Tested-by: Tested-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 3 +-- drivers/gpu/drm/drm_fops.c | 3 +++ include/drm/drm_crtc.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bfce0992fefb..94a768871734 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1741,9 +1741,8 @@ out: * RETURNS: * Zero on success, errno on failure. */ -void drm_fb_release(struct file *filp) +void drm_fb_release(struct drm_file *priv) { - struct drm_file *priv = filp->private_data; struct drm_device *dev = priv->minor->dev; struct drm_framebuffer *fb, *tfb; diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index b06a53715853..6c020fe5431c 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -457,6 +457,9 @@ int drm_release(struct inode *inode, struct file *filp) if (dev->driver->driver_features & DRIVER_GEM) drm_gem_release(dev, file_priv); + if (dev->driver->driver_features & DRIVER_MODESET) + drm_fb_release(file_priv); + mutex_lock(&dev->ctxlist_mutex); if (!list_empty(&dev->ctxlist)) { struct drm_ctx_list *pos, *n; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index d54de24bf371..5ded1acfb543 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -609,7 +609,7 @@ extern char *drm_get_dvi_i_subconnector_name(int val); extern char *drm_get_dvi_i_select_name(int val); extern char *drm_get_tv_subconnector_name(int val); extern char *drm_get_tv_select_name(int val); -extern void drm_fb_release(struct file *filp); +extern void drm_fb_release(struct drm_file *file_priv); extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group); extern struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); From 67eabc0553a32c491fdb392ff2358a0384562050 Mon Sep 17 00:00:00 2001 From: Steve Aarnio Date: Thu, 12 Feb 2009 11:34:02 -0800 Subject: [PATCH 214/263] drm/i915: Don't add panel_fixed_mode to the probed modes list at LVDS init. In the case where no EDID data is read from the device, adding the panel_fixed_mode pointer to the probed modes list causes data corruption. If the panel_fixed_mode pointer is added to the probed modes list at init time, a copy of the mode is added again at drm_get_modes() request time. Then, the panel_fixed_mode pointer is freed because it is seen as a duplicate mode. Unfortunately, this pointer is still stored and used in mode_fixup(). Because the panel_fixed_mode data is copied and returned at drm_get_modes() time, it is unnecessary to add this information at init time. Signed-off-by: Steve Aarnio Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_lvds.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 6d4f91265354..0d211af98854 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -481,8 +481,6 @@ void intel_lvds_init(struct drm_device *dev) if (dev_priv->panel_fixed_mode) { dev_priv->panel_fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - drm_mode_probed_add(connector, - dev_priv->panel_fixed_mode); goto out; } } From 85a7bb98582b60b7e9130159d2464eb0bbac13f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 14:52:44 +0000 Subject: [PATCH 215/263] drm/i915: Cleanup the hws on ringbuffer constrution failure. If we fail to create the ringbuffer, then we need to cleanup the allocated hws. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fd4b4e268a22..078858178832 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3119,6 +3119,27 @@ i915_gem_init_hws(struct drm_device *dev) return 0; } +static void +i915_gem_cleanup_hws(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj = dev_priv->hws_obj; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + if (dev_priv->hws_obj == NULL) + return; + + kunmap(obj_priv->page_list[0]); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(obj); + dev_priv->hws_obj = NULL; + memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); + dev_priv->hw_status_page = NULL; + + /* Write high address into HWS_PGA when disabling. */ + I915_WRITE(HWS_PGA, 0x1ffff000); +} + int i915_gem_init_ringbuffer(struct drm_device *dev) { @@ -3136,6 +3157,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev) obj = drm_gem_object_alloc(dev, 128 * 1024); if (obj == NULL) { DRM_ERROR("Failed to allocate ringbuffer\n"); + i915_gem_cleanup_hws(dev); return -ENOMEM; } obj_priv = obj->driver_private; @@ -3143,6 +3165,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev) ret = i915_gem_object_pin(obj, 4096); if (ret != 0) { drm_gem_object_unreference(obj); + i915_gem_cleanup_hws(dev); return ret; } @@ -3162,6 +3185,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); i915_gem_object_unpin(obj); drm_gem_object_unreference(obj); + i915_gem_cleanup_hws(dev); return -EINVAL; } ring->ring_obj = obj; @@ -3241,20 +3265,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) dev_priv->ring.ring_obj = NULL; memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); - if (dev_priv->hws_obj != NULL) { - struct drm_gem_object *obj = dev_priv->hws_obj; - struct drm_i915_gem_object *obj_priv = obj->driver_private; - - kunmap(obj_priv->page_list[0]); - i915_gem_object_unpin(obj); - drm_gem_object_unreference(obj); - dev_priv->hws_obj = NULL; - memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); - dev_priv->hw_status_page = NULL; - - /* Write high address into HWS_PGA when disabling. */ - I915_WRITE(HWS_PGA, 0x1ffff000); - } + i915_gem_cleanup_hws(dev); } int From e62fb64e6187ea9d8bcedb17ccaa045ed92d4b55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 16:39:21 +0000 Subject: [PATCH 216/263] drm: Check for a NULL encoder when reverting on error path We need to skip the connectors with a NULL encoder to match the success path and avoid an OOPS. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 964c5eb1fada..8ba22c039a11 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -775,8 +775,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) fail_set_mode: set->crtc->enabled = save_enabled; count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (!connector->encoder) + continue; + connector->encoder->crtc = save_crtcs[count++]; + } fail_no_encoder: kfree(save_crtcs); count = 0; From 5c3b82e2b229e78eb32f4ea12d16f3ebeeab3fc7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Feb 2009 13:25:09 +0000 Subject: [PATCH 217/263] drm: Propagate failure from setting crtc base. Check the error paths within intel_pipe_set_base() to first cleanup and then report back the error. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 15 +++-- drivers/gpu/drm/i915/intel_display.c | 84 ++++++++++++++++++---------- include/drm/drm_crtc_helper.h | 10 ++-- 3 files changed, 69 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 8ba22c039a11..733028b4d45e 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -512,8 +512,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, if (drm_mode_equal(&saved_mode, &crtc->mode)) { if (saved_x != crtc->x || saved_y != crtc->y || depth_changed || bpp_changed) { - crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, - old_fb); + ret = !crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, + old_fb); goto done; } } @@ -552,7 +552,9 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, /* Set up the DPLL and any encoders state that needs to adjust or depend * on the DPLL. */ - crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb); + ret = !crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb); + if (!ret) + goto done; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { @@ -752,6 +754,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, old_fb)) { + DRM_ERROR("failed to set mode on crtc %p\n", + set->crtc); ret = -EINVAL; goto fail_set_mode; } @@ -765,7 +769,10 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) old_fb = set->crtc->fb; if (set->crtc->fb != set->fb) set->crtc->fb = set->fb; - crtc_funcs->mode_set_base(set->crtc, set->x, set->y, old_fb); + ret = crtc_funcs->mode_set_base(set->crtc, + set->x, set->y, old_fb); + if (ret != 0) + goto fail_set_mode; } kfree(save_encoders); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a440d0db5ccc..ac92799fa8a1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -343,7 +343,7 @@ intel_wait_for_vblank(struct drm_device *dev) udelay(20000); } -static void +static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { @@ -361,11 +361,21 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE; int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; u32 dspcntr, alignment; + int ret; /* no fb bound */ if (!crtc->fb) { DRM_DEBUG("No FB bound\n"); - return; + return 0; + } + + switch (pipe) { + case 0: + case 1: + break; + default: + DRM_ERROR("Can't update pipe %d in SAREA\n", pipe); + return -EINVAL; } intel_fb = to_intel_framebuffer(crtc->fb); @@ -383,20 +393,24 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, case I915_TILING_Y: /* FIXME: Is this true? */ DRM_ERROR("Y tiled not allowed for scan out buffers\n"); - return; + return -EINVAL; default: BUG(); } - if (i915_gem_object_pin(intel_fb->obj, alignment)) - return; + mutex_lock(&dev->struct_mutex); + ret = i915_gem_object_pin(intel_fb->obj, alignment); + if (ret != 0) { + mutex_unlock(&dev->struct_mutex); + return ret; + } - i915_gem_object_set_to_gtt_domain(intel_fb->obj, 1); - - Start = obj_priv->gtt_offset; - Offset = y * crtc->fb->pitch + x * (crtc->fb->bits_per_pixel / 8); - - I915_WRITE(dspstride, crtc->fb->pitch); + ret = i915_gem_object_set_to_gtt_domain(intel_fb->obj, 1); + if (ret != 0) { + i915_gem_object_unpin(intel_fb->obj); + mutex_unlock(&dev->struct_mutex); + return ret; + } dspcntr = I915_READ(dspcntr_reg); /* Mask out pixel format bits in case we change it */ @@ -417,11 +431,17 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, break; default: DRM_ERROR("Unknown color depth\n"); - return; + i915_gem_object_unpin(intel_fb->obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; } I915_WRITE(dspcntr_reg, dspcntr); + Start = obj_priv->gtt_offset; + Offset = y * crtc->fb->pitch + x * (crtc->fb->bits_per_pixel / 8); + DRM_DEBUG("Writing base %08lX %08lX %d %d\n", Start, Offset, x, y); + I915_WRITE(dspstride, crtc->fb->pitch); if (IS_I965G(dev)) { I915_WRITE(dspbase, Offset); I915_READ(dspbase); @@ -438,27 +458,24 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, intel_fb = to_intel_framebuffer(old_fb); i915_gem_object_unpin(intel_fb->obj); } + mutex_unlock(&dev->struct_mutex); if (!dev->primary->master) - return; + return 0; master_priv = dev->primary->master->driver_priv; if (!master_priv->sarea_priv) - return; + return 0; - switch (pipe) { - case 0: - master_priv->sarea_priv->pipeA_x = x; - master_priv->sarea_priv->pipeA_y = y; - break; - case 1: + if (pipe) { master_priv->sarea_priv->pipeB_x = x; master_priv->sarea_priv->pipeB_y = y; - break; - default: - DRM_ERROR("Can't update pipe %d in SAREA\n", pipe); - break; + } else { + master_priv->sarea_priv->pipeA_x = x; + master_priv->sarea_priv->pipeA_y = y; } + + return 0; } @@ -706,11 +723,11 @@ static int intel_panel_fitter_pipe (struct drm_device *dev) return 1; } -static void intel_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, - struct drm_framebuffer *old_fb) +static int intel_crtc_mode_set(struct drm_crtc *crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, + int x, int y, + struct drm_framebuffer *old_fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -737,6 +754,7 @@ static void intel_crtc_mode_set(struct drm_crtc *crtc, bool is_crt = false, is_lvds = false, is_tv = false; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + int ret; drm_vblank_pre_modeset(dev, pipe); @@ -777,7 +795,7 @@ static void intel_crtc_mode_set(struct drm_crtc *crtc, ok = intel_find_best_PLL(crtc, adjusted_mode->clock, refclk, &clock); if (!ok) { DRM_ERROR("Couldn't find PLL settings for mode!\n"); - return; + return -EINVAL; } fp = clock.n << 16 | clock.m1 << 8 | clock.m2; @@ -948,9 +966,13 @@ static void intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(dspcntr_reg, dspcntr); /* Flush the plane changes */ - intel_pipe_set_base(crtc, x, y, old_fb); + ret = intel_pipe_set_base(crtc, x, y, old_fb); + if (ret != 0) + return ret; drm_vblank_post_modeset(dev, pipe); + + return 0; } /** Loads the palette/gamma unit for the CRTC with the prepared values */ diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 0c6f0e11b41b..0b0d236c2154 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -54,13 +54,13 @@ struct drm_crtc_helper_funcs { struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); /* Actually set the mode */ - void (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, int x, int y, - struct drm_framebuffer *old_fb); + int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, int x, int y, + struct drm_framebuffer *old_fb); /* Move the crtc on the current fb to the given position *optional* */ - void (*mode_set_base)(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb); + int (*mode_set_base)(struct drm_crtc *crtc, int x, int y, + struct drm_framebuffer *old_fb); }; struct drm_encoder_helper_funcs { From 7f9872e06d749afdc2029aa6b7ffe88cb3b8c5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Fri, 13 Feb 2009 20:56:49 -0500 Subject: [PATCH 218/263] drm: Add locking around cursor gem operations. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to hold the struct_mutex around pinning and the phys object operations. Signed-off-by: Kristian Høgsberg Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ac92799fa8a1..94c7c098c4ff 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1043,18 +1043,19 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, } /* we only need to pin inside GTT if cursor is non-phy */ + mutex_lock(&dev->struct_mutex); if (!dev_priv->cursor_needs_physical) { ret = i915_gem_object_pin(bo, PAGE_SIZE); if (ret) { DRM_ERROR("failed to pin cursor bo\n"); - goto fail; + goto fail_locked; } addr = obj_priv->gtt_offset; } else { ret = i915_gem_attach_phys_object(dev, bo, (pipe == 0) ? I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1); if (ret) { DRM_ERROR("failed to attach phys object\n"); - goto fail; + goto fail_locked; } addr = obj_priv->phys_obj->handle->busaddr; } @@ -1074,10 +1075,9 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); } else i915_gem_object_unpin(intel_crtc->cursor_bo); - mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(intel_crtc->cursor_bo); - mutex_unlock(&dev->struct_mutex); } + mutex_unlock(&dev->struct_mutex); intel_crtc->cursor_addr = addr; intel_crtc->cursor_bo = bo; @@ -1085,6 +1085,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, return 0; fail: mutex_lock(&dev->struct_mutex); +fail_locked: drm_gem_object_unreference(bo); mutex_unlock(&dev->struct_mutex); return ret; From f3cade5c037054ce5f57651fe0b64eaa9781c753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Fri, 13 Feb 2009 20:56:50 -0500 Subject: [PATCH 219/263] drm: Bring PLL limits in sync with DDX values. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 94c7c098c4ff..4f54ac55f326 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -90,12 +90,12 @@ typedef struct { #define I9XX_DOT_MAX 400000 #define I9XX_VCO_MIN 1400000 #define I9XX_VCO_MAX 2800000 -#define I9XX_N_MIN 3 -#define I9XX_N_MAX 8 +#define I9XX_N_MIN 1 +#define I9XX_N_MAX 6 #define I9XX_M_MIN 70 #define I9XX_M_MAX 120 #define I9XX_M1_MIN 10 -#define I9XX_M1_MAX 20 +#define I9XX_M1_MAX 22 #define I9XX_M2_MIN 5 #define I9XX_M2_MAX 9 #define I9XX_P_SDVO_DAC_MIN 5 From a29f5ca3d691995266a4b1df313e32ff0509a03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Fri, 13 Feb 2009 20:56:51 -0500 Subject: [PATCH 220/263] drm: Collapse identical i8xx_clock() and i9xx_clock(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They used to be different. Now they're identical. Signed-off-by: Kristian Høgsberg Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 33 +++++----------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f54ac55f326..8b2038706268 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -189,9 +189,7 @@ static const intel_limit_t *intel_limit(struct drm_crtc *crtc) return limit; } -/** Derive the pixel clock for the given refclk and divisors for 8xx chips. */ - -static void i8xx_clock(int refclk, intel_clock_t *clock) +static void intel_clock(int refclk, intel_clock_t *clock) { clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2); clock->p = clock->p1 * clock->p2; @@ -199,25 +197,6 @@ static void i8xx_clock(int refclk, intel_clock_t *clock) clock->dot = clock->vco / clock->p; } -/** Derive the pixel clock for the given refclk and divisors for 9xx chips. */ - -static void i9xx_clock(int refclk, intel_clock_t *clock) -{ - clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2); - clock->p = clock->p1 * clock->p2; - clock->vco = refclk * clock->m / (clock->n + 2); - clock->dot = clock->vco / clock->p; -} - -static void intel_clock(struct drm_device *dev, int refclk, - intel_clock_t *clock) -{ - if (IS_I9XX(dev)) - i9xx_clock (refclk, clock); - else - i8xx_clock (refclk, clock); -} - /** * Returns whether any output on the specified pipe is of the specified type */ @@ -318,7 +297,7 @@ static bool intel_find_best_PLL(struct drm_crtc *crtc, int target, clock.p1 <= limit->p1.max; clock.p1++) { int this_err; - intel_clock(dev, refclk, &clock); + intel_clock(refclk, &clock); if (!intel_PLL_is_valid(crtc, &clock)) continue; @@ -1313,7 +1292,7 @@ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc) } /* XXX: Handle the 100Mhz refclk */ - i9xx_clock(96000, &clock); + intel_clock(96000, &clock); } else { bool is_lvds = (pipe == 1) && (I915_READ(LVDS) & LVDS_PORT_EN); @@ -1325,9 +1304,9 @@ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc) if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) { /* XXX: might not be 66MHz */ - i8xx_clock(66000, &clock); + intel_clock(66000, &clock); } else - i8xx_clock(48000, &clock); + intel_clock(48000, &clock); } else { if (dpll & PLL_P1_DIVIDE_BY_TWO) clock.p1 = 2; @@ -1340,7 +1319,7 @@ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc) else clock.p2 = 2; - i8xx_clock(48000, &clock); + intel_clock(48000, &clock); } } From 43565a0648e664744ac9201c199681451355edcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Fri, 13 Feb 2009 20:56:52 -0500 Subject: [PATCH 221/263] drm: Use spread spectrum when the bios tells us it's ok. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lifted from the DDX modesetting. Signed-off-by: Kristian Høgsberg Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_bios.c | 8 ++++++++ drivers/gpu/drm/i915/intel_display.c | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7325363164f8..135a08f615cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -184,6 +184,8 @@ typedef struct drm_i915_private { unsigned int lvds_dither:1; unsigned int lvds_vbt:1; unsigned int int_crt_support:1; + unsigned int lvds_use_ssc:1; + int lvds_ssc_freq; struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */ int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */ diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 4ca82a025525..65be30dccc77 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -135,6 +135,14 @@ parse_general_features(struct drm_i915_private *dev_priv, if (general) { dev_priv->int_tv_support = general->int_tv_support; dev_priv->int_crt_support = general->int_crt_support; + dev_priv->lvds_use_ssc = general->enable_ssc; + + if (dev_priv->lvds_use_ssc) { + if (IS_I855(dev_priv->dev)) + dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; + else + dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; + } } } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8b2038706268..13f9b6667c94 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -217,7 +217,7 @@ bool intel_pipe_has_type (struct drm_crtc *crtc, int type) return false; } -#define INTELPllInvalid(s) { /* ErrorF (s) */; return false; } +#define INTELPllInvalid(s) do { DRM_DEBUG(s); return false; } while (0) /** * Returns whether the given set of divisors are valid for a given refclk with * the given connectors. @@ -726,7 +726,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE; int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS; int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC; - int refclk; + int refclk, num_outputs = 0; intel_clock_t clock; u32 dpll = 0, fp = 0, dspcntr, pipeconf; bool ok, is_sdvo = false, is_dvo = false; @@ -763,9 +763,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, is_crt = true; break; } + + num_outputs++; } - if (IS_I9XX(dev)) { + if (is_lvds && dev_priv->lvds_use_ssc && num_outputs < 2) { + refclk = dev_priv->lvds_ssc_freq * 1000; + DRM_DEBUG("using SSC reference clock of %d MHz\n", refclk / 1000); + } else if (IS_I9XX(dev)) { refclk = 96000; } else { refclk = 48000; @@ -824,11 +829,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, } } - if (is_tv) { + if (is_sdvo && is_tv) + dpll |= PLL_REF_INPUT_TVCLKINBC; + else if (is_tv) /* XXX: just matching BIOS for now */ -/* dpll |= PLL_REF_INPUT_TVCLKINBC; */ + /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ dpll |= 3; - } + else if (is_lvds && dev_priv->lvds_use_ssc && num_outputs < 2) + dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; else dpll |= PLL_REF_INPUT_DREFCLK; From 496818f08a78476abdb307e241911536221239fc Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 11 Feb 2009 13:28:14 -0800 Subject: [PATCH 222/263] drm/i915: take struct mutex around fb unref Need to do this in case the unref ends up doing a free. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 13f9b6667c94..4d2baf7b00be 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1606,7 +1606,9 @@ intel_user_framebuffer_create(struct drm_device *dev, ret = intel_framebuffer_create(dev, mode_cmd, &fb, obj); if (ret) { + mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); return NULL; } From ab00b3e5210954cbaff9207db874a9f03197e3ba Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 11 Feb 2009 14:01:46 -0800 Subject: [PATCH 223/263] drm/i915: Keep refs on the object over the lifetime of vmas for GTT mmap. This fixes potential fault at fault time if the object was unreferenced while the mapping still existed. Now, while the mmap_offset only lives for the lifetime of the object, the object also stays alive while a vma exists that needs it. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 28 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 43 +++++++++++++++++++-------------- include/drm/drmP.h | 2 ++ 4 files changed, 57 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 5dad6b9d0dec..88d3368ffddd 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -463,6 +463,26 @@ drm_gem_object_handle_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_handle_free); +void drm_gem_vm_open(struct vm_area_struct *vma) +{ + struct drm_gem_object *obj = vma->vm_private_data; + + drm_gem_object_reference(obj); +} +EXPORT_SYMBOL(drm_gem_vm_open); + +void drm_gem_vm_close(struct vm_area_struct *vma) +{ + struct drm_gem_object *obj = vma->vm_private_data; + struct drm_device *dev = obj->dev; + + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); +} +EXPORT_SYMBOL(drm_gem_vm_close); + + /** * drm_gem_mmap - memory map routine for GEM objects * @filp: DRM file pointer @@ -524,6 +544,14 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) #endif vma->vm_page_prot = __pgprot(prot); + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_reference(obj); + vma->vm_file = filp; /* Needed for drm_vm_open() */ drm_vm_open_locked(vma); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index aac12ee31a46..a31cbdbc3c54 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -94,6 +94,8 @@ static int i915_resume(struct drm_device *dev) static struct vm_operations_struct i915_gem_vm_ops = { .fault = i915_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 078858178832..ac534c9a2f81 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -607,8 +607,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) case -EAGAIN: return VM_FAULT_OOM; case -EFAULT: - case -EBUSY: - DRM_ERROR("can't insert pfn?? fault or busy...\n"); return VM_FAULT_SIGBUS; default: return VM_FAULT_NOPAGE; @@ -684,6 +682,30 @@ out_free_list: return ret; } +static void +i915_gem_free_mmap_offset(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct drm_gem_mm *mm = dev->mm_private; + struct drm_map_list *list; + + list = &obj->map_list; + drm_ht_remove_item(&mm->offset_hash, &list->hash); + + if (list->file_offset_node) { + drm_mm_put_block(list->file_offset_node); + list->file_offset_node = NULL; + } + + if (list->map) { + drm_free(list->map, sizeof(struct drm_map), DRM_MEM_DRIVER); + list->map = NULL; + } + + obj_priv->mmap_offset = 0; +} + /** * i915_gem_get_gtt_alignment - return required GTT alignment for an object * @obj: object to check @@ -2896,9 +2918,6 @@ int i915_gem_init_object(struct drm_gem_object *obj) void i915_gem_free_object(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; - struct drm_gem_mm *mm = dev->mm_private; - struct drm_map_list *list; - struct drm_map *map; struct drm_i915_gem_object *obj_priv = obj->driver_private; while (obj_priv->pin_count > 0) @@ -2909,19 +2928,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_object_unbind(obj); - list = &obj->map_list; - drm_ht_remove_item(&mm->offset_hash, &list->hash); - - if (list->file_offset_node) { - drm_mm_put_block(list->file_offset_node); - list->file_offset_node = NULL; - } - - map = list->map; - if (map) { - drm_free(map, sizeof(*map), DRM_MEM_DRIVER); - list->map = NULL; - } + i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 8190b9bcc2d9..e5f4ae989abf 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1321,6 +1321,8 @@ void drm_gem_object_free(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_vm_open(struct vm_area_struct *vma); +void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); static inline void From 3d16118dc825a654043dfe3e14371fdf2976994d Mon Sep 17 00:00:00 2001 From: etienne Date: Fri, 20 Feb 2009 09:44:45 +1000 Subject: [PATCH 224/263] drm/radeon: update sarea copies of last_ variables on resume. This fixes a regression reported in bug #12613. [airlied: not I tweaked the patch slightly and fixed it by etienne did all the hardwork so gets authorship] Signed-off-by: etienne Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_cp.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index df4cf97e5d97..92965dbb3c14 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -557,8 +557,10 @@ static int radeon_do_engine_reset(struct drm_device * dev) } static void radeon_cp_init_ring_buffer(struct drm_device * dev, - drm_radeon_private_t * dev_priv) + drm_radeon_private_t *dev_priv, + struct drm_file *file_priv) { + struct drm_radeon_master_private *master_priv; u32 ring_start, cur_read_ptr; u32 tmp; @@ -677,6 +679,14 @@ static void radeon_cp_init_ring_buffer(struct drm_device * dev, dev_priv->scratch[2] = 0; RADEON_WRITE(RADEON_LAST_CLEAR_REG, 0); + /* reset sarea copies of these */ + master_priv = file_priv->master->driver_priv; + if (master_priv->sarea_priv) { + master_priv->sarea_priv->last_frame = 0; + master_priv->sarea_priv->last_dispatch = 0; + master_priv->sarea_priv->last_clear = 0; + } + radeon_do_wait_for_idle(dev_priv); /* Sync everything up */ @@ -1215,7 +1225,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } radeon_cp_load_microcode(dev_priv); - radeon_cp_init_ring_buffer(dev, dev_priv); + radeon_cp_init_ring_buffer(dev, dev_priv, file_priv); dev_priv->last_buf = 0; @@ -1281,7 +1291,7 @@ static int radeon_do_cleanup_cp(struct drm_device * dev) * * Charl P. Botha */ -static int radeon_do_resume_cp(struct drm_device * dev) +static int radeon_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv) { drm_radeon_private_t *dev_priv = dev->dev_private; @@ -1304,7 +1314,7 @@ static int radeon_do_resume_cp(struct drm_device * dev) } radeon_cp_load_microcode(dev_priv); - radeon_cp_init_ring_buffer(dev, dev_priv); + radeon_cp_init_ring_buffer(dev, dev_priv, file_priv); radeon_do_engine_reset(dev); radeon_irq_set_state(dev, RADEON_SW_INT_ENABLE, 1); @@ -1479,8 +1489,7 @@ int radeon_cp_idle(struct drm_device *dev, void *data, struct drm_file *file_pri */ int radeon_cp_resume(struct drm_device *dev, void *data, struct drm_file *file_priv) { - - return radeon_do_resume_cp(dev); + return radeon_do_resume_cp(dev, file_priv); } int radeon_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_priv) From 48ffc70b675aa7798a52a2e92e20f6cce9140b3d Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Wed, 18 Feb 2009 12:33:55 -0800 Subject: [PATCH 225/263] x86, vmi: TSC going backwards check in vmi clocksource Impact: fix time warps under vmware Similar to the check for TSC going backwards in the TSC clocksource, we also need this check for VMI clocksource. Signed-off-by: Alok N Kataria Cc: Zachary Amsden Signed-off-by: Ingo Molnar Cc: stable@kernel.org --- arch/x86/kernel/vmiclock_32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e09402..bde106cae0a9 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void) #endif /** vmi clocksource */ +static struct clocksource clocksource_vmi; static cycle_t read_real_cycles(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + return ret >= clocksource_vmi.cycle_last ? + ret : clocksource_vmi.cycle_last; } static struct clocksource clocksource_vmi = { From 07a66d7c53a538e1a9759954a82bb6c07365eff9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 08:04:13 +0100 Subject: [PATCH 226/263] x86: use the right protections for split-up pagetables Steven Rostedt found a bug in where in his modified kernel ftrace was unable to modify the kernel text, due to the PMD itself having been marked read-only as well in split_large_page(). The fix, suggested by Linus, is to not try to 'clone' the reference protection of a huge-page, but to use the standard (and permissive) page protection bits of KERNPG_TABLE. The 'cloning' makes sense for the ptes but it's a confused and incorrect concept at the page table level - because the pagetable entry is a set of all ptes and hence cannot 'clone' any single protection attribute - the ptes can be any mixture of protections. With the permissive KERNPG_TABLE, even if the pte protections get changed after this point (due to ftrace doing code-patching or other similar activities like kprobes), the resulting combined protections will still be correct and the pte's restrictive (or permissive) protections will control it. Also update the comment. This bug was there for a long time but has not caused visible problems before as it needs a rather large read-only area to trigger. Steve possibly hacked his kernel with some really large arrays or so. Anyway, the bug is definitely worth fixing. [ Huang Ying also experienced problems in this area when writing the EFI code, but the real bug in split_large_page() was not realized back then. ] Reported-by: Steven Rostedt Reported-by: Huang Ying Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8ca0d8566fc8..7be47d1a97e4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -508,18 +508,13 @@ static int split_large_page(pte_t *kpte, unsigned long address) #endif /* - * Install the new, split up pagetable. Important details here: + * Install the new, split up pagetable. * - * On Intel the NX bit of all levels must be cleared to make a - * page executable. See section 4.13.2 of Intel 64 and IA-32 - * Architectures Software Developer's Manual). - * - * Mark the entry present. The current mapping might be - * set to not present, which we preserved above. + * We use the standard kernel pagetable protections for the new + * pagetable protections, the actual ptes set above control the + * primary protection behavior: */ - ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); - pgprot_val(ref_prot) |= _PAGE_PRESENT; - __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); + __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); base = NULL; out_unlock: From 2cfbd50b536c878e58ab3681c4e944fa3d99b415 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Feb 2009 10:55:10 -0500 Subject: [PATCH 227/263] Btrfs: check file pointer in btrfs_sync_file fsync can be called by NFS with a null file pointer, and btrfs was oopsing in this case. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e8023efaff7..872f104576e5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1222,7 +1222,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) /* * ok we haven't committed the transaction yet, lets do a commit */ - if (file->private_data) + if (file && file->private_data) btrfs_ioctl_trans_end(file); trans = btrfs_start_transaction(root, 1); @@ -1231,7 +1231,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) goto out; } - ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + ret = btrfs_log_dentry_safe(trans, root, dentry); if (ret < 0) goto out; @@ -1245,7 +1245,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - mutex_unlock(&file->f_dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); if (ret > 0) { ret = btrfs_commit_transaction(trans, root); @@ -1253,7 +1253,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) btrfs_sync_log(trans, root); ret = btrfs_end_transaction(trans, root); } - mutex_lock(&file->f_dentry->d_inode->i_mutex); + mutex_lock(&dentry->d_inode->i_mutex); out: return ret > 0 ? EIO : ret; } From 8cbb5bc374522a5f359e34856829732a7ed458a4 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 20 Feb 2009 11:27:46 -0800 Subject: [PATCH 228/263] MAINTAINERS: paravirt-ops maintainers update Welcome to Alok Kataria, our new paravirt-ops maintainer. Cc: Chris Wright Cc: Alok Kataria Cc: Rusty Russell Signed-off-by: Ingo Molnar --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 06e03913d2d3..59fd2d1d94a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3334,8 +3334,8 @@ P: Jeremy Fitzhardinge M: jeremy@xensource.com P: Chris Wright M: chrisw@sous-sol.org -P: Zachary Amsden -M: zach@vmware.com +P: Alok Kataria +M: akataria@vmware.com P: Rusty Russell M: rusty@rustcorp.com.au L: virtualization@lists.osdl.org From 41a9e64ca4d60dc412cfcd42d5be5dec1f1ed427 Mon Sep 17 00:00:00 2001 From: Luca Bigliardi Date: Fri, 20 Feb 2009 15:38:36 -0800 Subject: [PATCH 229/263] uml: fix vde network backend in user mode linux * Replace kmalloc() with uml_kmalloc() (fix build failure) * Remove unnecessary UM_KERN_INFO in printk() (don't display '<6>' while printing info) Signed-off-by: Luca Bigliardi Cc: Jiri Kosina Reviewed-by: WANG Cong Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/vde_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c index 56533db25343..c5c43253e6ce 100644 --- a/arch/um/drivers/vde_user.c +++ b/arch/um/drivers/vde_user.c @@ -78,7 +78,7 @@ void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) { struct vde_open_args *args; - vpri->args = kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); + vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); if (vpri->args == NULL) { printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " "allocation failed"); @@ -91,8 +91,8 @@ void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) args->group = init->group; args->mode = init->mode ? init->mode : 0700; - args->port ? printk(UM_KERN_INFO "port %d", args->port) : - printk(UM_KERN_INFO "undefined port"); + args->port ? printk("port %d", args->port) : + printk("undefined port"); } int vde_user_read(void *conn, void *buf, int len) From 58bafe72ad937ec0813109254a154cf32be25fc4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Feb 2009 15:38:38 -0800 Subject: [PATCH 230/263] mn10300: fix oprofile oprofile for MN10300 seems to have been broken by the advent of the new tracing framework. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 9a9f43358879..41d16822e616 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -7,6 +7,7 @@ mainmenu "Linux Kernel Configuration" config MN10300 def_bool y + select HAVE_OPROFILE config AM33 def_bool y From d9190913b71831f5e3d04de62cfb1fd069a9db35 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 20 Feb 2009 15:38:40 -0800 Subject: [PATCH 231/263] mn10300: fix typo && -> || in arch/mn10300/unit-asb2305/pci.c Fix the typo && -> ||. Signed-off-by: Wei Yongjun Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/unit-asb2305/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c index 1a86425fec42..07dbbcda3b2e 100644 --- a/arch/mn10300/unit-asb2305/pci.c +++ b/arch/mn10300/unit-asb2305/pci.c @@ -173,7 +173,7 @@ static int pci_ampci_write_config_byte(struct pci_bus *bus, unsigned int devfn, BRIDGEREGB(where) = value; } else { if (bus->number == 0 && - (devfn == PCI_DEVFN(2, 0) && devfn == PCI_DEVFN(3, 0)) + (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(3, 0)) ) __pcidebug("<= %02x", bus, devfn, where, value); CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where); From 3ef0e5ba467366125f04b423f4638baca54a4fc1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 20 Feb 2009 15:38:41 -0800 Subject: [PATCH 232/263] slab: introduce kzfree() kzfree() is a wrapper for kfree() that additionally zeroes the underlying memory before releasing it to the slab allocator. Currently there is code which memset()s the memory region of an object before releasing it back to the slab allocator to make sure security-sensitive data are really zeroed out after use. These callsites can then just use kzfree() which saves some code, makes users greppable and allows for a stupid destructor that isn't necessarily aware of the actual object size. Signed-off-by: Johannes Weiner Reviewed-by: Pekka Enberg Cc: Matt Mackall Acked-by: Christoph Lameter Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 1 + mm/util.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/slab.h b/include/linux/slab.h index f96d13c281e8..24c5602bee99 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -127,6 +127,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); void * __must_check __krealloc(const void *, size_t, gfp_t); void * __must_check krealloc(const void *, size_t, gfp_t); void kfree(const void *); +void kzfree(const void *); size_t ksize(const void *); /* diff --git a/mm/util.c b/mm/util.c index cb00b748ce47..37eaccdf3054 100644 --- a/mm/util.c +++ b/mm/util.c @@ -129,6 +129,26 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) } EXPORT_SYMBOL(krealloc); +/** + * kzfree - like kfree but zero memory + * @p: object to free memory of + * + * The memory of the object @p points to is zeroed before freed. + * If @p is %NULL, kzfree() does nothing. + */ +void kzfree(const void *p) +{ + size_t ks; + void *mem = (void *)p; + + if (unlikely(ZERO_OR_NULL_PTR(mem))) + return; + ks = ksize(mem); + memset(mem, 0, ks); + kfree(mem); +} +EXPORT_SYMBOL(kzfree); + /* * strndup_user - duplicate an existing string from user space * @s: The string to duplicate From ad444684b78f12fb3f45a733722e2cbfe102d25c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Feb 2009 15:38:43 -0800 Subject: [PATCH 233/263] README: fix a wrong filename It should be Documentation/build/kconfig.txt. Introduced by commit 2af238e455ef5fd31c2f7a06c2db3f13d843b9bf ("kbuild: make *config usage docs"). Signed-off-by: Li Zefan Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index 90a07658ede1..d6c6c742c1d7 100644 --- a/README +++ b/README @@ -188,7 +188,7 @@ CONFIGURING the kernel: values to random values. You can find more information on using the Linux kernel config tools - in Documentation/kbuild/make-configs.txt. + in Documentation/kbuild/kconfig.txt. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can From 9b6d25100ace1dcf9750803ff08f6b61f840be79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 20 Feb 2009 15:38:45 -0800 Subject: [PATCH 234/263] sx.c: fix dbl statement if - add missing braces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Caused by 736d54533aed (sx.c: fix missed unlock_kernel() on error path in sx_fw_ioctl()). You guys keep breaking things this way in every single kernel release in at least couple of places... :-( Signed-off-by: Ilpo Järvinen Acked-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/sx.c b/drivers/char/sx.c index f146e90404fa..d7c416566bd9 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -1746,9 +1746,10 @@ static long sx_fw_ioctl(struct file *filp, unsigned int cmd, sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc); break; case SXIO_DO_RAMTEST: - if (sx_initialized) /* Already initialized: better not ramtest the board. */ + if (sx_initialized) { /* Already initialized: better not ramtest the board. */ rc = -EPERM; break; + } if (IS_SX_BOARD(board)) { rc = do_memtest(board, 0, 0x7000); if (!rc) From b28fe28f2a07ee325834179174a95495d2786561 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Feb 2009 15:38:46 -0800 Subject: [PATCH 235/263] sx.c: avoid referencing freed memory if copy_from_user() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "break" would just result in reusing a free'd pointer. I don't have the cards myself to test it though. :/ Signed-off-by: Dan Carpenter Cc: Ilpo Järvinen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/sx.c b/drivers/char/sx.c index d7c416566bd9..518f2a25d91e 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -1789,7 +1789,7 @@ static long sx_fw_ioctl(struct file *filp, unsigned int cmd, nbytes - i : SX_CHUNK_SIZE)) { kfree(tmp); rc = -EFAULT; - break; + goto out; } memcpy_toio(board->base2 + offset + i, tmp, (i + SX_CHUNK_SIZE > nbytes) ? From 152de30bced150617e5731a9fe2364c9d04fe26c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Feb 2009 15:38:47 -0800 Subject: [PATCH 236/263] docsrc: use config instead of menuconfig BUILD_DOCSRC should be controlled by "config" instead of "menuconfig". I have no idea how I managed to use "menuconfig" here. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 29044f500269..1bcf9cd4baa0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -838,7 +838,7 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. -menuconfig BUILD_DOCSRC +config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK help From 3fd076dd955a34c35dc456f4ef676e03cdced044 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Feb 2009 15:38:48 -0800 Subject: [PATCH 237/263] cpuset: various documentation fixes and updates I noticed the old commit 8f5aa26c75b7722e80c0c5c5bb833d41865d7019 ("cpusets: update_cpumask documentation fix") is not a complete fix, resulting in inconsistent paragraphs. This patch fixes it and does other fixes and updates: - s/migrate_all_tasks()/migrate_live_tasks()/ - describe more cpuset control files - s/cpumask_t/struct cpumask/ - document cpu hotplug and change of 'sched_relax_domain_level' may cause domain rebuild - document various ways to query and modify cpusets - the equivalent of "mount -t cpuset" is "mount -t cgroup -o cpuset,noprefix" Signed-off-by: Li Zefan Acked-by: Randy Dunlap Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cpusets.txt | 63 ++++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 5c86c258c791..0611e9528c7c 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt @@ -142,7 +142,7 @@ into the rest of the kernel, none in performance critical paths: - in fork and exit, to attach and detach a task from its cpuset. - in sched_setaffinity, to mask the requested CPUs by what's allowed in that tasks cpuset. - - in sched.c migrate_all_tasks(), to keep migrating tasks within + - in sched.c migrate_live_tasks(), to keep migrating tasks within the CPUs allowed by their cpuset, if possible. - in the mbind and set_mempolicy system calls, to mask the requested Memory Nodes by what's allowed in that tasks cpuset. @@ -175,6 +175,10 @@ files describing that cpuset: - mem_exclusive flag: is memory placement exclusive? - mem_hardwall flag: is memory allocation hardwalled - memory_pressure: measure of how much paging pressure in cpuset + - memory_spread_page flag: if set, spread page cache evenly on allowed nodes + - memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes + - sched_load_balance flag: if set, load balance within CPUs on that cpuset + - sched_relax_domain_level: the searching range when migrating tasks In addition, the root cpuset only has the following file: - memory_pressure_enabled flag: compute memory_pressure? @@ -252,7 +256,7 @@ is causing. This is useful both on tightly managed systems running a wide mix of submitted jobs, which may choose to terminate or re-prioritize jobs that -are trying to use more memory than allowed on the nodes assigned them, +are trying to use more memory than allowed on the nodes assigned to them, and with tightly coupled, long running, massively parallel scientific computing jobs that will dramatically fail to meet required performance goals if they start to use more memory than allowed to them. @@ -378,7 +382,7 @@ as cpusets and sched_setaffinity. The algorithmic cost of load balancing and its impact on key shared kernel data structures such as the task list increases more than linearly with the number of CPUs being balanced. So the scheduler -has support to partition the systems CPUs into a number of sched +has support to partition the systems CPUs into a number of sched domains such that it only load balances within each sched domain. Each sched domain covers some subset of the CPUs in the system; no two sched domains overlap; some CPUs might not be in any sched @@ -485,17 +489,22 @@ of CPUs allowed to a cpuset having 'sched_load_balance' enabled. The internal kernel cpuset to scheduler interface passes from the cpuset code to the scheduler code a partition of the load balanced CPUs in the system. This partition is a set of subsets (represented -as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all -the CPUs that must be load balanced. +as an array of struct cpumask) of CPUs, pairwise disjoint, that cover +all the CPUs that must be load balanced. -Whenever the 'sched_load_balance' flag changes, or CPUs come or go -from a cpuset with this flag enabled, or a cpuset with this flag -enabled is removed, the cpuset code builds a new such partition and -passes it to the scheduler sched domain setup code, to have the sched -domains rebuilt as necessary. +The cpuset code builds a new such partition and passes it to the +scheduler sched domain setup code, to have the sched domains rebuilt +as necessary, whenever: + - the 'sched_load_balance' flag of a cpuset with non-empty CPUs changes, + - or CPUs come or go from a cpuset with this flag enabled, + - or 'sched_relax_domain_level' value of a cpuset with non-empty CPUs + and with this flag enabled changes, + - or a cpuset with non-empty CPUs and with this flag enabled is removed, + - or a cpu is offlined/onlined. This partition exactly defines what sched domains the scheduler should -setup - one sched domain for each element (cpumask_t) in the partition. +setup - one sched domain for each element (struct cpumask) in the +partition. The scheduler remembers the currently active sched domain partitions. When the scheduler routine partition_sched_domains() is invoked from @@ -559,7 +568,7 @@ domain, the largest value among those is used. Be careful, if one requests 0 and others are -1 then 0 is used. Note that modifying this file will have both good and bad effects, -and whether it is acceptable or not will be depend on your situation. +and whether it is acceptable or not depends on your situation. Don't modify this file if you are not sure. If your situation is: @@ -600,19 +609,15 @@ to allocate a page of memory for that task. If a cpuset has its 'cpus' modified, then each task in that cpuset will have its allowed CPU placement changed immediately. Similarly, -if a tasks pid is written to a cpusets 'tasks' file, in either its -current cpuset or another cpuset, then its allowed CPU placement is -changed immediately. If such a task had been bound to some subset -of its cpuset using the sched_setaffinity() call, the task will be -allowed to run on any CPU allowed in its new cpuset, negating the -affect of the prior sched_setaffinity() call. +if a tasks pid is written to another cpusets 'tasks' file, then its +allowed CPU placement is changed immediately. If such a task had been +bound to some subset of its cpuset using the sched_setaffinity() call, +the task will be allowed to run on any CPU allowed in its new cpuset, +negating the effect of the prior sched_setaffinity() call. In summary, the memory placement of a task whose cpuset is changed is updated by the kernel, on the next allocation of a page for that task, -but the processor placement is not updated, until that tasks pid is -rewritten to the 'tasks' file of its cpuset. This is done to avoid -impacting the scheduler code in the kernel with a check for changes -in a tasks processor placement. +and the processor placement is updated immediately. Normally, once a page is allocated (given a physical page of main memory) then that page stays on whatever node it @@ -681,10 +686,14 @@ and then start a subshell 'sh' in that cpuset: # The next line should display '/Charlie' cat /proc/self/cpuset -In the future, a C library interface to cpusets will likely be -available. For now, the only way to query or modify cpusets is -via the cpuset file system, using the various cd, mkdir, echo, cat, -rmdir commands from the shell, or their equivalent from C. +There are ways to query or modify cpusets: + - via the cpuset file system directly, using the various cd, mkdir, echo, + cat, rmdir commands from the shell, or their equivalent from C. + - via the C library libcpuset. + - via the C library libcgroup. + (http://sourceforge.net/proects/libcg/) + - via the python application cset. + (http://developer.novell.com/wiki/index.php/Cpuset) The sched_setaffinity calls can also be done at the shell prompt using SGI's runon or Robert Love's taskset. The mbind and set_mempolicy @@ -756,7 +765,7 @@ mount -t cpuset X /dev/cpuset is equivalent to -mount -t cgroup -ocpuset X /dev/cpuset +mount -t cgroup -ocpuset,noprefix X /dev/cpuset echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent 2.2 Adding/removing cpus From f6fcba7014f9cc535fa75ef98c008b24e49e2212 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 15:38:48 -0800 Subject: [PATCH 238/263] vmalloc: call flush_cache_vunmap() from unmap_kernel_range() Impact: proper vcache flush on unmap_kernel_range() flush_cache_vunmap() should be called before pages are unmapped. Add a call to it in unmap_kernel_range(). Signed-off-by: Tejun Heo Acked-by: Nick Piggin Acked-by: David S. Miller Cc: [2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4dd2636d0b92..903cad46e796 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1012,6 +1012,8 @@ void __init vmalloc_init(void) void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; + + flush_cache_vunmap(addr, end); vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } From 01b24fee285b098c74318a8be801ef3711ec18d7 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Fri, 20 Feb 2009 15:38:49 -0800 Subject: [PATCH 239/263] spi_bitbang: add more lowlevel function documentation This adds more documentation of the lowlevel API to avoid future bugs. Signed-off-by: Michael Buesch Acked-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_bitbang.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index bf8de281b4ed..eed4254bd503 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -83,6 +83,13 @@ extern int spi_bitbang_stop(struct spi_bitbang *spi); * int getmiso(struct spi_device *); * void spidelay(unsigned); * + * setsck()'s is_on parameter is a zero/nonzero boolean. + * + * setmosi()'s is_on parameter is a zero/nonzero boolean. + * + * getmiso() is required to return 0 or 1 only. Any other value is invalid + * and will result in improper operation. + * * A non-inlined routine would call bitbang_txrx_*() routines. The * main loop could easily compile down to a handful of instructions, * especially if the delay is a NOP (to run at peak speed). From 3cf311409d37d904335eb720e8a6b2c17bee6698 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Fri, 20 Feb 2009 15:38:51 -0800 Subject: [PATCH 240/263] atyfb: remove unused local variable `pwr_command' Signed-off-by: Yang Hongyang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/aty/aty128fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index e6e299feb51b..2181ce4d7ebd 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -2365,7 +2365,6 @@ static void fbcon_aty128_bmove(struct display *p, int sy, int sx, int dy, int dx static void aty128_set_suspend(struct aty128fb_par *par, int suspend) { u32 pmgt; - u16 pwr_command; struct pci_dev *pdev = par->pdev; if (!par->pm_reg) From b6adea334c6c89d5e6c94f9196bbf3a279cb53bd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 20 Feb 2009 15:38:52 -0800 Subject: [PATCH 241/263] 8250: fix boot hang with serial console when using with Serial Over Lan port Intel 8257x Ethernet boards have a feature called Serial Over Lan. This feature works by emulating a serial port, and it is detected by kernel as a normal 8250 port. However, this emulation is not perfect, as also noticed on changeset 7500b1f602aad75901774a67a687ee985d85893f. Before this patch, the kernel were trying to check if the serial TX is capable of work using IRQ's. This were done with a code similar this: serial_outp(up, UART_IER, UART_IER_THRI); lsr = serial_in(up, UART_LSR); iir = serial_in(up, UART_IIR); serial_outp(up, UART_IER, 0); if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) up->bugs |= UART_BUG_TXEN; This works fine for other 8250 ports, but, on 8250-emulated SoL port, the chip is a little lazy to down UART_IIR_NO_INT at UART_IIR register. Due to that, UART_BUG_TXEN is sometimes enabled. However, as TX IRQ keeps working, and the TX polling is now enabled, the driver miss-interprets the IRQ received later, hanging up the machine until a key is pressed at the serial console. This is the 6 version of this patch. Previous versions were trying to introduce a large enough delay between serial_outp and serial_in(up, UART_IIR), but not taking forever. However, the needed delay couldn't be safely determined. At the experimental tests, a delay of 1us solves most of the cases, but still hangs sometimes. Increasing the delay to 5us was better, but still doesn't solve. A very high delay of 50 ms seemed to work every time. However, poking around with delays and pray for it to be enough doesn't seem to be a good approach, even for a quirk. So, instead of playing with random large arbitrary delays, let's just disable UART_BUG_TXEN for all SoL ports. [akpm@linux-foundation.org: fix warnings] Signed-off-by: Mauro Carvalho Chehab Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250.c | 15 +++++++++++++++ drivers/serial/8250_pci.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 3 +++ include/linux/serial_core.h | 1 + 4 files changed, 55 insertions(+) diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 0d934bfbdd9b..b4b39811b445 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -2083,6 +2083,20 @@ static int serial8250_startup(struct uart_port *port) serial8250_set_mctrl(&up->port, up->port.mctrl); + /* Serial over Lan (SoL) hack: + Intel 8257x Gigabit ethernet chips have a + 16550 emulation, to be used for Serial Over Lan. + Those chips take a longer time than a normal + serial device to signalize that a transmission + data was queued. Due to that, the above test generally + fails. One solution would be to delay the reading of + iir. However, this is not reliable, since the timeout + is variable. So, let's just don't test if we receive + TX irq. This way, we'll never enable UART_BUG_TXEN. + */ + if (up->port.flags & UPF_NO_TXEN_TEST) + goto dont_test_tx_en; + /* * Do a quick test to see if we receive an * interrupt when we enable the TX irq. @@ -2102,6 +2116,7 @@ static int serial8250_startup(struct uart_port *port) up->bugs &= ~UART_BUG_TXEN; } +dont_test_tx_en: spin_unlock_irqrestore(&up->port.lock, flags); /* diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 536d8e510f66..533f82025adf 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -798,6 +798,21 @@ pci_default_setup(struct serial_private *priv, return setup_port(priv, port, bar, offset, board->reg_shift); } +static int skip_tx_en_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_port *port, int idx) +{ + port->flags |= UPF_NO_TXEN_TEST; + printk(KERN_DEBUG "serial8250: skipping TxEn test for device " + "[%04x:%04x] subsystem [%04x:%04x]\n", + priv->dev->vendor, + priv->dev->device, + priv->dev->subsystem_vendor, + priv->dev->subsystem_device); + + return pci_default_setup(priv, board, port, idx); +} + /* This should be in linux/pci_ids.h */ #define PCI_VENDOR_ID_SBSMODULARIO 0x124B #define PCI_SUBVENDOR_ID_SBSMODULARIO 0x124B @@ -864,6 +879,27 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .init = pci_inteli960ni_init, .setup = pci_default_setup, }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_8257X_SOL, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = skip_tx_en_setup, + }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_82573L_SOL, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = skip_tx_en_setup, + }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_82573E_SOL, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = skip_tx_en_setup, + }, /* * ITE */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 114b8192eab9..aca8c458aa8a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2323,6 +2323,9 @@ #define PCI_DEVICE_ID_INTEL_82378 0x0484 #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 +#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 +#define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 +#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 90bbbf0b1161..df9245c7bd3b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -296,6 +296,7 @@ struct uart_port { #define UPF_HARDPPS_CD ((__force upf_t) (1 << 11)) #define UPF_LOW_LATENCY ((__force upf_t) (1 << 13)) #define UPF_BUGGY_UART ((__force upf_t) (1 << 14)) +#define UPF_NO_TXEN_TEST ((__force upf_t) (1 << 15)) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) (1 << 16)) #define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) #define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) From e4cce94c9c8797b08faf6a79396df4d175e377fa Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 10 Feb 2009 14:10:26 +0300 Subject: [PATCH 242/263] [CIFS] Prevent OOPs when mounting with remote prefixpath. Fixes OOPs with message 'kernel BUG at fs/cifs/cifs_dfs_ref.c:274!'. Checks if the prefixpath in an accesible while we are still in cifs_mount and fails with reporting a error if we can't access the prefixpath Should fix Samba bugs 6086 and 5861 and kernel bug 12192 Signed-off-by: Igor Mammedov Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 1 + fs/cifs/connect.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/inode.c | 4 ++-- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 382ba6298809..ec9f9c1c7d88 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -42,6 +42,7 @@ extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid())); #define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));} extern char *build_path_from_dentry(struct dentry *); +extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); /* extern void renew_parental_timestamps(struct dentry *direntry);*/ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 005df85219a8..da0f4ffa0613 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2180,6 +2180,33 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, "mount option supported")); } +static int +is_path_accessible(int xid, struct cifsTconInfo *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path) +{ + int rc; + __u64 inode_num; + FILE_ALL_INFO *pfile_info; + + rc = CIFSGetSrvInodeNumber(xid, tcon, full_path, &inode_num, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc != -EOPNOTSUPP) + return rc; + + pfile_info = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (pfile_info == NULL) + return -ENOMEM; + + rc = CIFSSMBQPathInfo(xid, tcon, full_path, pfile_info, + 0 /* not legacy */, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + kfree(pfile_info); + return rc; +} + int cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, char *mount_data, const char *devname) @@ -2190,6 +2217,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, struct cifsSesInfo *pSesInfo = NULL; struct cifsTconInfo *tcon = NULL; struct TCP_Server_Info *srvTcp = NULL; + char *full_path; xid = GetXid(); @@ -2426,6 +2454,23 @@ mount_fail_check: cifs_sb->rsize = min(cifs_sb->rsize, (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); + if (!rc && cifs_sb->prepathlen) { + /* build_path_to_root works only when we have a valid tcon */ + full_path = cifs_build_path_to_root(cifs_sb); + if (full_path == NULL) { + rc = -ENOMEM; + goto mount_fail_check; + } + rc = is_path_accessible(xid, tcon, cifs_sb, full_path); + if (rc) { + cERROR(1, ("Path %s in not accessible: %d", + full_path, rc)); + kfree(full_path); + goto mount_fail_check; + } + kfree(full_path); + } + /* volume_info->password is freed above when existing session found (in which case it is not needed anymore) but when new sesion is created the password ptr is put in the new session structure (in which case the diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index bcf7b5184664..7342bfb02ae0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -621,7 +621,7 @@ static const struct inode_operations cifs_ipc_inode_ops = { .lookup = cifs_lookup, }; -static char *build_path_to_root(struct cifs_sb_info *cifs_sb) +char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) { int pplen = cifs_sb->prepathlen; int dfsplen; @@ -678,7 +678,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) return inode; cifs_sb = CIFS_SB(inode->i_sb); - full_path = build_path_to_root(cifs_sb); + full_path = cifs_build_path_to_root(cifs_sb); if (full_path == NULL) return ERR_PTR(-ENOMEM); From 132ac7b77cc95a22d6118d327c96586759fbf006 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Feb 2009 07:33:57 -0500 Subject: [PATCH 243/263] cifs: refactor new_inode() calls and inode initialization Move new inode creation into a separate routine and refactor the callers to take advantage of it. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 + fs/cifs/inode.c | 96 ++++++++++++++++++++++++++++----------------- fs/cifs/readdir.c | 54 ++++++++++++------------- 3 files changed, 86 insertions(+), 66 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ec9f9c1c7d88..62fd5bd499f6 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -92,6 +92,8 @@ extern u64 cifs_UnixTimeToNT(struct timespec); extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); +extern struct inode *cifs_new_inode(struct super_block *sb, + unsigned long *inum); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO *pfile_info, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7342bfb02ae0..c7674f595adb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -199,6 +199,49 @@ static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, pfnd_dat->Gid = cpu_to_le64(pinode->i_gid); } +/** + * cifs_new inode - create new inode, initialize, and hash it + * @sb - pointer to superblock + * @inum - if valid pointer and serverino is enabled, replace i_ino with val + * + * Create a new inode, initialize it for CIFS and hash it. Returns the new + * inode or NULL if one couldn't be allocated. + * + * If the share isn't mounted with "serverino" or inum is a NULL pointer then + * we'll just use the inode number assigned by new_inode(). Note that this can + * mean i_ino collisions since the i_ino assigned by new_inode is not + * guaranteed to be unique. + */ +struct inode * +cifs_new_inode(struct super_block *sb, unsigned long *inum) +{ + struct inode *inode; + + inode = new_inode(sb); + if (inode == NULL) + return NULL; + + /* + * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we + * stop passing inum as ptr. Are there sanity checks we can use to + * ensure that the server is really filling in that field? Also, + * if serverino is disabled, perhaps we should be using iunique()? + */ + if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) + inode->i_ino = *inum; + + /* + * must set this here instead of cifs_alloc_inode since VFS will + * clobber i_flags + */ + if (sb->s_flags & MS_NOATIME) + inode->i_flags |= S_NOATIME | S_NOCMTIME; + + insert_inode_hash(inode); + + return inode; +} + int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *full_path, struct super_block *sb, int xid) { @@ -233,22 +276,12 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { - *pinode = new_inode(sb); + *pinode = cifs_new_inode(sb, (unsigned long *) + &find_data.UniqueId); if (*pinode == NULL) { rc = -ENOMEM; goto cgiiu_exit; } - /* Is an i_ino of zero legal? */ - /* note ino incremented to unique num in new_inode */ - /* Are there sanity checks we can use to ensure that - the server is really filling in that field? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - (*pinode)->i_ino = (unsigned long)find_data.UniqueId; - - if (sb->s_flags & MS_NOATIME) - (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - - insert_inode_hash(*pinode); } inode = *pinode; @@ -465,11 +498,8 @@ int cifs_get_inode_info(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { - *pinode = new_inode(sb); - if (*pinode == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } + __u64 inode_num; + /* Is an i_ino of zero legal? Can we use that to check if the server supports returning inode numbers? Are there other sanity checks we can use to ensure that @@ -486,7 +516,6 @@ int cifs_get_inode_info(struct inode **pinode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { int rc1 = 0; - __u64 inode_num; rc1 = CIFSGetSrvInodeNumber(xid, pTcon, full_path, &inode_num, @@ -496,12 +525,17 @@ int cifs_get_inode_info(struct inode **pinode, if (rc1) { cFYI(1, ("GetSrvInodeNum rc %d", rc1)); /* BB EOPNOSUPP disable SERVER_INUM? */ - } else /* do we need cast or hash to ino? */ - (*pinode)->i_ino = inode_num; - } /* else ino incremented to unique num in new_inode*/ - if (sb->s_flags & MS_NOATIME) - (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - insert_inode_hash(*pinode); + } + *pinode = cifs_new_inode(sb, (unsigned long *) + &inode_num); + } else { + *pinode = cifs_new_inode(sb, NULL); + } + + if (*pinode == NULL) { + rc = -ENOMEM; + goto cgii_exit; + } } inode = *pinode; cifsInfo = CIFS_I(inode); @@ -1114,24 +1148,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) else direntry->d_op = &cifs_dentry_ops; - newinode = new_inode(inode->i_sb); + newinode = cifs_new_inode(inode->i_sb, (unsigned long *) + &pInfo->UniqueId); if (newinode == NULL) { kfree(pInfo); goto mkdir_get_info; } - /* Is an i_ino of zero legal? */ - /* Are there sanity checks we can use to ensure that - the server is really filling in that field? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - newinode->i_ino = - (unsigned long)pInfo->UniqueId; - } /* note ino incremented to unique num in new_inode */ - if (inode->i_sb->s_flags & MS_NOATIME) - newinode->i_flags |= S_NOATIME | S_NOCMTIME; newinode->i_nlink = 2; - - insert_inode_hash(newinode); d_instantiate(direntry, newinode); /* we already checked in POSIXCreate whether diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 9f51f9bf0292..02a20221e841 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -56,35 +56,34 @@ static inline void dump_cifs_file_struct(struct file *file, char *label) } #endif /* DEBUG2 */ -/* Returns one if new inode created (which therefore needs to be hashed) */ +/* Returns 1 if new inode created, 2 if both dentry and inode were */ /* Might check in the future if inode number changed so we can rehash inode */ -static int construct_dentry(struct qstr *qstring, struct file *file, - struct inode **ptmp_inode, struct dentry **pnew_dentry) +static int +construct_dentry(struct qstr *qstring, struct file *file, + struct inode **ptmp_inode, struct dentry **pnew_dentry, + unsigned long *inum) { - struct dentry *tmp_dentry; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; + struct dentry *tmp_dentry = NULL; + struct super_block *sb = file->f_path.dentry->d_sb; int rc = 0; cFYI(1, ("For %s", qstring->name)); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = cifs_sb->tcon; qstring->hash = full_name_hash(qstring->name, qstring->len); tmp_dentry = d_lookup(file->f_path.dentry, qstring); if (tmp_dentry) { + /* BB: overwrite old name? i.e. tmp_dentry->d_name and + * tmp_dentry->d_name.len?? + */ cFYI(0, ("existing dentry with inode 0x%p", tmp_dentry->d_inode)); *ptmp_inode = tmp_dentry->d_inode; -/* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/ if (*ptmp_inode == NULL) { - *ptmp_inode = new_inode(file->f_path.dentry->d_sb); + *ptmp_inode = cifs_new_inode(sb, inum); if (*ptmp_inode == NULL) return rc; rc = 1; } - if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME) - (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; } else { tmp_dentry = d_alloc(file->f_path.dentry, qstring); if (tmp_dentry == NULL) { @@ -93,15 +92,14 @@ static int construct_dentry(struct qstr *qstring, struct file *file, return rc; } - *ptmp_inode = new_inode(file->f_path.dentry->d_sb); - if (pTcon->nocase) + if (CIFS_SB(sb)->tcon->nocase) tmp_dentry->d_op = &cifs_ci_dentry_ops; else tmp_dentry->d_op = &cifs_dentry_ops; + + *ptmp_inode = cifs_new_inode(sb, inum); if (*ptmp_inode == NULL) return rc; - if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME) - (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; rc = 2; } @@ -842,9 +840,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, len = strnlen(filename, PATH_MAX); } - /* BB fixme - hash low and high 32 bits if not 64 bit arch BB */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - *pinum = pFindData->UniqueId; + *pinum = pFindData->UniqueId; } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { FILE_DIRECTORY_INFO *pFindData = (FILE_DIRECTORY_INFO *)current_entry; @@ -940,20 +936,18 @@ static int cifs_filldir(char *pfindEntry, struct file *file, if (rc) return rc; - rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry); + /* only these two infolevels return valid inode numbers */ + if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX || + pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO) + rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry, + &inum); + else + rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry, + NULL); + if ((tmp_inode == NULL) || (tmp_dentry == NULL)) return -ENOMEM; - if (rc) { - /* inode created, we need to hash it with right inode number */ - if (inum != 0) { - /* BB fixme - hash the 2 32 quantities bits together if - * necessary BB */ - tmp_inode->i_ino = inum; - } - insert_inode_hash(tmp_inode); - } - /* we pass in rc below, indicating whether it is a new inode, so we can figure out whether to invalidate the inode cached data if the file has changed */ From 950ec52880fab89b957c7dc45e8b8476dd63741f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Feb 2009 08:08:26 -0500 Subject: [PATCH 244/263] cifs: properly handle case where CIFSGetSrvInodeNumber fails ...if it does then we pass a pointer to an unintialized variable for the inode number to cifs_new_inode. Have it pass a NULL pointer instead. Also tweak the function prototypes to reduce the amount of casting. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 3 +-- fs/cifs/inode.c | 20 ++++++++++---------- fs/cifs/readdir.c | 6 +++--- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 62fd5bd499f6..446e62cbece9 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -92,8 +92,7 @@ extern u64 cifs_UnixTimeToNT(struct timespec); extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); -extern struct inode *cifs_new_inode(struct super_block *sb, - unsigned long *inum); +extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO *pfile_info, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index c7674f595adb..475115c7cc79 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -213,7 +213,7 @@ static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, * guaranteed to be unique. */ struct inode * -cifs_new_inode(struct super_block *sb, unsigned long *inum) +cifs_new_inode(struct super_block *sb, __u64 *inum) { struct inode *inode; @@ -228,7 +228,7 @@ cifs_new_inode(struct super_block *sb, unsigned long *inum) * if serverino is disabled, perhaps we should be using iunique()? */ if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) - inode->i_ino = *inum; + inode->i_ino = (unsigned long) *inum; /* * must set this here instead of cifs_alloc_inode since VFS will @@ -276,8 +276,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { - *pinode = cifs_new_inode(sb, (unsigned long *) - &find_data.UniqueId); + *pinode = cifs_new_inode(sb, &find_data.UniqueId); if (*pinode == NULL) { rc = -ENOMEM; goto cgiiu_exit; @@ -499,6 +498,7 @@ int cifs_get_inode_info(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { __u64 inode_num; + __u64 *pinum = &inode_num; /* Is an i_ino of zero legal? Can we use that to check if the server supports returning inode numbers? Are @@ -518,20 +518,20 @@ int cifs_get_inode_info(struct inode **pinode, int rc1 = 0; rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - full_path, &inode_num, + full_path, pinum, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc1) { cFYI(1, ("GetSrvInodeNum rc %d", rc1)); + pinum = NULL; /* BB EOPNOSUPP disable SERVER_INUM? */ } - *pinode = cifs_new_inode(sb, (unsigned long *) - &inode_num); } else { - *pinode = cifs_new_inode(sb, NULL); + pinum = NULL; } + *pinode = cifs_new_inode(sb, pinum); if (*pinode == NULL) { rc = -ENOMEM; goto cgii_exit; @@ -1148,8 +1148,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) else direntry->d_op = &cifs_dentry_ops; - newinode = cifs_new_inode(inode->i_sb, (unsigned long *) - &pInfo->UniqueId); + newinode = cifs_new_inode(inode->i_sb, + &pInfo->UniqueId); if (newinode == NULL) { kfree(pInfo); goto mkdir_get_info; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 02a20221e841..c2c01ff4c32c 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -61,7 +61,7 @@ static inline void dump_cifs_file_struct(struct file *file, char *label) static int construct_dentry(struct qstr *qstring, struct file *file, struct inode **ptmp_inode, struct dentry **pnew_dentry, - unsigned long *inum) + __u64 *inum) { struct dentry *tmp_dentry = NULL; struct super_block *sb = file->f_path.dentry->d_sb; @@ -820,7 +820,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, /* inode num, inode type and filename returned */ static int cifs_get_name_from_search_buf(struct qstr *pqst, char *current_entry, __u16 level, unsigned int unicode, - struct cifs_sb_info *cifs_sb, int max_len, ino_t *pinum) + struct cifs_sb_info *cifs_sb, int max_len, __u64 *pinum) { int rc = 0; unsigned int len = 0; @@ -903,7 +903,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file, struct qstr qstring; struct cifsFileInfo *pCifsF; unsigned int obj_type; - ino_t inum; + __u64 inum; struct cifs_sb_info *cifs_sb; struct inode *tmp_inode; struct dentry *tmp_dentry; From 44f68fadd865bb288ebdcea2b602f0b1cab27a0c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Feb 2009 08:08:28 -0500 Subject: [PATCH 245/263] cifs: posix fill in inode needed by posix open function needed to prepare for posix open Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 ++ fs/cifs/inode.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 446e62cbece9..083dfc57c7a3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -92,6 +92,8 @@ extern u64 cifs_UnixTimeToNT(struct timespec); extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); +extern void posix_fill_in_inode(struct inode *tmp_inode, + FILE_UNIX_BASIC_INFO *pData, int isNewInode); extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 475115c7cc79..4690a360c855 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1051,7 +1051,7 @@ out_reval: return rc; } -static void posix_fill_in_inode(struct inode *tmp_inode, +void posix_fill_in_inode(struct inode *tmp_inode, FILE_UNIX_BASIC_INFO *pData, int isNewInode) { struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); From 69765529d701c838df19ea1f5ad2f33a528261ae Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 17 Feb 2009 01:29:40 +0000 Subject: [PATCH 246/263] [CIFS] Fix oops in cifs_strfromUCS_le mounting to servers which do not specify their OS Fixes kernel bug #10451 http://bugzilla.kernel.org/show_bug.cgi?id=10451 Certain NAS appliances do not set the operating system or network operating system fields in the session setup response on the wire. cifs was oopsing on the unexpected zero length response fields (when trying to null terminate a zero length field). This fixes the oops. Acked-by: Jeff Layton CC: stable Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 ++- fs/cifs/sess.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 73ac7ebd1dfc..1cfa72ef1f37 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -7,7 +7,8 @@ specified and user does not have access to query information about the top of the share. Fix problem in 2.6.28 resolving DFS paths to Samba servers (worked to Windows). Fix rmdir so that pending search (readdir) requests do not get invalid results which include the now -removed directory. +removed directory. Fix oops in cifs_dfs_ref.c when prefixpath is not reachable +when using DFS. Version 1.55 ------------ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5f22de7b79a9..b234407a3007 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -228,7 +228,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, kfree(ses->serverOS); /* UTF-8 string will not grow more than four times as big as UCS-16 */ - ses->serverOS = kzalloc(4 * len, GFP_KERNEL); + ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); if (ses->serverOS != NULL) cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); data += 2 * (len + 1); @@ -241,7 +241,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, return rc; kfree(ses->serverNOS); - ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */ + ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); if (ses->serverNOS != NULL) { cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, nls_cp); From c3b2a0c640bff7df85d79fb4f89674949a267ec2 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 20 Feb 2009 04:32:45 +0000 Subject: [PATCH 247/263] [CIFS] improve posix semantics of file create Samba server added support for a new posix open/create/mkdir operation a year or so ago, and we added support to cifs for mkdir to use it, but had not added the corresponding code to file create. The following patch helps improve the performance of the cifs create path (to Samba and servers which support the cifs posix protocol extensions). Using Connectathon basic test1, with 2000 files, the performance improved about 15%, and also helped reduce network traffic (17% fewer SMBs sent over the wire) due to saving a network round trip for the SetPathInfo on every file create. It should also help the semantics (and probably the performance) of write (e.g. when posix byte range locks are on the file) on file handles opened with posix create, and adds support for a few flags which would have to be ignored otherwise. Signed-off-by: Steve French --- fs/cifs/CHANGES | 4 +- fs/cifs/dir.c | 301 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 205 insertions(+), 100 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1cfa72ef1f37..72063f5e56b1 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -8,7 +8,9 @@ top of the share. Fix problem in 2.6.28 resolving DFS paths to Samba servers (worked to Windows). Fix rmdir so that pending search (readdir) requests do not get invalid results which include the now removed directory. Fix oops in cifs_dfs_ref.c when prefixpath is not reachable -when using DFS. +when using DFS. Add better file create support to servers which support +the CIFS POSIX protocol extensions (this adds support for new flags +on create, and improves semantics for write of locked ranges). Version 1.55 ------------ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 964aad03c5ad..89fb72832652 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -3,7 +3,7 @@ * * vfs operations that deal with dentries * - * Copyright (C) International Business Machines Corp., 2002,2008 + * Copyright (C) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -129,6 +129,78 @@ cifs_bp_rename_retry: return full_path; } +static int cifs_posix_open(char *full_path, struct inode **pinode, + struct super_block *sb, int mode, int oflags, + int *poplock, __u16 *pnetfid, int xid) +{ + int rc; + __u32 oplock; + FILE_UNIX_BASIC_INFO *presp_data; + __u32 posix_flags = 0; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + + cFYI(1, ("posix open %s", full_path)); + + presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); + if (presp_data == NULL) + return -ENOMEM; + +/* So far cifs posix extensions can only map the following flags. + There are other valid fmode oflags such as FMODE_LSEEK, FMODE_PREAD, but + so far we do not seem to need them, and we can treat them as local only */ + if ((oflags & (FMODE_READ | FMODE_WRITE)) == + (FMODE_READ | FMODE_WRITE)) + posix_flags = SMB_O_RDWR; + else if (oflags & FMODE_READ) + posix_flags = SMB_O_RDONLY; + else if (oflags & FMODE_WRITE) + posix_flags = SMB_O_WRONLY; + if (oflags & O_CREAT) + posix_flags |= SMB_O_CREAT; + if (oflags & O_EXCL) + posix_flags |= SMB_O_EXCL; + if (oflags & O_TRUNC) + posix_flags |= SMB_O_TRUNC; + if (oflags & O_APPEND) + posix_flags |= SMB_O_APPEND; + if (oflags & O_SYNC) + posix_flags |= SMB_O_SYNC; + if (oflags & O_DIRECTORY) + posix_flags |= SMB_O_DIRECTORY; + if (oflags & O_NOFOLLOW) + posix_flags |= SMB_O_NOFOLLOW; + if (oflags & O_DIRECT) + posix_flags |= SMB_O_DIRECT; + + + rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, + pnetfid, presp_data, &oplock, full_path, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) + goto posix_open_ret; + + if (presp_data->Type == cpu_to_le32(-1)) + goto posix_open_ret; /* open ok, caller does qpathinfo */ + + /* get new inode and set it up */ + if (!pinode) + goto posix_open_ret; /* caller does not need info */ + + *pinode = cifs_new_inode(sb, &presp_data->UniqueId); + + /* We do not need to close the file if new_inode fails since + the caller will retry qpathinfo as long as inode is null */ + if (*pinode == NULL) + goto posix_open_ret; + + posix_fill_in_inode(*pinode, presp_data, 1); + +posix_open_ret: + kfree(presp_data); + return rc; +} + static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct dentry *direntry, struct inode *newinode) @@ -150,7 +222,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int xid; int create_options = CREATE_NOT_DIR; int oplock = 0; - /* BB below access is too much for the mknod to request */ + int oflags; + /* + * BB below access is probably too much for mknod to request + * but we have to do query and setpathinfo so requesting + * less could fail (unless we want to request getatr and setatr + * permissions (only). At least for POSIX we do not have to + * request so much. + */ int desiredAccess = GENERIC_READ | GENERIC_WRITE; __u16 fileHandle; struct cifs_sb_info *cifs_sb; @@ -174,13 +253,43 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, } mode &= ~current->fs->umask; + if (oplockEnabled) + oplock = REQ_OPLOCK; + + if (nd && (nd->flags & LOOKUP_OPEN)) + oflags = nd->intent.open.flags; + else + oflags = FMODE_READ; + + if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { + rc = cifs_posix_open(full_path, &newinode, inode->i_sb, + mode, oflags, &oplock, &fileHandle, xid); + /* EIO could indicate that (posix open) operation is not + supported, despite what server claimed in capability + negotation. EREMOTE indicates DFS junction, which is not + handled in posix open */ + + if ((rc == 0) && (newinode == NULL)) + goto cifs_create_get_file_info; /* query inode info */ + else if (rc == 0) /* success, no need to query */ + goto cifs_create_set_dentry; + else if ((rc != -EIO) && (rc != -EREMOTE) && + (rc != -EOPNOTSUPP)) /* path not found or net err */ + goto cifs_create_out; + /* else fallthrough to retry, using older open call, this is + case where server does not support this SMB level, and + falsely claims capability (also get here for DFS case + which should be rare for path not covered on files) */ + } if (nd && (nd->flags & LOOKUP_OPEN)) { - int oflags = nd->intent.open.flags; - + /* if the file is going to stay open, then we + need to set the desired access properly */ desiredAccess = 0; if (oflags & FMODE_READ) - desiredAccess |= GENERIC_READ; + desiredAccess |= GENERIC_READ; /* is this too little? */ if (oflags & FMODE_WRITE) { desiredAccess |= GENERIC_WRITE; if (!(oflags & FMODE_READ)) @@ -199,8 +308,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */ - if (oplockEnabled) - oplock = REQ_OPLOCK; buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { @@ -233,116 +340,112 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, } if (rc) { cFYI(1, ("cifs_create returned 0x%x", rc)); - } else { - /* If Open reported that we actually created a file - then we now have to set the mode if possible */ - if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { - struct cifs_unix_set_info_args args = { + goto cifs_create_out; + } + + /* If Open reported that we actually created a file + then we now have to set the mode if possible */ + if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { + struct cifs_unix_set_info_args args = { .mode = mode, .ctime = NO_CHANGE_64, .atime = NO_CHANGE_64, .mtime = NO_CHANGE_64, .device = 0, - }; + }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - args.uid = (__u64) current_fsuid(); - if (inode->i_mode & S_ISGID) - args.gid = (__u64) inode->i_gid; - else - args.gid = (__u64) current_fsgid(); - } else { - args.uid = NO_CHANGE_64; - args.gid = NO_CHANGE_64; - } - CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + args.uid = (__u64) current_fsuid(); + if (inode->i_mode & S_ISGID) + args.gid = (__u64) inode->i_gid; + else + args.gid = (__u64) current_fsgid(); } else { - /* BB implement mode setting via Windows security - descriptors e.g. */ - /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ - - /* Could set r/o dos attribute if mode & 0222 == 0 */ + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + } else { + /* BB implement mode setting via Windows security + descriptors e.g. */ + /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ - /* server might mask mode so we have to query for it */ - if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb, xid); - else { - rc = cifs_get_inode_info(&newinode, full_path, - buf, inode->i_sb, xid, - &fileHandle); - if (newinode) { - if (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_DYNPERM) - newinode->i_mode = mode; - if ((oplock & CIFS_CREATE_ACTION) && - (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_SET_UID)) { - newinode->i_uid = current_fsuid(); - if (inode->i_mode & S_ISGID) - newinode->i_gid = - inode->i_gid; - else - newinode->i_gid = - current_fsgid(); - } + /* Could set r/o dos attribute if mode & 0222 == 0 */ + } + +cifs_create_get_file_info: + /* server might mask mode so we have to query for it */ + if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&newinode, full_path, + inode->i_sb, xid); + else { + rc = cifs_get_inode_info(&newinode, full_path, buf, + inode->i_sb, xid, &fileHandle); + if (newinode) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + newinode->i_mode = mode; + if ((oplock & CIFS_CREATE_ACTION) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + newinode->i_uid = current_fsuid(); + if (inode->i_mode & S_ISGID) + newinode->i_gid = inode->i_gid; + else + newinode->i_gid = current_fsgid(); } } + } - if (rc != 0) { - cFYI(1, ("Create worked, get_inode_info failed rc = %d", - rc)); - } else - setup_cifs_dentry(tcon, direntry, newinode); +cifs_create_set_dentry: + if (rc == 0) + setup_cifs_dentry(tcon, direntry, newinode); + else + cFYI(1, ("Create worked, get_inode_info failed rc = %d", rc)); - if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || - (!(nd->flags & LOOKUP_OPEN))) { - /* mknod case - do not leave file open */ - CIFSSMBClose(xid, tcon, fileHandle); - } else if (newinode) { - struct cifsFileInfo *pCifsFile = - kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + /* nfsd case - nfs srv does not set nd */ + if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { + /* mknod case - do not leave file open */ + CIFSSMBClose(xid, tcon, fileHandle); + } else if (newinode) { + struct cifsFileInfo *pCifsFile = + kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (pCifsFile == NULL) - goto cifs_create_out; - pCifsFile->netfid = fileHandle; - pCifsFile->pid = current->tgid; - pCifsFile->pInode = newinode; - pCifsFile->invalidHandle = false; - pCifsFile->closePend = false; - init_MUTEX(&pCifsFile->fh_sem); - mutex_init(&pCifsFile->lock_mutex); - INIT_LIST_HEAD(&pCifsFile->llist); - atomic_set(&pCifsFile->wrtPending, 0); + if (pCifsFile == NULL) + goto cifs_create_out; + pCifsFile->netfid = fileHandle; + pCifsFile->pid = current->tgid; + pCifsFile->pInode = newinode; + pCifsFile->invalidHandle = false; + pCifsFile->closePend = false; + init_MUTEX(&pCifsFile->fh_sem); + mutex_init(&pCifsFile->lock_mutex); + INIT_LIST_HEAD(&pCifsFile->llist); + atomic_set(&pCifsFile->wrtPending, 0); - /* set the following in open now + /* set the following in open now pCifsFile->pfile = file; */ - write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &tcon->openFileList); - pCifsInode = CIFS_I(newinode); - if (pCifsInode) { - /* if readable file instance put first in list*/ - if (write_only) { - list_add_tail(&pCifsFile->flist, - &pCifsInode->openFileList); - } else { - list_add(&pCifsFile->flist, - &pCifsInode->openFileList); - } - if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = true; - pCifsInode->clientCanCacheRead = true; - cFYI(1, ("Exclusive Oplock inode %p", - newinode)); - } else if ((oplock & 0xF) == OPLOCK_READ) - pCifsInode->clientCanCacheRead = true; + write_lock(&GlobalSMBSeslock); + list_add(&pCifsFile->tlist, &tcon->openFileList); + pCifsInode = CIFS_I(newinode); + if (pCifsInode) { + /* if readable file instance put first in list*/ + if (write_only) { + list_add_tail(&pCifsFile->flist, + &pCifsInode->openFileList); + } else { + list_add(&pCifsFile->flist, + &pCifsInode->openFileList); } - write_unlock(&GlobalSMBSeslock); + if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; + cFYI(1, ("Exclusive Oplock inode %p", + newinode)); + } else if ((oplock & 0xF) == OPLOCK_READ) + pCifsInode->clientCanCacheRead = true; } + write_unlock(&GlobalSMBSeslock); } cifs_create_out: kfree(buf); From eca6acf91552a9b2e997cc76339115c95eac0217 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 20 Feb 2009 05:43:09 +0000 Subject: [PATCH 248/263] [CIFS] Fix multiuser mounts so server does not invalidate earlier security contexts When two different users mount the same Windows 2003 Server share using CIFS, the first session mounted can be invalidated. Some servers invalidate the first smb session when a second similar user (e.g. two users who get mapped by server to "guest") authenticates an smb session from the same client. By making sure that we set the 2nd and subsequent vc numbers to nonzero values, this ensures that we will not have this problem. Fixes Samba bug 6004, problem description follows: How to reproduce: - configure an "open share" (full permissions to Guest user) on Windows 2003 Server (I couldn't reproduce the problem with Samba server or Windows older than 2003) - mount the share twice with different users who will be authenticated as guest. noacl,noperm,user=john,dir_mode=0700,domain=DOMAIN,rw noacl,noperm,user=jeff,dir_mode=0700,domain=DOMAIN,rw Result: - just the mount point mounted last is accessible: Signed-off-by: Steve French --- fs/cifs/CHANGES | 10 ++++++ fs/cifs/cifsfs.h | 2 +- fs/cifs/cifsglob.h | 6 +++- fs/cifs/cifssmb.c | 7 ++-- fs/cifs/sess.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 105 insertions(+), 7 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 72063f5e56b1..851388fafc73 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,13 @@ +Version 1.57 +------------ +Improve support for multiple security contexts to the same server. We +used to use the same "vcnumber" for all connections which could cause +the server to treat subsequent connections, especially those that +are authenticated as guest, as reconnections, invalidating the earlier +user's smb session. This fix allows cifs to mount multiple times to the +same server with different userids without risking invalidating earlier +established security contexts. + Version 1.56 ------------ Add "forcemandatorylock" mount option to allow user to use mandatory diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 7ac481841f87..2b1d28a9ee28 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.56" +#define CIFS_VERSION "1.57" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 94c1ca0ec953..e004f6db5fc8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -164,9 +164,12 @@ struct TCP_Server_Info { /* multiplexed reads or writes */ unsigned int maxBuf; /* maxBuf specifies the maximum */ /* message size the server can send or receive for non-raw SMBs */ - unsigned int maxRw; /* maxRw specifies the maximum */ + unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ + unsigned int max_vcs; /* maximum number of smb sessions, at least + those that can be specified uniquely with + vcnumbers */ char sessid[4]; /* unique token id for this session */ /* (returned on Negotiate */ int capabilities; /* allow selective disabling of caps by smb sess */ @@ -210,6 +213,7 @@ struct cifsSesInfo { unsigned overrideSecFlg; /* if non-zero override global sec flags */ __u16 ipc_tid; /* special tid for connection to IPC share */ __u16 flags; + __u16 vcnum; char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 552642a507c4..939e2f76b959 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -528,14 +528,15 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) server->maxReq = le16_to_cpu(rsp->MaxMpxCount); server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); + server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey); /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { - server->maxRw = 0xFF00; + server->max_rw = 0xFF00; server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; } else { - server->maxRw = 0;/* we do not need to use raw anyway */ + server->max_rw = 0;/* do not need to use raw anyway */ server->capabilities = CAP_MPX_MODE; } tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); @@ -638,7 +639,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) /* probably no need to store and check maxvcs */ server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); - server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); + server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf)); GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); server->capabilities = le32_to_cpu(pSMBr->Capabilities); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b234407a3007..5c68b4282be9 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -34,15 +34,99 @@ extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); +/* Checks if this is the first smb session to be reconnected after + the socket has been reestablished (so we know whether to use vc 0). + Called while holding the cifs_tcp_ses_lock, so do not block */ +static bool is_first_ses_reconnect(struct cifsSesInfo *ses) +{ + struct list_head *tmp; + struct cifsSesInfo *tmp_ses; + + list_for_each(tmp, &ses->server->smb_ses_list) { + tmp_ses = list_entry(tmp, struct cifsSesInfo, + smb_ses_list); + if (tmp_ses->need_reconnect == false) + return false; + } + /* could not find a session that was already connected, + this must be the first one we are reconnecting */ + return true; +} + +/* + * vc number 0 is treated specially by some servers, and should be the + * first one we request. After that we can use vcnumbers up to maxvcs, + * one for each smb session (some Windows versions set maxvcs incorrectly + * so maxvc=1 can be ignored). If we have too many vcs, we can reuse + * any vc but zero (some servers reset the connection on vcnum zero) + * + */ +static __le16 get_next_vcnum(struct cifsSesInfo *ses) +{ + __u16 vcnum = 0; + struct list_head *tmp; + struct cifsSesInfo *tmp_ses; + __u16 max_vcs = ses->server->max_vcs; + __u16 i; + int free_vc_found = 0; + + /* Quoting the MS-SMB specification: "Windows-based SMB servers set this + field to one but do not enforce this limit, which allows an SMB client + to establish more virtual circuits than allowed by this value ... but + other server implementations can enforce this limit." */ + if (max_vcs < 2) + max_vcs = 0xFFFF; + + write_lock(&cifs_tcp_ses_lock); + if ((ses->need_reconnect) && is_first_ses_reconnect(ses)) + goto get_vc_num_exit; /* vcnum will be zero */ + for (i = ses->server->srv_count - 1; i < max_vcs; i++) { + if (i == 0) /* this is the only connection, use vc 0 */ + break; + + free_vc_found = 1; + + list_for_each(tmp, &ses->server->smb_ses_list) { + tmp_ses = list_entry(tmp, struct cifsSesInfo, + smb_ses_list); + if (tmp_ses->vcnum == i) { + free_vc_found = 0; + break; /* found duplicate, try next vcnum */ + } + } + if (free_vc_found) + break; /* we found a vcnumber that will work - use it */ + } + + if (i == 0) + vcnum = 0; /* for most common case, ie if one smb session, use + vc zero. Also for case when no free vcnum, zero + is safest to send (some clients only send zero) */ + else if (free_vc_found == 0) + vcnum = 1; /* we can not reuse vc=0 safely, since some servers + reset all uids on that, but 1 is ok. */ + else + vcnum = i; + ses->vcnum = vcnum; +get_vc_num_exit: + write_unlock(&cifs_tcp_ses_lock); + + return le16_to_cpu(vcnum); +} + static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) { __u32 capabilities = 0; /* init fields common to all four types of SessSetup */ - /* note that header is initialized to zero in header_assemble */ + /* Note that offsets for first seven fields in req struct are same */ + /* in CIFS Specs so does not matter which of 3 forms of struct */ + /* that we use in next few lines */ + /* Note that header is initialized to zero in header_assemble */ pSMB->req.AndXCommand = 0xFF; pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); + pSMB->req.VcNumber = get_next_vcnum(ses); /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ @@ -71,7 +155,6 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) if (ses->capabilities & CAP_UNIX) capabilities |= CAP_UNIX; - /* BB check whether to init vcnum BB */ return capabilities; } From cc3ca22063784076bd240fda87217387a8f2ae92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 20 Feb 2009 23:35:51 -0800 Subject: [PATCH 249/263] x86, mce: remove incorrect __cpuinit for mce_cpu_features() Impact: Bug fix on UP Checkin 6ec68bff3c81e776a455f6aca95c8c5f1d630198: x86, mce: reinitialize per cpu features on resume introduced a call to mce_cpu_features() in the resume path, in order for the MCE machinery to get properly reinitialized after a resume. However, this function (and its successors) was flagged __cpuinit, which becomes __init on UP configurations (on SMP suspend/resume requires CPU hotplug and so this would not be seen.) Remove the offending __cpuinit annotations for mce_cpu_features() and its successor functions. Cc: Andi Kleen Cc: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 25cf624eccb7..fe79985ce0f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) } -static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +static void mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094d..f2ee0ae29bd6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr) } /* cpu init entry point, called from mce.c with preempt off */ -void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +void mce_amd_feature_init(struct cpuinfo_x86 *c) { unsigned int bank, block; unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd39..f44c36624360 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } -static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } From 56f382a08722186623400180adbb9d1be1721cee Mon Sep 17 00:00:00 2001 From: Richard Hughes Date: Sun, 25 Jan 2009 15:05:50 +0000 Subject: [PATCH 250/263] battery: don't assume we are fully charged when not charging or discharging On hardware like the T61 it can take a couple of seconds for the battery to start charging after the power is connected, and we incorrectly tell userspace that we are fully charged, and then go back to charging. Only mark a battery as fully charged when the preset charge matches either the last full charge, or the design charge. http://bugzilla.kernel.org/show_bug.cgi?id=12632 Signed-off-by: Richard Hughes Acked-by: Alexey Starikovskiy Acked-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/acpi/battery.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 65132f920459..69cbc57c2d1c 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -138,6 +138,29 @@ static int acpi_battery_technology(struct acpi_battery *battery) static int acpi_battery_get_state(struct acpi_battery *battery); +static int acpi_battery_is_charged(struct acpi_battery *battery) +{ + /* either charging or discharging */ + if (battery->state != 0) + return 0; + + /* battery not reporting charge */ + if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN || + battery->capacity_now == 0) + return 0; + + /* good batteries update full_charge as the batteries degrade */ + if (battery->full_charge_capacity == battery->capacity_now) + return 1; + + /* fallback to using design values for broken batteries */ + if (battery->design_capacity == battery->capacity_now) + return 1; + + /* we don't do any sort of metric based on percentages */ + return 0; +} + static int acpi_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) @@ -155,7 +178,7 @@ static int acpi_battery_get_property(struct power_supply *psy, val->intval = POWER_SUPPLY_STATUS_DISCHARGING; else if (battery->state & 0x02) val->intval = POWER_SUPPLY_STATUS_CHARGING; - else if (battery->state == 0) + else if (acpi_battery_is_charged(battery)) val->intval = POWER_SUPPLY_STATUS_FULL; else val->intval = POWER_SUPPLY_STATUS_UNKNOWN; From 216773a787c3c46ef26bf1742c1fdba37d26be45 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 14 Feb 2009 01:59:06 +0100 Subject: [PATCH 251/263] Consolidate driver_probe_done() loops into one place there's a few places that currently loop over driver_probe_done(), and I'm about to add another one. This patch abstracts it into a helper to reduce duplication. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. Wysocki Cc: Len Brown Acked-by: Greg KH Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 17 +++++++++++++++++ include/linux/device.h | 2 ++ init/do_mounts.c | 13 +++++++++---- init/do_mounts_md.c | 5 +++-- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 315bed8d5e7f..135231239103 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -18,9 +18,11 @@ */ #include +#include #include #include #include +#include #include "base.h" #include "power/power.h" @@ -167,6 +169,21 @@ int driver_probe_done(void) return 0; } +/** + * wait_for_device_probe + * Wait for device probing to be completed. + * + * Note: this function polls at 100 msec intervals. + */ +int wait_for_device_probe(void) +{ + /* wait for the known devices to complete their probing */ + while (driver_probe_done() != 0) + msleep(100); + async_synchronize_full(); + return 0; +} + /** * driver_probe_device - attempt to bind device & driver together * @drv: driver to bind a device to diff --git a/include/linux/device.h b/include/linux/device.h index 45e5b1921fbb..47f343c7bdda 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -147,6 +147,8 @@ extern void put_driver(struct device_driver *drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); extern int driver_probe_done(void); +extern int wait_for_device_probe(void); + /* sysfs interface for exporting driver attributes */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 708105e163df..8d4ff5afc1d8 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -370,10 +370,14 @@ void __init prepare_namespace(void) ssleep(root_delay); } - /* wait for the known devices to complete their probing */ - while (driver_probe_done() != 0) - msleep(100); - async_synchronize_full(); + /* + * wait for the known devices to complete their probing + * + * Note: this is a potential source of long boot delays. + * For example, it is not atypical to wait 5 seconds here + * for the touchpad of a laptop to initialize. + */ + wait_for_device_probe(); md_run_setup(); @@ -399,6 +403,7 @@ void __init prepare_namespace(void) while (driver_probe_done() != 0 || (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) msleep(100); + async_synchronize_full(); } is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index ff95e3192884..9bdddbcb3d6a 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -281,8 +281,9 @@ static void __init autodetect_raid(void) */ printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n"); printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); - while (driver_probe_done() < 0) - msleep(100); + + wait_for_device_probe(); + fd = sys_open("/dev/md0", 0, 0); if (fd >= 0) { sys_ioctl(fd, RAID_AUTORUN, raid_autopart); From eed3ee08292d821282169708e5e8e89a0d0a0c63 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 14 Feb 2009 02:00:19 +0100 Subject: [PATCH 252/263] PM/resume: wait for device probing to finish the resume code does not currently wait for device probing to finish. Even without async function calls this is dicey and not correct, but with async function calls during the boot sequence this is going to get hit more... This patch adds the synchronization using the newly introduced helper. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. Wysocki Cc: Len Brown Acked-by: Greg KH Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 432ee575c9ee..7b40e94b1d42 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -594,6 +594,12 @@ static int software_resume(void) int error; unsigned int flags; + /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate @@ -610,6 +616,11 @@ static int software_resume(void) mutex_unlock(&pm_mutex); return -ENOENT; } + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + wait_for_device_probe(); swsusp_resume_device = name_to_dev_t(resume_file); pr_debug("PM: Resume from partition %s\n", resume_file); } else { From a1bb7d61233ba5fb5cd865f907a9ddcc8f8c02bd Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sat, 14 Feb 2009 02:01:14 +0100 Subject: [PATCH 253/263] PM/hibernate: fix "swap breaks after hibernation failures" http://bugzilla.kernel.org/show_bug.cgi?id=12239 The image writing code dropped a reference to the current swap device. This doesn't show up if the hibernation succeeds - because it doesn't affect the image which gets resumed. But it means multiple _failed_ hibernations end up freeing the swap device while it is still use! swsusp_write() finds the block device for the swap file using swap_type_of(). It then uses blkdev_get() / blkdev_put() to open and close the block device. Unfortunately, blkdev_get() assumes ownership of the inode of the block_device passed to it. So blkdev_put() calls iput() on the inode. This is by design and other callers expect this behaviour. The fix is for swap_type_of() to take a reference on the inode using bdget(). Signed-off-by: Alan Jenkins Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/swapfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 7e6304dfafab..312fafe0ab6e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -635,7 +635,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) if (!bdev) { if (bdev_p) - *bdev_p = sis->bdev; + *bdev_p = bdget(sis->bdev->bd_dev); spin_unlock(&swap_lock); return i; @@ -647,7 +647,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) struct swap_extent, list); if (se->start_block == offset) { if (bdev_p) - *bdev_p = sis->bdev; + *bdev_p = bdget(sis->bdev->bd_dev); spin_unlock(&swap_lock); bdput(bdev); From 09664fda48c5dd63277f1f42888ca9d5dca6037a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 Feb 2009 02:02:16 +0100 Subject: [PATCH 254/263] PM: fix build for CONFIG_PM unset Compilation of kprobes.c with CONFIG_PM unset is broken due to some broken config dependncies. Fix that. Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Reported-by: Ingo Molnar Tested-by: Masami Hiramatsu Signed-off-by: Linus Torvalds --- kernel/Makefile | 1 + kernel/power/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 170a9213c1b6..e4791b3ba55d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ +obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o diff --git a/kernel/power/Makefile b/kernel/power/Makefile index d7a10167a25b..720ea4f781bd 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o +obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o From 3049103ddfc9aac111916bd2f39ac6976c431517 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 14 Feb 2009 02:03:08 +0100 Subject: [PATCH 255/263] swsusp: dont fiddle with swappiness sc.swappiness is not used in the swsusp memory shrinking path, do not set it. Signed-off-by: Johannes Weiner Reviewed-by: KOSAKI Motohiro Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 9a27c44aa327..550e8695070d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2112,7 +2112,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages) .may_swap = 0, .swap_cluster_max = nr_pages, .may_writepage = 1, - .swappiness = vm_swappiness, .isolate_pages = isolate_pages_global, }; @@ -2146,10 +2145,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages) int prio; /* Force reclaiming mapped pages in the passes #3 and #4 */ - if (pass > 2) { + if (pass > 2) sc.may_swap = 1; - sc.swappiness = 100; - } for (prio = DEF_PRIORITY; prio >= 0; prio--) { unsigned long nr_to_scan = nr_pages - ret; From 0cb57258fe01e9b21076b6a15b6aec7a24168228 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 14 Feb 2009 02:04:10 +0100 Subject: [PATCH 256/263] swsusp: clean up shrink_all_zones() Move local variables to innermost possible scopes and use local variables to cache calculations/reads done more than once. No change in functionality (intended). Signed-off-by: Johannes Weiner Reviewed-by: KOSAKI Motohiro Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Acked-by: Pavel Machek Signed-off-by: Linus Torvalds --- mm/vmscan.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 550e8695070d..6177e3bcd66b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2057,31 +2057,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, int pass, struct scan_control *sc) { struct zone *zone; - unsigned long nr_to_scan, ret = 0; - enum lru_list l; + unsigned long ret = 0; for_each_zone(zone) { + enum lru_list l; if (!populated_zone(zone)) continue; - if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; for_each_evictable_lru(l) { + enum zone_stat_item ls = NR_LRU_BASE + l; + unsigned long lru_pages = zone_page_state(zone, ls); + /* For pass = 0, we don't shrink the active list */ - if (pass == 0 && - (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) + if (pass == 0 && (l == LRU_ACTIVE_ANON || + l == LRU_ACTIVE_FILE)) continue; - zone->lru[l].nr_scan += - (zone_page_state(zone, NR_LRU_BASE + l) - >> prio) + 1; + zone->lru[l].nr_scan += (lru_pages >> prio) + 1; if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { + unsigned long nr_to_scan; + zone->lru[l].nr_scan = 0; - nr_to_scan = min(nr_pages, - zone_page_state(zone, - NR_LRU_BASE + l)); + nr_to_scan = min(nr_pages, lru_pages); ret += shrink_list(l, nr_to_scan, zone, sc, prio); if (ret >= nr_pages) @@ -2089,7 +2089,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, } } } - return ret; } From ebae2604f2c3693717d9dc687c84578f0526480c Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Sat, 14 Feb 2009 02:05:14 +0100 Subject: [PATCH 257/263] PM: Fix pm_notifiers during user mode hibernation Snapshot device is opened with O_RDONLY during suspend and O_WRONLY durig resume. Make sure we also call notifiers with correct parameter telling them what we are really doing. Signed-off-by: Andrey Borzenkov Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Acked-by: Pavel Machek Signed-off-by: Linus Torvalds --- kernel/power/user.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/power/user.c b/kernel/power/user.c index 005b93d839ba..6c85359364f2 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) - pm_notifier_call_chain(PM_POST_RESTORE); - } else { - data->swap = -1; - data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); + } else { + data->swap = -1; + data->mode = O_WRONLY; + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_RESTORE); } if (error) atomic_inc(&snapshot_device_available); From b090f9fa53d51c8a33370071de9e391919ee1fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Sat, 14 Feb 2009 02:06:17 +0100 Subject: [PATCH 258/263] PM: Wait for console in resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids later waking up to a blinking cursor if the device woke up and returned to sleep before the console switch happened. Signed-off-by: Brian Swetland Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/power/console.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/power/console.c b/kernel/power/console.c index b8628be2a465..a3961b205de7 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -78,6 +78,12 @@ void pm_restore_console(void) } set_console(orig_fgconsole); release_console_sem(); + + if (vt_waitactive(orig_fgconsole)) { + pr_debug("Resume: Can't switch VCs."); + return; + } + kmsg_redirect = orig_kmsg; } #endif From 403f307576396f3362fbb65af190885b6036c72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Sat, 14 Feb 2009 02:07:24 +0100 Subject: [PATCH 259/263] PM: Fix suspend_console and resume_console to use only one semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a race where a thread acquires the console while the console is suspended, and the console is resumed before this thread releases it. In this case, the secondary console semaphore would be left locked, and the primary semaphore would be released twice. This in turn would cause the console switch on suspend or resume to hang forever. Note that suspend_console does not actually lock the console for clients that use acquire_console_sem, it only locks it for clients that use try_acquire_console_sem. If we change suspend_console to fully lock the console, then the kernel may deadlock on suspend. One client of try_acquire_console_sem is acquire_console_semaphore_for_printk, which uses it to prevent printk from using the console while it is suspended. Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/printk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/printk.c b/kernel/printk.c index 69188f226a93..e3602d0755b0 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress); * driver system. */ static DECLARE_MUTEX(console_sem); -static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); @@ -891,12 +890,14 @@ void suspend_console(void) printk("Suspending console(s) (use no_console_suspend to debug)\n"); acquire_console_sem(); console_suspended = 1; + up(&console_sem); } void resume_console(void) { if (!console_suspend_enabled) return; + down(&console_sem); console_suspended = 0; release_console_sem(); } @@ -912,11 +913,9 @@ void resume_console(void) void acquire_console_sem(void) { BUG_ON(in_interrupt()); - if (console_suspended) { - down(&secondary_console_sem); - return; - } down(&console_sem); + if (console_suspended) + return; console_locked = 1; console_may_schedule = 1; } @@ -926,6 +925,10 @@ int try_acquire_console_sem(void) { if (down_trylock(&console_sem)) return -1; + if (console_suspended) { + up(&console_sem); + return -1; + } console_locked = 1; console_may_schedule = 0; return 0; @@ -979,7 +982,7 @@ void release_console_sem(void) unsigned wake_klogd = 0; if (console_suspended) { - up(&secondary_console_sem); + up(&console_sem); return; } From e6bd6760c92dc8475c79c4c4a8a16ac313c0b93d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 15 Feb 2009 22:45:49 +0100 Subject: [PATCH 260/263] x86_64: acpi/wakeup_64 cleanup - remove %ds re-set, it's already set in wakeup_long64 - remove double labels and alignment (ENTRY already adds both) - use meaningful resume point labelname - skip alignment while jumping from wakeup_long64 to the resume point - remove .size, .type and unused labels [v2] - added ENDPROCs Signed-off-by: Jiri Slaby Acked-by: Cyrill Gorcunov Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/wakeup_64.S | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a70..b5dee6a0de3a 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -13,7 +13,6 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ ENTRY(wakeup_long64) -wakeup_long64: movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax @@ -34,16 +33,12 @@ wakeup_long64: movq saved_rip, %rax jmp *%rax +ENDPROC(wakeup_long64) bogus_64_magic: jmp bogus_64_magic - .align 2 - .p2align 4,,15 -.globl do_suspend_lowlevel - .type do_suspend_lowlevel,@function -do_suspend_lowlevel: -.LFB5: +ENTRY(do_suspend_lowlevel) subq $8, %rsp xorl %eax, %eax call save_processor_state @@ -67,7 +62,7 @@ do_suspend_lowlevel: pushfq popq pt_regs_flags(%rax) - movq $.L97, saved_rip(%rip) + movq $resume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -79,13 +74,9 @@ do_suspend_lowlevel: movl $3, %edi xorl %eax, %eax jmp acpi_enter_sleep_state -.L97: - .p2align 4,,7 -.L99: - .align 4 - movl $24, %eax - movw %ax, %ds + .align 4 +resume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx @@ -117,12 +108,9 @@ do_suspend_lowlevel: xorl %eax, %eax addq $8, %rsp jmp restore_processor_state -.LFE5: -.Lfe5: - .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel - +ENDPROC(do_suspend_lowlevel) + .data -ALIGN ENTRY(saved_rbp) .quad 0 ENTRY(saved_rsi) .quad 0 ENTRY(saved_rdi) .quad 0 From 6defa2fe2019f3729933516fba5cfd75eecd07de Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 15 Feb 2009 22:46:45 +0100 Subject: [PATCH 261/263] x86_64: Fix S3 fail path As acpi_enter_sleep_state can fail, take this into account in do_suspend_lowlevel and don't return to the do_suspend_lowlevel's caller. This would break (currently) fpu status and preempt count. Technically, this means use `call' instead of `jmp' and `jmp' to the `resume_point' after the `call' (i.e. if acpi_enter_sleep_state returns=fails). `resume_point' will handle the restore of fpu and preempt count gracefully. Signed-off-by: Jiri Slaby Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/wakeup_64.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index b5dee6a0de3a..96258d9dc974 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -73,7 +73,9 @@ ENTRY(do_suspend_lowlevel) addq $8, %rsp movl $3, %edi xorl %eax, %eax - jmp acpi_enter_sleep_state + call acpi_enter_sleep_state + /* in case something went wrong, restore the machine status and go on */ + jmp resume_point .align 4 resume_point: From 4898c2b2f04051e19f4230683c0f0b15f71af887 Mon Sep 17 00:00:00 2001 From: Tony Vroon Date: Mon, 2 Feb 2009 11:11:10 +0000 Subject: [PATCH 262/263] fujitsu-laptop: Use RFKILL support bitmask from firmware Up until now, we polled the rfkill status for every incoming FUJ02E3 ACPI event. It turns out that the firmware has a bitmask which indicates what rfkill-related state it can report. The rfkill_supported bitmask is now used to avoid polling for rfkill at all in the notification handler if there is no support. Also, it is used in the platform device callbacks. As before we register all callbacks and report "unknown" if the firmware does not give us status updates for that particular bit. This was fed through checkpatch.pl and tested on the S6420, S7020 and P8010 platforms. Signed-off-by: Tony Vroon Tested-by: Stephen Gildea Acked-by: Jonathan Woithe Signed-off-by: Len Brown --- drivers/platform/x86/fujitsu-laptop.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 65dc41540c62..45940f31fe9e 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -166,6 +166,7 @@ struct fujitsu_hotkey_t { struct platform_device *pf_device; struct kfifo *fifo; spinlock_t fifo_lock; + int rfkill_supported; int rfkill_state; int logolamp_registered; int kblamps_registered; @@ -526,7 +527,7 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD) + if (!(fujitsu_hotkey->rfkill_supported & 0x100)) return sprintf(buf, "unknown\n"); if (fujitsu_hotkey->rfkill_state & 0x100) return sprintf(buf, "open\n"); @@ -538,7 +539,7 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD) + if (!(fujitsu_hotkey->rfkill_supported & 0x200)) return sprintf(buf, "unknown\n"); if (fujitsu_hotkey->rfkill_state & 0x200) return sprintf(buf, "docked\n"); @@ -550,7 +551,7 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD) + if (!(fujitsu_hotkey->rfkill_supported & 0x20)) return sprintf(buf, "unknown\n"); if (fujitsu_hotkey->rfkill_state & 0x20) return sprintf(buf, "on\n"); @@ -928,8 +929,17 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_hotkey->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + fujitsu_hotkey->rfkill_supported = + call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); + + /* Make sure our bitmask of supported functions is cleared if the + RFKILL function block is not implemented, like on the S7020. */ + if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD) + fujitsu_hotkey->rfkill_supported = 0; + + if (fujitsu_hotkey->rfkill_supported) + fujitsu_hotkey->rfkill_state = + call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n", @@ -1005,8 +1015,9 @@ static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, input = fujitsu_hotkey->input; - fujitsu_hotkey->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_hotkey->rfkill_supported) + fujitsu_hotkey->rfkill_state = + call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); switch (event) { case ACPI_FUJITSU_NOTIFY_CODE1: From ba193d64abfe644e8752affa310a368eda01f46e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 19 Feb 2009 12:56:16 -0700 Subject: [PATCH 263/263] ACPI: remove CONFIG_ACPI_SYSTEM Remove CONFIG_ACPI_SYSTEM. It was always set the same as CONFIG_ACPI, and it had no menu label, so there was no way to set it to anything other than "y". Some things under CONFIG_ACPI_SYSTEM (acpi_irq_handled, acpi_os_gpe_count(), event_is_open, register_acpi_notifier(), etc.) are used unconditionally by the CA, the OSPM, and drivers, so we depend on them always being present. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/Kconfig | 7 ------- drivers/acpi/Makefile | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a7799a99f2d9..8a851d0f4384 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -254,13 +254,6 @@ config ACPI_PCI_SLOT help you correlate PCI bus addresses with the physical geography of your slots. If you are unsure, say N. -config ACPI_SYSTEM - bool - default y - help - This driver will enable your system to shut down using ACPI, and - dump your ACPI DSDT table using /proc/acpi/dsdt. - config X86_PM_TIMER bool "Power Management Timer Support" if EMBEDDED depends on X86 diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 65d90c720b5a..b130ea0d0759 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI_CONTAINER) += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o obj-y += power.o -obj-$(CONFIG_ACPI_SYSTEM) += system.o event.o +obj-y += system.o event.o obj-$(CONFIG_ACPI_DEBUG) += debug.o obj-$(CONFIG_ACPI_NUMA) += numa.o obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += acpi_memhotplug.o