From 616b1c7238f0de5cec12045267a924035f8ed317 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:30:57 +1000 Subject: [PATCH 01/36] [XFS] Update copyrights SGI-PV: 933551 SGI-Modid: xfs-linux:xfs-kern:190625a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/xfs_dmapi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55c17adaaa37..19e872856f6b 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as From 536388be42c938fb6d0eece681526ce13bb50aab Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:35:43 +1000 Subject: [PATCH 02/36] [XFS] update copyrights SGI-PV: 933765 SGI-Modid: xfs-linux:xfs-kern:190760a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index a6e57c647be4..56d85d85fb05 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as From bb3f724e12eb9c62c92ff6f14a856bc58ba35f5e Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:43:05 +1000 Subject: [PATCH 03/36] [XFS] send dmapi events from nopage for mmapped files SGI-PV: 935317 SGI-Modid: xfs-linux:xfs-kern:192007a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 90 +++++++++++++------------------------ 1 file changed, 32 insertions(+), 58 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f1ce4323f56e..3881622bcf08 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -311,6 +311,31 @@ linvfs_fsync( #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) +#ifdef CONFIG_XFS_DMAPI + +STATIC struct page * +linvfs_filemap_nopage( + struct vm_area_struct *area, + unsigned long address, + int *type) +{ + struct inode *inode = area->vm_file->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + int error; + + ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); + + error = XFS_SEND_MMAP(mp, area, 0); + if (error) + return NULL; + + return filemap_nopage(area, address, type); +} + +#endif /* CONFIG_XFS_DMAPI */ + + STATIC int linvfs_readdir( struct file *filp, @@ -390,14 +415,6 @@ done: return -error; } -#ifdef CONFIG_XFS_DMAPI -STATIC void -linvfs_mmap_close( - struct vm_area_struct *vma) -{ - xfs_dm_mm_put(vma); -} -#endif /* CONFIG_XFS_DMAPI */ STATIC int linvfs_file_mmap( @@ -411,16 +428,11 @@ linvfs_file_mmap( vma->vm_ops = &linvfs_file_vm_ops; - if (vp->v_vfsp->vfs_flag & VFS_DMI) { - xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); - - error = -XFS_SEND_MMAP(mp, vma, 0); - if (error) - return error; #ifdef CONFIG_XFS_DMAPI + if (vp->v_vfsp->vfs_flag & VFS_DMI) { vma->vm_ops = &linvfs_dmapi_file_vm_ops; -#endif } +#endif /* 
CONFIG_XFS_DMAPI */ VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); if (!error) @@ -474,6 +486,7 @@ linvfs_ioctl_invis( return error; } +#ifdef CONFIG_XFS_DMAPI #ifdef HAVE_VMOP_MPROTECT STATIC int linvfs_mprotect( @@ -494,6 +507,7 @@ linvfs_mprotect( return error; } #endif /* HAVE_VMOP_MPROTECT */ +#endif /* CONFIG_XFS_DMAPI */ #ifdef HAVE_FOP_OPEN_EXEC /* If the user is attempting to execute a file that is offline then @@ -528,49 +542,10 @@ open_exec_out: } #endif /* HAVE_FOP_OPEN_EXEC */ -/* - * Temporary workaround to the AIO direct IO write problem. - * This code can go and we can revert to do_sync_write once - * the writepage(s) rework is merged. - */ -STATIC ssize_t -linvfs_write( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} -STATIC ssize_t -linvfs_write_invis( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} - - struct file_operations linvfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write, + .write = do_sync_write, .readv = linvfs_readv, .writev = linvfs_writev, .aio_read = linvfs_aio_read, @@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = { struct file_operations linvfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write_invis, + .write = do_sync_write, .readv = linvfs_readv_invis, .writev = linvfs_writev_invis, .aio_read = linvfs_aio_read_invis, @@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = { #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct 
linvfs_dmapi_file_vm_ops = { - .close = linvfs_mmap_close, - .nopage = filemap_nopage, + .nopage = linvfs_filemap_nopage, .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT .mprotect = linvfs_mprotect, From cdb626878f6f5e37d678d30c9cacf5726b88a656 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:24:19 +1000 Subject: [PATCH 04/36] [XFS] replace vn_get usage by ihold SGI-PV: 938306 SGI-Modid: xfs-linux:xfs-kern:194627a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.c | 24 ----------------------- fs/xfs/linux-2.6/xfs_vnode.h | 21 ++++++-------------- fs/xfs/quota/xfs_qm_syscalls.c | 18 ++++++----------- fs/xfs/xfs_vfsops.c | 36 ++++------------------------------ 4 files changed, 16 insertions(+), 83 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 250cad54e892..353276bda344 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -162,30 +162,6 @@ vn_initialize( return vp; } -/* - * Get a reference on a vnode. - */ -vnode_t * -vn_get( - struct vnode *vp, - vmap_t *vmap) -{ - struct inode *inode; - - XFS_STATS_INC(vn_get); - inode = LINVFS_GET_IP(vp); - if (inode->i_state & I_FREEING) - return NULL; - - inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); - if (!inode) /* Inode not present */ - return NULL; - - vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); - - return vp; -} - /* * Revalidate the Linux inode from the vattr. 
* Note: i_size _not_ updated; we must hold the inode diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 56d85d85fb05..6cb0a01df25d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -504,20 +504,6 @@ extern void vn_init(void); extern int vn_wait(struct vnode *); extern vnode_t *vn_initialize(struct inode *); -/* - * Acquiring and invalidating vnodes: - * - * if (vn_get(vp, version, 0)) - * ...; - * vn_purge(vp, version); - * - * vn_get and vn_purge must be called with vmap_t arguments, sampled - * while a lock that the vnode's VOP_RECLAIM function acquires is - * held, to ensure that the vnode sampled with the lock held isn't - * recycled (VOP_RECLAIMed) or deallocated between the release of the lock - * and the subsequent vn_get or vn_purge. - */ - /* * vnode_map structures _must_ match vn_epoch and vnode structure sizes. */ @@ -532,7 +518,6 @@ typedef struct vnode_map { (vmap).v_ino = (vp)->v_inode.i_ino; } extern void vn_purge(struct vnode *, vmap_t *); -extern vnode_t *vn_get(struct vnode *, vmap_t *); extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); extern void vn_remove(struct vnode *); @@ -560,6 +545,12 @@ extern void vn_rele(struct vnode *); #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) #endif +static inline struct vnode *vn_grab(struct vnode *vp) +{ + struct inode *inode = igrab(LINVFS_GET_IP(vp)); + return inode ? LINVFS_GET_VP(inode) : NULL; +} + /* * Vname handling macros. 
*/ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 68e98962dbef..15e02e8a9d4f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { - vmap_t vmap; xfs_inode_t *ip, *topino; uint ireclaims; vnode_t *vp; @@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); -again: XFS_MOUNT_ILOCK(mp); +again: ip = mp->m_inodes; if (ip == NULL) { XFS_MOUNT_IUNLOCK(mp); @@ -1090,18 +1089,14 @@ again: } vnode_refd = B_FALSE; if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - /* - * Sample vp mapping while holding the mplock, lest - * we come across a non-existent vnode. - */ - VMAP(vp, vmap); ireclaims = mp->m_ireclaims; topino = mp->m_inodes; - XFS_MOUNT_IUNLOCK(mp); - - /* XXX restart limit ? */ - if ( ! (vp = vn_get(vp, &vmap))) + vp = vn_grab(vp); + if (!vp) goto again; + + XFS_MOUNT_IUNLOCK(mp); + /* XXX restart limit ? */ xfs_ilock(ip, XFS_ILOCK_EXCL); vnode_refd = B_TRUE; } else { @@ -1137,7 +1132,6 @@ again: */ if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { /* XXX use a sentinel */ - XFS_MOUNT_IUNLOCK(mp); goto again; } ip = ip->i_mnext; diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 42bcc0215203..b8ce6cad2b71 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -906,7 +906,6 @@ xfs_sync_inodes( xfs_inode_t *ip_next; xfs_buf_t *bp; vnode_t *vp = NULL; - vmap_t vmap; int error; int last_error; uint64_t fflag; @@ -1101,48 +1100,21 @@ xfs_sync_inodes( * lock in xfs_ireclaim() after the inode is pulled from * the mount list will sleep until we release it here. * This keeps the vnode from being freed while we reference - * it. It is also cheaper and simpler than actually doing - * a vn_get() for every inode we touch here. + * it. 
*/ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { ip = ip->i_mnext; continue; } - /* - * We need to unlock the inode list lock in order - * to lock the inode. Insert a marker record into - * the inode list to remember our position, dropping - * the lock is now done inside the IPOINTER_INSERT - * macro. - * - * We also use the inode list lock to protect us - * in taking a snapshot of the vnode version number - * for use in calling vn_get(). - */ - VMAP(vp, vmap); - IPOINTER_INSERT(ip, mp); - - vp = vn_get(vp, &vmap); + vp = vn_grab(vp); if (vp == NULL) { - /* - * The vnode was reclaimed once we let go - * of the inode list lock. Skip to the - * next list entry. Remove the marker. - */ - - XFS_MOUNT_ILOCK(mp); - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_REMOVE(ip, mp); - + ip = ip->i_mnext; continue; } + IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); ASSERT(vp == XFS_ITOV(ip)); From eedb5530aad71aecbc1e99cb67f676c26280d3f9 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:39:56 +1000 Subject: [PATCH 05/36] [XFS] Make metadata IO completion consistent with other IO completion handlers. SGI-PV: 938409 SGI-Modid: xfs-linux:xfs-kern:22965a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index df0cba239dd5..58286b1d733b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -1249,8 +1249,8 @@ bio_end_io_pagebuf( int error) { xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; - unsigned int i, blocksize = pb->pb_target->pbr_bsize; - struct bio_vec *bvec = bio->bi_io_vec; + unsigned int blocksize = pb->pb_target->pbr_bsize; + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; if (bio->bi_size) return 1; @@ -1258,10 +1258,12 @@ bio_end_io_pagebuf( if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) pb->pb_error = EIO; - for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + do { struct page *page = bvec->bv_page; - if (pb->pb_error) { + if (unlikely(pb->pb_error)) { + if (pb->pb_flags & PBF_READ) + ClearPageUptodate(page); SetPageError(page); } else if (blocksize == PAGE_CACHE_SIZE) { SetPageUptodate(page); @@ -1270,10 +1272,13 @@ bio_end_io_pagebuf( set_page_region(page, bvec->bv_offset, bvec->bv_len); } + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (_pagebuf_iolocked(pb)) { unlock_page(page); } - } + } while (bvec >= bio->bi_io_vec); _pagebuf_iodone(pb, 1); bio_put(bio); From bcec2b7f2bf856bdf2a8780a57fe78417a513682 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:40:17 +1000 Subject: [PATCH 06/36] [XFS] Add a chunk of tracing code to diagnose truncate related issues. 
SGI-PV: 938410 SGI-Modid: xfs-linux:xfs-kern:22966a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 11 +++++++++++ fs/xfs/linux-2.6/xfs_lrw.h | 7 ++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a3a4b5aaf5d9..bd9aba1f2353 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1202,6 +1202,16 @@ out_unlock: return error; } +STATIC int +linvfs_invalidate_page( + struct page *page, + unsigned long offset) +{ + xfs_page_trace(XFS_INVALIDPAGE_ENTER, + page->mapping->host, page, offset); + return block_invalidatepage(page, offset); +} + /* * Called to move a page into cleanable state - and from there * to be released. Possibly the page is already clean. We always @@ -1279,6 +1289,7 @@ struct address_space_operations linvfs_aops = { .writepage = linvfs_writepage, .sync_page = block_sync_page, .releasepage = linvfs_release_page, + .invalidatepage = linvfs_invalidate_page, .prepare_write = linvfs_prepare_write, .commit_write = generic_commit_write, .bmap = linvfs_bmap, diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index f197a720e394..6294dcdb797c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -70,9 +70,10 @@ struct xfs_iomap; #define XFS_SENDFILE_ENTER 21 #define XFS_WRITEPAGE_ENTER 22 #define XFS_RELEASEPAGE_ENTER 23 -#define XFS_IOMAP_ALLOC_ENTER 24 -#define XFS_IOMAP_ALLOC_MAP 25 -#define XFS_IOMAP_UNWRITTEN 26 +#define XFS_INVALIDPAGE_ENTER 24 +#define XFS_IOMAP_ALLOC_ENTER 25 +#define XFS_IOMAP_ALLOC_MAP 26 +#define XFS_IOMAP_UNWRITTEN 27 extern void xfs_rw_enter_trace(int, struct xfs_iocore *, void *, size_t, loff_t, int); extern void xfs_inval_cached_trace(struct xfs_iocore *, From 3bdbfb104e53b367892cc9510e6722346dfb656b Mon Sep 17 00:00:00 2001 From: David Chinner Date: Fri, 2 Sep 2005 16:40:47 +1000 Subject: [PATCH 07/36] [XFS] Prevent the incore superblock sb_fdblocks count from leaking 
when we are getting ENOSPC errors on writes. When we fail to allocate space for indirect blocks in xfs_bmapi() make sure we release the direct block allocation before returning. SGI-PV: 938502 SGI-Modid: xfs-linux:xfs-kern:22986a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 6f5d283888aa..3e76def1283d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4754,10 +4754,20 @@ xfs_bmapi( error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(alen), rsvd); - if (!error) + if (!error) { error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(indlen), rsvd); + if (error && rt) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FREXTENTS, + extsz, rsvd); + } else if (error) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, + alen, rsvd); + } + } if (error) { if (XFS_IS_QUOTA_ON(ip->i_mount)) From ad4a8ac4e9d9cffb0a4c9ebebc6bda9d8dbbfe99 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 2 Sep 2005 16:41:16 +1000 Subject: [PATCH 08/36] [XFS] Fix check for writeable file in xfs_ioc_space ioctl code SGI-PV: 938905 SGI-Modid: xfs-linux:xfs-kern:195240a Signed-off-by: Eric Sandeen Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 05a447e51cc0..35cbd88e1a54 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -982,7 +982,7 @@ xfs_ioc_space( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); - if (!(filp->f_flags & FMODE_WRITE)) + if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); if (vp->v_type != VREG) From d52b44d07a43b723ac2fbf1bf4053031f723676c Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:41:32 +1000 Subject: [PATCH 09/36] [XFS] Fix regression in transaction reserved-block accounting for direct 
writes. SGI-PV: 938145 SGI-Modid: xfs-linux:xfs-kern:23088a Signed-off-by: Nathan Scott --- fs/xfs/xfs_iomap.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2edd6769e5d3..44999d557d8e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -391,9 +391,9 @@ xfs_iomap_write_direct( xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; xfs_bmap_free_t free_list; int aeof; - xfs_filblks_t datablocks, qblocks, resblks; + xfs_filblks_t qblocks, resblks; int committed; - int numrtextents; + int resrtextents; /* * Make sure that the dquots are there. This doesn't hold @@ -434,14 +434,14 @@ xfs_iomap_write_direct( if (!(extsz = ip->i_d.di_extsize)) extsz = mp->m_sb.sb_rextsize; - numrtextents = qblocks = (count_fsb + extsz - 1); - do_div(numrtextents, mp->m_sb.sb_rextsize); + resrtextents = qblocks = (count_fsb + extsz - 1); + do_div(resrtextents, mp->m_sb.sb_rextsize); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; - datablocks = 0; } else { - datablocks = qblocks = count_fsb; + resrtextents = 0; + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); quota_flag = XFS_QMOPT_RES_REGBLKS; - numrtextents = 0; } /* @@ -449,9 +449,8 @@ xfs_iomap_write_direct( */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), numrtextents, + XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); From 32fb9b57aef35b82434cfb4c9de18b484fc3ec88 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:41:43 +1000 Subject: [PATCH 10/36] [XFS] Fix up the calculation of the reservation overhead to hopefully include all the components which make up the transaction in the ondisk log. Having this incomplete has shown up as problems on IRIX when some v2 log changes went in. 
The symptom was the msg of "xfs_log_write: reservation ran out. Need to up reservation" and was seen on synchronous writes on files with lots of holes (and therefore lots of extents). SGI-PV: 931457 SGI-Modid: xfs-linux:xfs-kern:23095a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/xfs_log.c | 60 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1cd2ac163877..42975cb9e538 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3179,29 +3179,57 @@ xlog_ticket_get(xlog_t *log, * and their unit amount is the total amount of space required. * * The following lines of code account for non-transaction data - * which occupy space in the on-disk log. + * which occupy space in the on-disk log. + * + * Normal form of a transaction is: + * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph> + * and then there are LR hdrs, split-recs and roundoff at end of syncs. + * + * We need to account for all the leadup data and trailer data + * around the transaction data. + * And then we need to account for the worst case in terms of using + * more space. + * The worst case will happen if: + * - the placement of the transaction happens to be such that the + * roundoff is at its maximum + * - the transaction data is synced before the commit record is synced + * i.e. <transaction-data><roundoff> | <commit-rec><roundoff> + * Therefore the commit record is in its own Log Record. + * This can happen as the commit record is called with its + * own region to xlog_write(). + * This then means that in the worst case, roundoff can happen for + * the commit-rec as well. + * The commit-rec is smaller than padding in this scenario and so it is + * not added separately. 
*/ - /* for start-rec */ - unit_bytes += sizeof(xlog_op_header_t); - - /* for padding */ - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && - log->l_mp->m_sb.sb_logsunit > 1) { - /* log su roundoff */ - unit_bytes += log->l_mp->m_sb.sb_logsunit; - } else { - /* BB roundoff */ - unit_bytes += BBSIZE; - } - - /* for commit-rec */ + /* for trans header */ unit_bytes += sizeof(xlog_op_header_t); - + unit_bytes += sizeof(xfs_trans_header_t); + + /* for start-rec */ + unit_bytes += sizeof(xlog_op_header_t); + /* for LR headers */ num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); unit_bytes += log->l_iclog_hsize * num_headers; + /* for commit-rec LR header - note: padding will subsume the ophdr */ + unit_bytes += log->l_iclog_hsize; + + /* for split-recs - ophdrs added when data split over LRs */ + unit_bytes += sizeof(xlog_op_header_t) * num_headers; + + /* for roundoff padding for transaction data and one for commit record */ + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && + log->l_mp->m_sb.sb_logsunit > 1) { + /* log su roundoff */ + unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; + } else { + /* BB roundoff */ + unit_bytes += 2*BBSIZE; + } + tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; From 7e9c63961558092d584936a874cf3fee80002eb6 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:42:05 +1000 Subject: [PATCH 11/36] [XFS] 929956 add log debugging and tracing info SGI-PV: 931456 SGI-Modid: xfs-linux:xfs-kern:23155a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot_item.c | 1 + fs/xfs/xfs_buf_item.c | 4 + fs/xfs/xfs_extfree_item.c | 2 + fs/xfs/xfs_inode_item.c | 9 ++ fs/xfs/xfs_log.c | 161 +++++++++++++++++++++++++++++++--- fs/xfs/xfs_log.h | 38 +++++++- fs/xfs/xfs_log_priv.h | 68 +++++++++++--- fs/xfs/xfs_trans.c | 3 +- fs/xfs/xfs_trans.h | 1 + 9 files changed, 265 insertions(+), 22 deletions(-) diff --git a/fs/xfs/quota/xfs_dquot_item.c 
b/fs/xfs/quota/xfs_dquot_item.c index f5271b7b1e84..e74eaa7dd1bc 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); log_vector->i_len = sizeof(xfs_qoff_logitem_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); qf->qql_format.qf_size = 1; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 30b8285ad476..a264657acfd9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -274,6 +274,7 @@ xfs_buf_item_format( ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); vecp->i_addr = (xfs_caddr_t)&bip->bli_format; vecp->i_len = base_size; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); vecp++; nvecs = 1; @@ -320,12 +321,14 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; break; } else if (next_bit != last_bit + 1) { buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; vecp++; first_bit = next_bit; @@ -337,6 +340,7 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); /* You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the boundary * split here diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index db7cbd1bc857..cc7d1494a45d 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); log_vector->i_len = size; + 
XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); ASSERT(size >= sizeof(xfs_efi_log_format_t)); } @@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); log_vector->i_len = size; + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); ASSERT(size >= sizeof(xfs_efd_log_format_t)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eed30f5cb19..276ec70eb7f9 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -248,6 +248,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); vecp++; nvecs = 1; @@ -292,6 +293,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(xfs_dinode_core_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; @@ -349,6 +351,7 @@ xfs_inode_item_format( vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } else #endif { @@ -367,6 +370,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -384,6 +388,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; @@ -409,6 +414,7 @@ xfs_inode_item_format( ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; @@ -486,6 +492,7 
@@ xfs_inode_item_format( vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -500,6 +507,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; @@ -523,6 +531,7 @@ xfs_inode_item_format( ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 42975cb9e538..54a6f1142403 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target; void xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) { - if (! log->l_grant_trace) { - log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); - if (! 
log->l_grant_trace) + unsigned long cnts; + + if (!log->l_grant_trace) { + log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); + if (!log->l_grant_trace) return; } + /* ticket counts are 1 byte each */ + cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; ktrace_enter(log->l_grant_trace, (void *)tic, @@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), (void *)string, - (void *)((unsigned long)13), - (void *)((unsigned long)14), - (void *)((unsigned long)15), - (void *)((unsigned long)16)); + (void *)((unsigned long)tic->t_trans_type), + (void *)cnts, + (void *)((unsigned long)tic->t_curr_res), + (void *)((unsigned long)tic->t_unit_res)); } void @@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp, * Release ticket if not permanent reservation or a specifc * request has been made to release a permanent reservation. */ + xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); xlog_state_put_ticket(log, ticket); } else { + xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); } @@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp, int cnt, xfs_log_ticket_t *ticket, __uint8_t client, - uint flags) + uint flags, + uint t_type) { xlog_t *log = mp->m_log; xlog_ticket_t *internal_ticket; @@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp, if (*ticket != NULL) { ASSERT(flags & XFS_LOG_PERM_RESERV); internal_ticket = (xlog_ticket_t *)*ticket; + xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); xlog_grant_push_ail(mp, internal_ticket->t_unit_res); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, client, flags); + 
internal_ticket->t_trans_type = t_type; *ticket = internal_ticket; + xlog_trace_loggrant(log, internal_ticket, + (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? + "xfs_log_reserve: create new ticket (permanent trans)" : + "xfs_log_reserve: create new ticket"); xlog_grant_push_ail(mp, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); @@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (! (XLOG_FORCED_SHUTDOWN(log))) { reg[0].i_addr = (void*)&magic; reg[0].i_len = sizeof(magic); + XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); + error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); if (!error) { /* remove inited flag */ ((xlog_ticket_t *)tic)->t_flags = 0; @@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp, reg[0].i_addr = NULL; reg[0].i_len = 0; + XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, @@ -1604,6 +1619,117 @@ xlog_state_finish_copy(xlog_t *log, +/* + * print out info relating to regions written which consume + * the reservation + */ +#if defined(XFS_LOG_RES_DEBUG) +STATIC void +xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) +{ + uint i; + uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); + + /* match with XLOG_REG_TYPE_* in xfs_log.h */ + static char *res_type_str[XLOG_REG_TYPE_MAX] = { + "bformat", + "bchunk", + "efi_format", + "efd_format", + "iformat", + "icore", + "iext", + "ibroot", + "ilocal", + "iattr_ext", + "iattr_broot", + "iattr_local", + "qformat", + "dquot", + "quotaoff", + "LR header", + "unmount", + "commit", + "trans header" + }; + static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { + "SETATTR_NOT_SIZE", + "SETATTR_SIZE", + "INACTIVE", + "CREATE", + "CREATE_TRUNC", + "TRUNCATE_FILE", + "REMOVE", + "LINK", + "RENAME", + "MKDIR", + "RMDIR", + "SYMLINK", + "SET_DMATTRS", + "GROWFS", + "STRAT_WRITE", + "DIOSTRAT", + "WRITE_SYNC", + "WRITEID", +
"ADDAFORK", + "ATTRINVAL", + "ATRUNCATE", + "ATTR_SET", + "ATTR_RM", + "ATTR_FLAG", + "CLEAR_AGI_BUCKET", + "QM_SBCHANGE", + "DUMMY1", + "DUMMY2", + "QM_QUOTAOFF", + "QM_DQALLOC", + "QM_SETQLIM", + "QM_DQCLUSTER", + "QM_QINOCREATE", + "QM_QUOTAOFF_END", + "SB_UNIT", + "FSYNC_TS", + "GROWFSRT_ALLOC", + "GROWFSRT_ZERO", + "GROWFSRT_FREE", + "SWAPEXT" + }; + + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_log_write: reservation summary:\n" + " trans type = %s (%u)\n" + " unit res = %d bytes\n" + " current res = %d bytes\n" + " total reg = %u bytes (o/flow = %u bytes)\n" + " ophdrs = %u (ophdr space = %u bytes)\n" + " ophdr + reg = %u bytes\n" + " num regions = %u\n", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? + "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), + ticket->t_trans_type, + ticket->t_unit_res, + ticket->t_curr_res, + ticket->t_res_arr_sum, ticket->t_res_o_flow, + ticket->t_res_num_ophdrs, ophdr_spc, + ticket->t_res_arr_sum + + ticket->t_res_o_flow + ophdr_spc, + ticket->t_res_num); + + for (i = 0; i < ticket->t_res_num; i++) { + uint r_type = ticket->t_res_arr[i].r_type; + cmn_err(CE_WARN, + "region[%u]: %s - %u bytes\n", + i, + ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? + "bad-rtype" : res_type_str[r_type-1]), + ticket->t_res_arr[i].r_len); + } +} +#else +#define xlog_print_tic_res(mp, ticket) +#endif + /* * Write some region out to in-core log * @@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp, * xlog_op_header_t and may need to be double word aligned. 
*/ len = 0; - if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ + if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ len += sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); + } for (index = 0; index < nentries; index++) { len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ + XLOG_TIC_ADD_OPHDR(ticket); len += reg[index].i_len; + XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type); } contwr = *start_lsn = 0; if (ticket->t_curr_res < len) { + xlog_print_tic_res(mp, ticket); #ifdef DEBUG xlog_panic( "xfs_log_write: reservation ran out. Need to up reservation"); @@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp, len += sizeof(xlog_op_header_t); /* from splitting of region */ /* account for new log op header */ ticket->t_curr_res -= sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); } xlog_verify_dest_ptr(log, ptr); @@ -2282,6 +2414,9 @@ restart: */ if (log_offset == 0) { ticket->t_curr_res -= log->l_iclog_hsize; + XLOG_TIC_ADD_REGION(ticket, + log->l_iclog_hsize, + XLOG_REG_TYPE_LRHEADER); INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); ASSIGN_LSN(head->h_lsn, log); ASSERT(log->l_curr_block >= 0); @@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log, #endif tic->t_curr_res = tic->t_unit_res; + XLOG_TIC_RESET_RES(tic); if (tic->t_cnt > 0) return (0); @@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: sub current res"); xlog_verify_grant_head(log, 1); @@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_verify_grant_head(log, 0); GRANT_UNLOCK(log, s); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); } /* xlog_regrant_reserve_log_space */ @@ -3237,10 +3375,13 @@ xlog_ticket_get(xlog_t 
*log, tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); tic->t_clientid = client; tic->t_flags = XLOG_TIC_INITED; + tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); + XLOG_TIC_RESET_RES(tic); + return tic; } /* xlog_ticket_get */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 0db122ddda3f..18961119fc65 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XFS_VOLUME 0x2 #define XFS_LOG 0xaa + +/* Region types for iovec's i_type */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_REG_TYPE_BFORMAT 1 +#define XLOG_REG_TYPE_BCHUNK 2 +#define XLOG_REG_TYPE_EFI_FORMAT 3 +#define XLOG_REG_TYPE_EFD_FORMAT 4 +#define XLOG_REG_TYPE_IFORMAT 5 +#define XLOG_REG_TYPE_ICORE 6 +#define XLOG_REG_TYPE_IEXT 7 +#define XLOG_REG_TYPE_IBROOT 8 +#define XLOG_REG_TYPE_ILOCAL 9 +#define XLOG_REG_TYPE_IATTR_EXT 10 +#define XLOG_REG_TYPE_IATTR_BROOT 11 +#define XLOG_REG_TYPE_IATTR_LOCAL 12 +#define XLOG_REG_TYPE_QFORMAT 13 +#define XLOG_REG_TYPE_DQUOT 14 +#define XLOG_REG_TYPE_QUOTAOFF 15 +#define XLOG_REG_TYPE_LRHEADER 16 +#define XLOG_REG_TYPE_UNMOUNT 17 +#define XLOG_REG_TYPE_COMMIT 18 +#define XLOG_REG_TYPE_TRANSHDR 19 +#define XLOG_REG_TYPE_MAX 19 +#endif + +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) +#else +#define XLOG_VEC_SET_TYPE(vecp, t) +#endif + + typedef struct xfs_log_iovec { xfs_caddr_t i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ +#if defined(XFS_LOG_RES_DEBUG) + uint i_type; /* type of region */ +#endif } xfs_log_iovec_t; typedef void* xfs_log_ticket_t; @@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp, int count, xfs_log_ticket_t *ticket, __uint8_t clientid, - uint flags); + uint flags, + uint t_type); int xfs_log_write(struct xfs_mount *mp, xfs_log_iovec_t region[], int nentries, diff --git 
a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1a1d452f15f9..eb7fdc6ebc32 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t; #define XLOG_COVER_OPS 5 + +/* Ticket reservation region accounting */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_TIC_LEN_MAX 15 +#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ + (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) +#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++) +#define XLOG_TIC_ADD_REGION(t, len, type) \ + do { \ + if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \ + /* add to overflow and start again */ \ + (t)->t_res_o_flow += (t)->t_res_arr_sum; \ + (t)->t_res_num = 0; \ + (t)->t_res_arr_sum = 0; \ + } \ + (t)->t_res_arr[(t)->t_res_num].r_len = (len); \ + (t)->t_res_arr[(t)->t_res_num].r_type = (type); \ + (t)->t_res_arr_sum += (len); \ + (t)->t_res_num++; \ + } while (0) + +/* + * Reservation region + * As would be stored in xfs_log_iovec but without the i_addr which + * we don't care about. 
+ */ +typedef struct xlog_res { + uint r_len; + uint r_type; +} xlog_res_t; +#else +#define XLOG_TIC_RESET_RES(t) +#define XLOG_TIC_ADD_OPHDR(t) +#define XLOG_TIC_ADD_REGION(t, len, type) +#endif + + typedef struct xlog_ticket { - sv_t t_sema; /* sleep on this semaphore :20 */ - struct xlog_ticket *t_next; /* : 4 */ - struct xlog_ticket *t_prev; /* : 4 */ - xlog_tid_t t_tid; /* transaction identifier : 4 */ - int t_curr_res; /* current reservation in bytes : 4 */ - int t_unit_res; /* unit reservation in bytes : 4 */ - __uint8_t t_ocnt; /* original count : 1 */ - __uint8_t t_cnt; /* current count : 1 */ - __uint8_t t_clientid; /* who does this belong to; : 1 */ - __uint8_t t_flags; /* properties of reservation : 1 */ + sv_t t_sema; /* sleep on this semaphore : 20 */ + struct xlog_ticket *t_next; /* :4|8 */ + struct xlog_ticket *t_prev; /* :4|8 */ + xlog_tid_t t_tid; /* transaction identifier : 4 */ + int t_curr_res; /* current reservation in bytes : 4 */ + int t_unit_res; /* unit reservation in bytes : 4 */ + char t_ocnt; /* original count : 1 */ + char t_cnt; /* current count : 1 */ + char t_clientid; /* who does this belong to; : 1 */ + char t_flags; /* properties of reservation : 1 */ + uint t_trans_type; /* transaction type : 4 */ + +#if defined (XFS_LOG_RES_DEBUG) + /* reservation array fields */ + uint t_res_num; /* num in array : 4 */ + xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */ + uint t_res_num_ophdrs; /* num op hdrs : 4 */ + uint t_res_arr_sum; /* array sum : 4 */ + uint t_res_o_flow; /* sum overflow : 4 */ +#endif } xlog_ticket_t; + #endif diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 06dfca531f79..92efe272b83d 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -276,7 +276,7 @@ xfs_trans_reserve( error = xfs_log_reserve(tp->t_mountp, logspace, logcount, &tp->t_ticket, - XFS_TRANSACTION, log_flags); + XFS_TRANSACTION, log_flags, tp->t_type); if (error) { goto undo_blocks; } @@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs( 
tp->t_header.th_num_items = nitems; log_vector->i_addr = (xfs_caddr_t)&tp->t_header; log_vector->i_len = sizeof(xfs_trans_header_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ec541d66fa2a..9ee5eeee8026 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -112,6 +112,7 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFSRT_ZERO 38 #define XFS_TRANS_GROWFSRT_FREE 39 #define XFS_TRANS_SWAPEXT 40 +#define XFS_TRANS_TYPE_MAX 40 /* new transaction types need to be reflected in xfs_logprint(8) */ From e69a333b5e0c8c6b687b07665a3cb5545657d2aa Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:42:26 +1000 Subject: [PATCH 12/36] [XFS] Add in grpid/nogrpid mount option parsing, actual code was always there.. SGI-PV: 939444 SGI-Modid: xfs-linux:xfs-kern:23162a Signed-off-by: Nathan Scott --- fs/xfs/xfs_vfsops.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b8ce6cad2b71..d4b9545c2b5c 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1621,6 +1621,10 @@ xfs_vget( #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ @@ -1741,6 +1745,12 @@ xfs_parseargs( } args->flags |= XFSMNT_IHASHSIZE; args->ihashsize = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, 
MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + vfsp->vfs_flag |= VFS_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + vfsp->vfs_flag &= ~VFS_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { args->flags |= XFSMNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { @@ -1862,6 +1872,7 @@ xfs_showargs( }; struct proc_xfs_info *xfs_infop; struct xfs_mount *mp = XFS_BHVTOM(bhv); + struct vfs *vfsp = XFS_MTOVFS(mp); for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) @@ -1898,7 +1909,10 @@ xfs_showargs( if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) seq_printf(m, "," MNTOPT_64BITINODE); - + + if (vfsp->vfs_flag & VFS_GRPID) + seq_printf(m, "," MNTOPT_GRPID); + return 0; } From 155ffd075caedcea5ad595c95403c71bfc391c4a Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:43:48 +1000 Subject: [PATCH 13/36] [XFS] Remove extraneous quotacheck diagnostics. SGI-PV: 907752 SGI-Modid: xfs-linux:xfs-kern:23163a Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f665ca8f9e96..4badf38df5e9 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -388,11 +388,8 @@ xfs_qm_mount_quotas( goto write_changes; } -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - cmn_err(CE_NOTE, "Attempting to turn on disk quotas."); -#endif - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + /* * Allocate the quotainfo structure inside the mount struct, and * create quotainode(s), and change/rev superblock if necessary. @@ -410,19 +407,14 @@ xfs_qm_mount_quotas( */ if (XFS_QM_NEED_QUOTACHECK(mp) && !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "Doing a quotacheck. Please wait."); -#endif if ((error = xfs_qm_quotacheck(mp))) { /* Quotacheck has failed and quotas have * been disabled. 
*/ return XFS_ERROR(error); } -#ifdef DEBUG - cmn_err(CE_NOTE, "Done quotacheck."); -#endif } + write_changes: /* * We actually don't have to acquire the SB_LOCK at all. From 0432dab2d2d3b35347a95c01c78a40781b6431fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:46:51 +1000 Subject: [PATCH 14/36] [XFS] remove struct vnode::v_type SGI-PV: 936236 SGI-Modid: xfs-linux:xfs-kern:195878a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl.c | 16 +++++++++---- fs/xfs/linux-2.6/xfs_iops.c | 6 ++--- fs/xfs/linux-2.6/xfs_super.c | 35 +++++++++++++++++----------- fs/xfs/linux-2.6/xfs_vnode.c | 16 +------------ fs/xfs/linux-2.6/xfs_vnode.h | 26 ++++++--------------- fs/xfs/xfs_acl.c | 6 ++--- fs/xfs/xfs_inode.c | 3 +-- fs/xfs/xfs_vnodeops.c | 45 ++++++++++++++++++------------------ 8 files changed, 68 insertions(+), 85 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 35cbd88e1a54..6a3326bcd8d0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -141,13 +141,19 @@ xfs_find_handle( return -XFS_ERROR(EINVAL); } - /* we need the vnode */ - vp = LINVFS_GET_VP(inode); - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + break; + default: iput(inode); return -XFS_ERROR(EBADF); } + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + /* now we can grab the fsid */ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); @@ -386,7 +392,7 @@ xfs_readlink_by_handle( return -error; /* Restrict this handle operation to symlinks only. 
*/ - if (vp->v_type != VLNK) { + if (!S_ISLNK(inode->i_mode)) { VN_RELE(vp); return -XFS_ERROR(EINVAL); } @@ -985,7 +991,7 @@ xfs_ioc_space( if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return -XFS_ERROR(EINVAL); if (copy_from_user(&bf, arg, sizeof(bf))) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f252605514eb..d237cc5be767 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -140,7 +140,6 @@ linvfs_mknod( memset(&va, 0, sizeof(va)); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; - va.va_type = IFTOVT(mode); va.va_mode = mode; switch (mode & S_IFMT) { @@ -308,14 +307,13 @@ linvfs_symlink( cvp = NULL; memset(&va, 0, sizeof(va)); - va.va_type = VLNK; - va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; + va.va_mode = S_IFLNK | + (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; error = 0; VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); if (!error && cvp) { - ASSERT(cvp->v_type == VLNK); ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); validate_fields(dir); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f6dd7de25927..d2c8a11e22b8 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -138,24 +138,25 @@ STATIC __inline__ void xfs_set_inodeops( struct inode *inode) { - vnode_t *vp = LINVFS_GET_VP(inode); - - if (vp->v_type == VNON) { - vn_mark_bad(vp); - } else if (S_ISREG(inode->i_mode)) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: inode->i_op = &linvfs_file_inode_operations; inode->i_fop = &linvfs_file_operations; inode->i_mapping->a_ops = &linvfs_aops; - } else if (S_ISDIR(inode->i_mode)) { + break; + case S_IFDIR: inode->i_op = &linvfs_dir_inode_operations; inode->i_fop = &linvfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { + break; + case S_IFLNK: inode->i_op = 
&linvfs_symlink_inode_operations; if (inode->i_blocks) inode->i_mapping->a_ops = &linvfs_aops; - } else { + break; + default: inode->i_op = &linvfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; } } @@ -167,16 +168,23 @@ xfs_revalidate_inode( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); + inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; - if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: inode->i_rdev = 0; - } else { - xfs_dev_t dev = ip->i_df.if_u2.if_rdev; - inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); + break; } + inode->i_blksize = PAGE_CACHE_SIZE; inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); @@ -231,7 +239,6 @@ xfs_initialize_vnode( * finish our work. */ if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { - vp->v_type = IFTOVT(ip->i_d.di_mode); xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 353276bda344..ad16af38e965 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -44,19 +44,6 @@ DEFINE_SPINLOCK(vnumber_lock); #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) sv_t vsync[NVSYNC]; -/* - * Translate stat(2) file types to vnode types and vice versa. - * Aware of numeric order of S_IFMT and vnode type values.
- */ -enum vtype iftovt_tab[] = { - VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, - VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON -}; - -u_short vttoif_tab[] = { - 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK -}; - void vn_init(void) @@ -95,7 +82,6 @@ vn_reclaim( vp->v_flag &= (VRECLM|VWAIT); VN_UNLOCK(vp, 0); - vp->v_type = VNON; vp->v_fbhv = NULL; #ifdef XFS_VNODE_TRACE @@ -174,7 +160,7 @@ vn_revalidate_core( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; + inode->i_mode = vap->va_mode; inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 6cb0a01df25d..bc9ed722ba1e 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -65,10 +65,6 @@ struct vattr; struct xfs_iomap; struct attrlist_cursor_kern; -/* - * Vnode types. VNON means no type. - */ -enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; typedef xfs_ino_t vnumber_t; typedef struct dentry vname_t; @@ -77,11 +73,9 @@ typedef bhv_head_t vn_bhv_head_t; /* * MP locking protocols: * v_flag, v_vfsp VN_LOCK/VN_UNLOCK - * v_type read-only or fs-dependent */ typedef struct vnode { __u32 v_flag; /* vnode flags (see below) */ - enum vtype v_type; /* vnode type */ struct vfs *v_vfsp; /* ptr to containing VFS */ vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ @@ -93,6 +87,12 @@ typedef struct vnode { /* inode MUST be last */ } vnode_t; +#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) +#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) +#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode) +#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) +#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) + #define v_fbhv v_bh.bh_first /* first behavior */ #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ @@ -132,17 +132,6 @@ typedef enum { #define 
LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) #define LINVFS_GET_IP(vp) (&(vp)->v_inode) -/* - * Convert between vnode types and inode formats (since POSIX.1 - * defines mode word of stat structure in terms of inode formats). - */ -extern enum vtype iftovt_tab[]; -extern u_short vttoif_tab[]; -#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) -#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) -#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) - - /* * Vnode flags. */ @@ -408,7 +397,6 @@ typedef struct vnodeops { */ typedef struct vattr { int va_mask; /* bit-mask of attributes present */ - enum vtype va_type; /* vnode type (for create) */ mode_t va_mode; /* file access mode and type */ xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ @@ -498,7 +486,7 @@ typedef struct vattr { * Check whether mandatory file locking is enabled. */ #define MANDLOCK(vp, mode) \ - ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); extern int vn_wait(struct vnode *); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8d01dce8c532..92fd1d67f878 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -85,7 +85,7 @@ xfs_acl_vhasacl_default( { int error; - if (vp->v_type != VDIR) + if (!VN_ISDIR(vp)) return 0; xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); return (error == 0); @@ -389,7 +389,7 @@ xfs_acl_allow_set( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return EPERM; - if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) + if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) return ENOTDIR; if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return EROFS; @@ -750,7 +750,7 @@ xfs_acl_inherit( * If the new file is a directory, its default ACL is a copy of * the containing directory's default ACL. 
*/ - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); if (!error && !basicperms) xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34bdf5909687..db43308aae93 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1128,7 +1128,6 @@ xfs_ialloc( ASSERT(ip != NULL); vp = XFS_ITOV(ip); - vp->v_type = IFTOVT(mode); ip->i_d.di_mode = (__uint16_t)mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -1250,7 +1249,7 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have a v_type we can set Linux inode ops (& unlock) */ + /* now that we have an i_mode we can set Linux inode ops (& unlock) */ VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1377c868f3f4..c4aa24ff85a2 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -104,7 +104,7 @@ xfs_open( * If it's a directory with any blocks, read-ahead block 0 * as we're almost certain to have the next operation be a read there. */ - if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { + if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { mode = xfs_ilock_map_shared(ip); if (ip->i_d.di_nextents > 0) (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); @@ -163,18 +163,21 @@ xfs_getattr( /* * Copy from in-core inode. */ - vap->va_type = vp->v_type; - vap->va_mode = ip->i_d.di_mode & MODEMASK; + vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; vap->va_projid = ip->i_d.di_projid; /* * Check vnode type block/char vs. everything else. - * Do it with bitmask because that's faster than looking - * for multiple values individually. 
*/ - if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + vap->va_rdev = ip->i_df.if_u2.if_rdev; + vap->va_blocksize = BLKDEV_IOSIZE; + break; + default: vap->va_rdev = 0; if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { @@ -224,9 +227,7 @@ xfs_getattr( (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); } - } else { - vap->va_rdev = ip->i_df.if_u2.if_rdev; - vap->va_blocksize = BLKDEV_IOSIZE; + break; } vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; @@ -468,7 +469,7 @@ xfs_setattr( m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. */ - if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) + if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) @@ -546,10 +547,10 @@ xfs_setattr( goto error_return; } - if (vp->v_type == VDIR) { + if (VN_ISDIR(vp)) { code = XFS_ERROR(EISDIR); goto error_return; - } else if (vp->v_type != VREG) { + } else if (!VN_ISREG(vp)) { code = XFS_ERROR(EINVAL); goto error_return; } @@ -1567,7 +1568,7 @@ xfs_release( vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); - if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { return 0; } @@ -1895,7 +1896,7 @@ xfs_create( dp = XFS_BHVTOI(dir_bdp); mp = dp->i_mount; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; namelen = VNAMELEN(dentry); if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { @@ -1973,8 +1974,7 @@ xfs_create( (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ?
vap->va_rdev : 0; - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 1, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, rdev, credp, prid, resblks > 0, &ip, &committed); if (error) { @@ -2620,7 +2620,7 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (src_vp->v_type == VDIR) + if (VN_ISDIR(src_vp)) return XFS_ERROR(EPERM); src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); @@ -2805,7 +2805,7 @@ xfs_mkdir( tp = NULL; dp_joined_to_trans = B_FALSE; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, @@ -2879,8 +2879,7 @@ xfs_mkdir( /* * create the directory inode. */ - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 2, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 0, credp, prid, resblks > 0, &cdp, NULL); if (error) { @@ -3650,7 +3649,7 @@ xfs_rwlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return 1; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -3681,7 +3680,7 @@ xfs_rwunlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -4567,7 +4566,7 @@ xfs_change_file_space( /* * must be a regular file and have write permission */ - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return XFS_ERROR(EINVAL); xfs_ilock(ip, XFS_ILOCK_SHARED); From 6f948fbd443255e3a918438ce41cd7581cf8146d Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:52:55 +1000 Subject: [PATCH 15/36] [XFS] Need to unlock the AIL before calling xfs_force_shutdown() because when it goes to force out the log, and get the tail lsn, it will want to get the AIL lock. 
SGI-PV: 940076 SGI-Modid: xfs-linux:xfs-kern:23260a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/xfs_trans_ail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7bc5eab4c2c1..2a71b4f91bfa 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -379,8 +379,8 @@ xfs_trans_delete_ail( else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); AIL_UNLOCK(mp, s); + xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); } } } From 760dea671ea9c5b8c732d76d09673d6d052a186f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:56:02 +1000 Subject: [PATCH 16/36] [XFS] Fix sparse warnings in kmem_* functions Patch from Victor Fusco SGI-PV: 940376 SGI-Modid: xfs-linux:xfs-kern:196705a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/kmem.c | 23 ++++++++++++----------- fs/xfs/linux-2.6/kmem.h | 23 ++++++++++++----------- fs/xfs/xfs_log_recover.c | 2 +- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 364ea8c386b1..4b184559f231 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,11 +45,11 @@ void * -kmem_alloc(size_t size, int flags) +kmem_alloc(size_t size, unsigned int __nocast flags) { - int retries = 0; - int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) @@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags) } void * -kmem_zalloc(size_t size, int flags) +kmem_zalloc(size_t size, unsigned int __nocast flags) { void *ptr; @@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size) } void * -kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +kmem_realloc(void *ptr, size_t 
newsize, size_t oldsize, + unsigned int __nocast flags) { void *new; @@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) } void * -kmem_zone_alloc(kmem_zone_t *zone, int flags) +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) { - int retries = 0; - int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { ptr = kmem_cache_alloc(zone, lflags); @@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, int flags) +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 1397b669b059..109fcf27e256 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -39,10 +39,10 @@ /* * memory management routines */ -#define KM_SLEEP 0x0001 -#define KM_NOSLEEP 0x0002 -#define KM_NOFS 0x0004 -#define KM_MAYFAIL 0x0008 +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u #define kmem_zone kmem_cache_s #define kmem_zone_t kmem_cache_t @@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(int flags) +static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) { - int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ #ifdef DEBUG if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { @@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, int); -extern void *kmem_zone_alloc(kmem_zone_t *, int); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_alloc(size_t, int); -extern void 
*kmem_realloc(void *, size_t, size_t, int); -extern void *kmem_zalloc(size_t, int); +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, + unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0aac28ddb81c..14faabaabf29 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; From 592cb26bda6fe69838529acf71e50a6dee7acbb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:56:14 +1000 Subject: [PATCH 17/36] [XFS] remove unessecary vnode flags SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196852a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.c | 59 +----------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 4 --- fs/xfs/xfs_iget.c | 11 ------- 3 files changed, 1 insertion(+), 73 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ad16af38e965..654da98de2a5 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -78,10 +78,6 @@ vn_reclaim( } ASSERT(vp->v_fbhv == NULL); - VN_LOCK(vp); - vp->v_flag &= (VRECLM|VWAIT); - VN_UNLOCK(vp, 0); - vp->v_fbhv = NULL; #ifdef XFS_VNODE_TRACE @@ -92,31 +88,6 @@ vn_reclaim( return 0; } -STATIC void -vn_wakeup( - struct vnode *vp) -{ - VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); - VN_UNLOCK(vp, 0); -} - 
-int -vn_wait( - struct vnode *vp) -{ - VN_LOCK(vp); - if (vp->v_flag & (VINACT | VRECLM)) { - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - return 1; - } - VN_UNLOCK(vp, 0); - return 0; -} - struct vnode * vn_initialize( struct inode *inode) @@ -221,7 +192,6 @@ vn_purge( { vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); -again: /* * Check whether vp has already been reclaimed since our caller * sampled its version while holding a filesystem cache lock that @@ -233,19 +203,6 @@ again: return; } - /* - * If vp is being reclaimed or inactivated, wait until it is inert, - * then proceed. Can't assume that vnode is actually reclaimed - * just because the reclaimed flag is asserted -- a vn_alloc - * reclaim can fail. - */ - if (vp->v_flag & (VINACT | VRECLM)) { - ASSERT(vn_count(vp) == 0); - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - goto again; - } - /* * Another process could have raced in and gotten this vnode... */ @@ -255,7 +212,6 @@ again: } XFS_STATS_DEC(vn_active); - vp->v_flag |= VRECLM; VN_UNLOCK(vp, 0); /* @@ -266,11 +222,6 @@ again: */ if (vn_reclaim(vp) != 0) panic("vn_purge: cannot reclaim"); - - /* - * Wakeup anyone waiting for vp to be reclaimed. - */ - vn_wakeup(vp); } /* @@ -315,11 +266,6 @@ vn_rele( * return. */ if (!vcnt) { - /* - * As soon as we turn this on, noone can find us in vn_get - * until we turn off VINACT or VRECLM - */ - vp->v_flag |= VINACT; VN_UNLOCK(vp, 0); /* @@ -330,10 +276,7 @@ vn_rele( VOP_INACTIVE(vp, NULL, cache); VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - - vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); + vp->v_flag &= ~VMODIFIED; } VN_UNLOCK(vp, 0); diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bc9ed722ba1e..4a74569a5690 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -135,9 +135,6 @@ typedef enum { /* * Vnode flags. 
*/ -#define VINACT 0x1 /* vnode is being inactivated */ -#define VRECLM 0x2 /* vnode is being reclaimed */ -#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ #define VMODIFIED 0x8 /* XFS inode state possibly differs */ /* to the Linux inode state. */ @@ -489,7 +486,6 @@ typedef struct vattr { (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern int vn_wait(struct vnode *); extern vnode_t *vn_initialize(struct inode *); /* diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d3da00045f26..fa796910f3aa 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -505,7 +505,6 @@ xfs_iget( vnode_t *vp = NULL; int error; -retry: XFS_STATS_INC(xs_ig_attempts); if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { @@ -526,16 +525,6 @@ inode_allocate: iput(inode); } } else { - /* These are true if the inode is in inactive or - * reclaim. The linux inode is about to go away, - * wait for that path to finish, and try again. - */ - if (vp->v_flag & (VINACT | VRECLM)) { - vn_wait(vp); - iput(inode); - goto retry; - } - if (is_bad_inode(inode)) { iput(inode); return EIO; From 51c91ed52b8a9a30fcb2a465b40c20a1f11735ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:58:38 +1000 Subject: [PATCH 18/36] [XFS] add infrastructure for waiting on I/O completion at inode reclaim time SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196854a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 11 ++------- fs/xfs/linux-2.6/xfs_vnode.c | 28 +++++++++++++++++---- fs/xfs/linux-2.6/xfs_vnode.h | 4 +++ fs/xfs/xfs_vnodeops.c | 47 +++--------------------------------- 4 files changed, 32 insertions(+), 58 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index bd9aba1f2353..b55cb7f02e88 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -139,7 +139,7 @@ linvfs_unwritten_convert( XFS_BUF_SET_FSPRIVATE(bp, NULL); 
XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_UNDATAIO(bp); - iput(LINVFS_GET_IP(vp)); + vn_iowake(vp); pagebuf_iodone(bp, 0, 0); } @@ -448,14 +448,7 @@ xfs_map_unwritten( if (!pb) return -EAGAIN; - /* Take a reference to the inode to prevent it from - * being reclaimed while we have outstanding unwritten - * extent IO on it. - */ - if ((igrab(inode)) != inode) { - pagebuf_free(pb); - return -EAGAIN; - } + atomic_inc(&LINVFS_GET_VP(inode)->v_iocount); /* Set the count to 1 initially, this will stop an I/O * completion callout which happens before we have started diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 654da98de2a5..46afc86a2862 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -42,17 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock); */ #define NVSYNC 37 #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -sv_t vsync[NVSYNC]; +STATIC wait_queue_head_t vsync[NVSYNC]; void vn_init(void) { - register sv_t *svp; - register int i; + int i; - for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) - init_sv(svp, SV_DEFAULT, "vsy", i); + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&vsync[i]); +} + +void +vn_iowait( + struct vnode *vp) +{ + wait_queue_head_t *wq = vptosync(vp); + + wait_event(*wq, (atomic_read(&vp->v_iocount) == 0)); +} + +void +vn_iowake( + struct vnode *vp) +{ + if (atomic_dec_and_test(&vp->v_iocount)) + wake_up(vptosync(vp)); } /* @@ -111,6 +127,8 @@ vn_initialize( /* Initialize the first behavior and the behavior chain head. 
*/ vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + atomic_set(&vp->v_iocount, 0); + #ifdef XFS_VNODE_TRACE vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); #endif /* XFS_VNODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 4a74569a5690..9977afa38900 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -80,6 +80,7 @@ typedef struct vnode { vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ + atomic_t v_iocount; /* outstanding I/O count */ #ifdef XFS_VNODE_TRACE struct ktrace *v_trace; /* trace header structure */ #endif @@ -506,6 +507,9 @@ extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); extern void vn_remove(struct vnode *); +extern void vn_iowait(struct vnode *vp); +extern void vn_iowake(struct vnode *vp); + static inline int vn_count(struct vnode *vp) { return atomic_read(&LINVFS_GET_IP(vp)->i_count); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c4aa24ff85a2..58bfe629b933 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3846,51 +3846,10 @@ xfs_reclaim( return 0; } - if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (ip->i_d.di_size > 0) { - /* - * Flush and invalidate any data left around that is - * a part of this file. - * - * Get the inode's i/o lock so that buffers are pushed - * out while holding the proper lock. We can't hold - * the inode lock here since flushing out buffers may - * cause us to try to get the lock in xfs_strategy(). - * - * We don't have to call remapf() here, because there - * cannot be any mapped file references to this vnode - * since it is being reclaimed. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); + vn_iowait(vp); - /* - * If we hit an IO error, we need to make sure that the - * buffer and page caches of file data for - * the file are tossed away. 
We don't want to use - * VOP_FLUSHINVAL_PAGES here because we don't want dirty - * pages to stay attached to the vnode, but be - * marked P_BAD. pdflush/vnode_pagebad - * hates that. - */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE); - } else { - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - } - - ASSERT(VN_CACHED(vp) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || - ip->i_delayed_blks == 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - /* - * di_size field may not be quite accurate if we're - * shutting down. - */ - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - ASSERT(VN_CACHED(vp) == 0); - } - } + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + ASSERT(VN_CACHED(vp) == 0); /* If we have nothing to flush with this inode then complete the * teardown now, otherwise break the link between the xfs inode From 0829c3602f4df95898752c402ea90b92a3e33154 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:58:49 +1000 Subject: [PATCH 19/36] [XFS] Add infrastructure for tracking I/O completions SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196856a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 156 ++++++++++++++++++++--------------- fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/linux-2.6/xfs_super.c | 58 +++++++++---- 4 files changed, 132 insertions(+), 85 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index b55cb7f02e88..ed98c7ac7cfd 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -104,22 +104,24 @@ xfs_page_trace( #define xfs_page_trace(tag, inode, page, mask) #endif -void -linvfs_unwritten_done( - struct buffer_head *bh, - int uptodate) +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. 
+ */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend) { - xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; + if (atomic_dec_and_test(&ioend->io_remaining)) + queue_work(xfsdatad_workqueue, &ioend->io_work); +} - ASSERT(buffer_unwritten(bh)); - bh->b_end_io = NULL; - clear_buffer_unwritten(bh); - if (!uptodate) - pagebuf_ioerror(pb, EIO); - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - end_buffer_async_write(bh, uptodate); +STATIC void +xfs_destroy_ioend( + xfs_ioend_t *ioend) +{ + vn_iowake(ioend->io_vnode); + mempool_free(ioend, xfs_ioend_pool); } /* @@ -127,20 +129,66 @@ linvfs_unwritten_done( * to written extents (buffered IO). */ STATIC void -linvfs_unwritten_convert( - xfs_buf_t *bp) +xfs_end_bio_unwritten( + void *data) { - vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); - int error; + xfs_ioend_t *ioend = data; + vnode_t *vp = ioend->io_vnode; + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; + int error; - BUG_ON(atomic_read(&bp->pb_hold) < 1); - VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), - BMAPI_UNWRITTEN, NULL, NULL, error); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_UNDATAIO(bp); - vn_iowake(vp); - pagebuf_iodone(bp, 0, 0); + if (ioend->io_uptodate) + VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + xfs_destroy_ioend(ioend); +} + +/* + * Allocate and initialise an IO completion structure. + * We need to track unwritten extent write completion here initially. + * We'll need to extend this for updating the ondisk inode size later + * (vs. incore size). + */ +STATIC xfs_ioend_t * +xfs_alloc_ioend( + struct inode *inode) +{ + xfs_ioend_t *ioend; + + ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); + + /* + * Set the count to 1 initially, which will prevent an I/O + * completion callback from happening before we have started + * all the I/O from calling the completion routine too early. 
+ */ + atomic_set(&ioend->io_remaining, 1); + ioend->io_uptodate = 1; /* cleared if any I/O fails */ + ioend->io_vnode = LINVFS_GET_VP(inode); + atomic_inc(&ioend->io_vnode->v_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; + + INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); + + return ioend; +} + +void +linvfs_unwritten_done( + struct buffer_head *bh, + int uptodate) +{ + xfs_ioend_t *ioend = bh->b_private; + + ASSERT(buffer_unwritten(bh)); + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + if (!uptodate) + ioend->io_uptodate = 0; + + xfs_finish_ioend(ioend); + end_buffer_async_write(bh, uptodate); } /* @@ -255,7 +303,7 @@ xfs_probe_unwritten_page( struct address_space *mapping, pgoff_t index, xfs_iomap_t *iomapp, - xfs_buf_t *pb, + xfs_ioend_t *ioend, unsigned long max_offset, unsigned long *fsbs, unsigned int bbits) @@ -283,7 +331,7 @@ xfs_probe_unwritten_page( break; xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; (*fsbs)++; } while ((bh = bh->b_this_page) != head); @@ -434,27 +482,15 @@ xfs_map_unwritten( { struct buffer_head *bh = curr; xfs_iomap_t *tmp; - xfs_buf_t *pb; - loff_t offset, size; + xfs_ioend_t *ioend; + loff_t offset; unsigned long nblocks = 0; offset = start_page->index; offset <<= PAGE_CACHE_SHIFT; offset += p_offset; - /* get an "empty" pagebuf to manage IO completion - * Proper values will be set before returning */ - pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); - if (!pb) - return -EAGAIN; - - atomic_inc(&LINVFS_GET_VP(inode)->v_iocount); - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling pagebuf_iodone too early. 
- */ - atomic_set(&pb->pb_io_remaining, 1); + ioend = xfs_alloc_ioend(inode); /* First map forwards in the page consecutive buffers * covering this unwritten extent @@ -467,12 +503,12 @@ xfs_map_unwritten( break; xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; nblocks++; } while ((bh = bh->b_this_page) != head); - atomic_add(nblocks, &pb->pb_io_remaining); + atomic_add(nblocks, &ioend->io_remaining); /* If we reached the end of the page, map forwards in any * following pages which are also covered by this extent. @@ -489,13 +525,13 @@ xfs_map_unwritten( tloff = min(tlast, tloff); for (tindex = start_page->index + 1; tindex < tloff; tindex++) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, PAGE_CACHE_SIZE, &bs, bbits); if (!page) break; nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); /* stop if converting the next page might add * enough blocks that the corresponding byte @@ -507,12 +543,12 @@ xfs_map_unwritten( if (tindex == tlast && (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, pg_offset, &bs, bbits); if (page) { nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) goto enough; @@ -521,21 +557,9 @@ xfs_map_unwritten( } enough: - size = nblocks; /* NB: using 64bit number here */ - size <<= block_bits; /* convert fsb's to byte range */ - - XFS_BUF_DATAIO(pb); - XFS_BUF_ASYNC(pb); - XFS_BUF_SET_SIZE(pb, size); - XFS_BUF_SET_COUNT(pb, 
size); - XFS_BUF_SET_OFFSET(pb, offset); - XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); - XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); - - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - + ioend->io_size = (xfs_off_t)nblocks << block_bits; + ioend->io_offset = offset; + xfs_finish_ioend(ioend); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 58286b1d733b..fba40cbdbcf1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -67,7 +67,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *xfslogd_workqueue; -STATIC struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsdatad_workqueue; /* * Pagebuf debugging diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 42dc5e4662ed..1c63fd3118d7 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index d2c8a11e22b8..1a0bcbbc0a86 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -70,11 +70,14 @@ #include #include #include +#include #include STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; -STATIC kmem_zone_t *linvfs_inode_zone; +STATIC kmem_zone_t *xfs_vnode_zone; +STATIC kmem_zone_t *xfs_ioend_zone; +mempool_t *xfs_ioend_pool; STATIC struct xfs_mount_args * xfs_args_allocate( @@ -281,8 +284,7 @@ linvfs_alloc_inode( { vnode_t *vp; - vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, - kmem_flags_convert(KM_SLEEP)); + vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP)); if (!vp) return NULL; return LINVFS_GET_IP(vp); @@ -292,11 +294,11 @@ STATIC void linvfs_destroy_inode( struct inode *inode) { - kmem_cache_free(linvfs_inode_zone, 
LINVFS_GET_VP(inode)); + kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); } STATIC void -init_once( +linvfs_inode_init_once( void *data, kmem_cache_t *cachep, unsigned long flags) @@ -309,21 +311,41 @@ init_once( } STATIC int -init_inodecache( void ) +linvfs_init_zones(void) { - linvfs_inode_zone = kmem_cache_create("linvfs_icache", + xfs_vnode_zone = kmem_cache_create("xfs_vnode", sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, - init_once, NULL); - if (linvfs_inode_zone == NULL) - return -ENOMEM; + linvfs_inode_init_once, NULL); + if (!xfs_vnode_zone) + goto out; + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out_destroy_vnode_zone; + + xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, + mempool_alloc_slab, mempool_free_slab, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_free_ioend_zone; + return 0; + + + out_free_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out_destroy_vnode_zone: + kmem_zone_destroy(xfs_vnode_zone); + out: + return -ENOMEM; } STATIC void -destroy_inodecache( void ) +linvfs_destroy_zones(void) { - if (kmem_cache_destroy(linvfs_inode_zone)) - printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_vnode_zone); + kmem_zone_destroy(xfs_ioend_zone); } /* @@ -873,9 +895,9 @@ init_xfs_fs( void ) ktrace_init(64); - error = init_inodecache(); + error = linvfs_init_zones(); if (error < 0) - goto undo_inodecache; + goto undo_zones; error = pagebuf_init(); if (error < 0) @@ -896,9 +918,9 @@ undo_register: pagebuf_terminate(); undo_pagebuf: - destroy_inodecache(); + linvfs_destroy_zones(); -undo_inodecache: +undo_zones: return error; } @@ -910,7 +932,7 @@ exit_xfs_fs( void ) unregister_filesystem(&xfs_fs_type); xfs_cleanup(); pagebuf_terminate(); - destroy_inodecache(); + linvfs_destroy_zones(); ktrace_uninit(); } From 65b3da3705ff873d8704074a75ac983495863380 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 
Sep 2005 08:18:12 +1000 Subject: [PATCH 20/36] [XFS] Add in the new xfs_aops.h header file for I/O completion struct. SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196857a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.h | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 fs/xfs/linux-2.6/xfs_aops.h diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h new file mode 100644 index 000000000000..ee46307a7321 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_AOPS_H__ +#define __XFS_AOPS_H__ + +extern struct workqueue_struct *xfsdatad_workqueue; +extern mempool_t *xfs_ioend_pool; + +typedef void (*xfs_ioend_func_t)(void *); + +typedef struct xfs_ioend { + unsigned int io_uptodate; /* I/O status register */ + atomic_t io_remaining; /* hold count */ + struct vnode *io_vnode; /* file being written to */ + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ +} xfs_ioend_t; + +#endif /* __XFS_IOPS_H__ */ From f09738638d3bae6501e8e160c66233832d8c280f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:22:52 +1000 Subject: [PATCH 21/36] [XFS] Delay direct I/O completion to a workqueue This is necessary because aio+dio completions may happen from irq context but we need process context for converting unwritten extents. We also queue regular direct I/O completions to a workqueue for regularity; there's only one queue_work call per syscall. SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196857a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 74 ++++++++++++++++++++++++------------- fs/xfs/linux-2.6/xfs_lrw.c | 3 -- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ed98c7ac7cfd..2add9a8a8df7 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -126,7 +126,7 @@ xfs_destroy_ioend( /* * Issue transactions to convert a buffer range from unwritten - * to written extents (buffered IO). + * to written extents.
*/ STATIC void xfs_end_bio_unwritten( @@ -191,29 +191,6 @@ linvfs_unwritten_done( end_buffer_async_write(bh, uptodate); } -/* - * Issue transactions to convert a buffer range from unwritten - * to written extents (direct IO). - */ -STATIC void -linvfs_unwritten_convert_direct( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) -{ - struct inode *inode = iocb->ki_filp->f_dentry->d_inode; - ASSERT(!private || inode == (struct inode *)private); - - /* private indicates an unwritten extent lay beneath this IO */ - if (private && size > 0) { - vnode_t *vp = LINVFS_GET_VP(inode); - int error; - - VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); - } -} - STATIC int xfs_map_blocks( struct inode *inode, @@ -1045,6 +1022,44 @@ linvfs_get_blocks_direct( create, 1, BMAPI_WRITE|BMAPI_DIRECT); } +STATIC void +linvfs_end_io_direct( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private) +{ + xfs_ioend_t *ioend = iocb->private; + + /* + * Non-NULL private data means we need to issue a transaction to + * convert a range from unwritten to written extents. This needs + * to happen from process contect but aio+dio I/O completion + * happens from irq context so we need to defer it to a workqueue. + * This is not nessecary for synchronous direct I/O, but we do + * it anyway to keep the code uniform and simpler. + * + * The core direct I/O code might be changed to always call the + * completion handler in the future, in which case all this can + * go away. + */ + if (private && size > 0) { + ioend->io_offset = offset; + ioend->io_size = size; + xfs_finish_ioend(ioend); + } else { + ASSERT(size >= 0); + xfs_destroy_ioend(ioend); + } + + /* + * blockdev_direct_IO can return an error even afer the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. 
+ */ + iocb->private = NULL; +} + STATIC ssize_t linvfs_direct_IO( int rw, @@ -1059,16 +1074,23 @@ linvfs_direct_IO( xfs_iomap_t iomap; int maps = 1; int error; + ssize_t ret; VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); if (error) return -error; - return blockdev_direct_IO_own_locking(rw, iocb, inode, + iocb->private = xfs_alloc_ioend(inode); + + ret = blockdev_direct_IO_own_locking(rw, iocb, inode, iomap.iomap_target->pbr_bdev, iov, offset, nr_segs, linvfs_get_blocks_direct, - linvfs_unwritten_convert_direct); + linvfs_end_io_direct); + + if (unlikely(ret <= 0 && iocb->private)) + xfs_destroy_ioend(iocb->private); + return ret; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index acab58c48043..3b5fabe8dae9 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -660,9 +660,6 @@ xfs_write( (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if (ioflags & IO_ISAIO) - return XFS_ERROR(-ENOSYS); - if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); From c1a073bdff997216eac25254a2716faf640e4e8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:23:35 +1000 Subject: [PATCH 22/36] [XFS] Delay I/O completion for unwritten extents after conversion SGI-PV: 936584 SGI-Modid: xfs-linux:xfs-kern:196886a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 27 +++++++++++++++++++++++++-- fs/xfs/linux-2.6/xfs_aops.h | 1 + 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2add9a8a8df7..ea615e2f476d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -136,10 +136,21 @@ xfs_end_bio_unwritten( vnode_t *vp = ioend->io_vnode; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; + struct buffer_head *bh, *next; int error; if (ioend->io_uptodate) VOP_BMAP(vp, offset, size, 
BMAPI_UNWRITTEN, NULL, NULL, error); + + /* ioend->io_buffer_head is only non-NULL for buffered I/O */ + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + end_buffer_async_write(bh, ioend->io_uptodate); + } + xfs_destroy_ioend(ioend); } @@ -165,6 +176,7 @@ xfs_alloc_ioend( atomic_set(&ioend->io_remaining, 1); ioend->io_uptodate = 1; /* cleared if any I/O fails */ ioend->io_vnode = LINVFS_GET_VP(inode); + ioend->io_buffer_head = NULL; atomic_inc(&ioend->io_vnode->v_iocount); ioend->io_offset = 0; ioend->io_size = 0; @@ -180,15 +192,26 @@ linvfs_unwritten_done( int uptodate) { xfs_ioend_t *ioend = bh->b_private; + static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; ASSERT(buffer_unwritten(bh)); bh->b_end_io = NULL; - clear_buffer_unwritten(bh); + if (!uptodate) ioend->io_uptodate = 0; + /* + * Deep magic here. We reuse b_private in the buffer_heads to build + * a chain for completing the I/O from user context after we've issued + * a transaction to convert the unwritten extent. 
+ */ + spin_lock_irqsave(&unwritten_done_lock, flags); + bh->b_private = ioend->io_buffer_head; + ioend->io_buffer_head = bh; + spin_unlock_irqrestore(&unwritten_done_lock, flags); + xfs_finish_ioend(ioend); - end_buffer_async_write(bh, uptodate); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index ee46307a7321..2fa62974a04d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -41,6 +41,7 @@ typedef struct xfs_ioend { unsigned int io_uptodate; /* I/O status register */ atomic_t io_remaining; /* hold count */ struct vnode *io_vnode; /* file being written to */ + struct buffer_head *io_buffer_head;/* buffer linked list head */ size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ From 56d433e430eb399a4b6d0e73d28af6e1d4713547 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:23:54 +1000 Subject: [PATCH 23/36] [XFS] streamline the clear_inode path SGI-PV: 940531 SGI-Modid: xfs-linux:xfs-kern:196888a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 34 ++++++--- fs/xfs/linux-2.6/xfs_vnode.c | 144 ----------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 3 - 3 files changed, 25 insertions(+), 156 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1a0bcbbc0a86..9b40a2799f7e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -383,18 +383,34 @@ linvfs_clear_inode( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); + int error, cache; - if (vp) { - vn_rele(vp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - /* - * Do all our cleanup, and remove this vnode. 
- */ - vn_remove(vp); - } + vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); + + ASSERT(vp->v_fbhv != NULL); + + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_INC(vn_reclaim); + XFS_STATS_DEC(vn_active); + + VOP_INACTIVE(vp, NULL, cache); + + VN_LOCK(vp); + vp->v_flag &= ~VMODIFIED; + VN_UNLOCK(vp, 0); + + VOP_RECLAIM(vp, error); + if (error) + panic("vn_purge: cannot reclaim"); + + ASSERT(vp->v_fbhv == NULL); + +#ifdef XFS_VNODE_TRACE + ktrace_free(vp->v_trace); +#endif } - /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. * Doing this has two advantages: diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 46afc86a2862..268f45bf6a9a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -71,39 +71,6 @@ vn_iowake( wake_up(vptosync(vp)); } -/* - * Clean a vnode of filesystem-specific data and prepare it for reuse. - */ -STATIC int -vn_reclaim( - struct vnode *vp) -{ - int error; - - XFS_STATS_INC(vn_reclaim); - vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); - - /* - * Only make the VOP_RECLAIM call if there are behaviors - * to call. - */ - if (vp->v_fbhv) { - VOP_RECLAIM(vp, error); - if (error) - return -error; - } - ASSERT(vp->v_fbhv == NULL); - - vp->v_fbhv = NULL; - -#ifdef XFS_VNODE_TRACE - ktrace_free(vp->v_trace); - vp->v_trace = NULL; -#endif - - return 0; -} - struct vnode * vn_initialize( struct inode *inode) @@ -197,51 +164,6 @@ vn_revalidate( return -error; } -/* - * purge a vnode from the cache - * At this point the vnode is guaranteed to have no references (vn_count == 0) - * The caller has to make sure that there are no ways someone could - * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). 
- */ -void -vn_purge( - struct vnode *vp, - vmap_t *vmap) -{ - vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); - - /* - * Check whether vp has already been reclaimed since our caller - * sampled its version while holding a filesystem cache lock that - * its VOP_RECLAIM function acquires. - */ - VN_LOCK(vp); - if (vp->v_number != vmap->v_number) { - VN_UNLOCK(vp, 0); - return; - } - - /* - * Another process could have raced in and gotten this vnode... - */ - if (vn_count(vp) > 0) { - VN_UNLOCK(vp, 0); - return; - } - - XFS_STATS_DEC(vn_active); - VN_UNLOCK(vp, 0); - - /* - * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells - * vp's filesystem to flush and invalidate all cached resources. - * When vn_reclaim returns, vp should have no private data, - * either in a system cache or attached to v_data. - */ - if (vn_reclaim(vp) != 0) - panic("vn_purge: cannot reclaim"); -} - /* * Add a reference to a referenced vnode. */ @@ -261,72 +183,6 @@ vn_hold( return vp; } -/* - * Call VOP_INACTIVE on last reference. - */ -void -vn_rele( - struct vnode *vp) -{ - int vcnt; - int cache; - - XFS_STATS_INC(vn_rele); - - VN_LOCK(vp); - - vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); - vcnt = vn_count(vp); - - /* - * Since we always get called from put_inode we know - * that i_count won't be decremented after we - * return. - */ - if (!vcnt) { - VN_UNLOCK(vp, 0); - - /* - * Do not make the VOP_INACTIVE call if there - * are no behaviors attached to the vnode to call. - */ - if (vp->v_fbhv) - VOP_INACTIVE(vp, NULL, cache); - - VN_LOCK(vp); - vp->v_flag &= ~VMODIFIED; - } - - VN_UNLOCK(vp, 0); - - vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); -} - -/* - * Finish the removal of a vnode. 
- */ -void -vn_remove( - struct vnode *vp) -{ - vmap_t vmap; - - /* Make sure we don't do this to the same vnode twice */ - if (!(vp->v_fbhv)) - return; - - XFS_STATS_INC(vn_remove); - vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); - - /* - * After the following purge the vnode - * will no longer exist. - */ - VMAP(vp, vmap); - vn_purge(vp, &vmap); -} - - #ifdef XFS_VNODE_TRACE #define KTRACE_ENTER(vp, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9977afa38900..35f306cebb87 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -502,10 +502,8 @@ typedef struct vnode_map { (vmap).v_number = (vp)->v_number, \ (vmap).v_ino = (vp)->v_inode.i_ino; } -extern void vn_purge(struct vnode *, vmap_t *); extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); -extern void vn_remove(struct vnode *); extern void vn_iowait(struct vnode *vp); extern void vn_iowake(struct vnode *vp); @@ -519,7 +517,6 @@ static inline int vn_count(struct vnode *vp) * Vnode reference counting functions (and macros for compatibility). */ extern vnode_t *vn_hold(struct vnode *); -extern void vn_rele(struct vnode *); #if defined(XFS_VNODE_TRACE) #define VN_HOLD(vp) \ From 4cd4a034a3ef020d9de48fe0a3f5f976e5134669 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Mon, 5 Sep 2005 08:24:10 +1000 Subject: [PATCH 24/36] [XFS] Need to be able to reset sb_qflags if not mounting with quotas having previously mounted with quotas. 
SGI-PV: 940491 SGI-Modid: xfs-linux:xfs-kern:23388a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot.h | 16 +------- fs/xfs/quota/xfs_qm.c | 14 +------ fs/xfs/quota/xfs_qm.h | 2 - fs/xfs/quota/xfs_qm_bhv.c | 44 +--------------------- fs/xfs/xfs_qmops.c | 78 +++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_quota.h | 17 ++++++++- 6 files changed, 95 insertions(+), 76 deletions(-) diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 39175103c8e0..8ebc87176c78 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -113,20 +113,6 @@ typedef struct xfs_dquot { #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) -/* - * Quota Accounting/Enforcement flags - */ -#define XFS_ALL_QUOTA_ACCT \ - (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) - -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) - #ifdef DEBUG static inline int XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 4badf38df5e9..efde16e0a913 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -365,16 +365,6 @@ xfs_qm_mount_quotas( int error = 0; uint sbf; - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license, and bail out. - */ - if (! XFS_IS_QUOTA_ON(mp) && - (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { - mp->m_qflags = 0; - goto write_changes; - } /* * If quotas on realtime volumes is not supported, we disable @@ -2002,7 +1992,7 @@ xfs_qm_quotacheck( ASSERT(mp->m_quotainfo != NULL); ASSERT(xfs_Gqm != NULL); xfs_qm_destroy_quotainfo(mp); - xfs_mount_reset_sbqflags(mp); + (void)xfs_mount_reset_sbqflags(mp); } else { cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index b03eecf3b6cb..0b00b3c67015 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct { #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) -extern void xfs_mount_reset_sbqflags(xfs_mount_t *); - extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern int xfs_qm_mount_quotas(xfs_mount_t *, int); extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index dc3c37a1e158..8890a18a99d8 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -229,48 +229,6 @@ xfs_qm_syncall( return error; } -/* - * Clear the quotaflags in memory and in the superblock. 
- */ -void -xfs_mount_reset_sbqflags( - xfs_mount_t *mp) -{ - xfs_trans_t *tp; - unsigned long s; - - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without SB_LOCK. - */ - if (mp->m_sb.sb_qflags == 0) - return; - s = XFS_SB_LOCK(mp); - mp->m_sb.sb_qflags = 0; - XFS_SB_UNLOCK(mp, s); - - /* - * if the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) - return; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT)) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return; - } - xfs_mod_sb(tp, XFS_SB_QFLAGS); - xfs_trans_commit(tp, 0, NULL); -} - STATIC int xfs_qm_newmount( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 4f40c92863d5..a6cd6324e946 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -42,7 +42,8 @@ #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" - +#include "xfs_quota.h" +#include "xfs_error.h" STATIC struct xfs_dquot * xfs_dqvopchown_default( @@ -54,8 +55,79 @@ xfs_dqvopchown_default( return NULL; } +/* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags(xfs_mount_t *mp) +{ + int error; + xfs_trans_t *tp; + unsigned long s; + + mp->m_qflags = 0; + /* + * It is OK to look at sb_qflags here in mount path, + * without SB_LOCK. 
+ */ + if (mp->m_sb.sb_qflags == 0) + return 0; + s = XFS_SB_LOCK(mp); + mp->m_sb.sb_qflags = 0; + XFS_SB_UNLOCK(mp, s); + + /* + * if the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) + return 0; +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + xfs_mod_sb(tp, XFS_SB_QFLAGS); + error = xfs_trans_commit(tp, 0, NULL); + return error; +} + +STATIC int +xfs_noquota_init( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + int error = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + } + return error; +} + xfs_qmops_t xfs_qmcore_stub = { - .xfs_qminit = (xfs_qminit_t) fs_noerr, + .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, .xfs_qmdone = (xfs_qmdone_t) fs_noerr, .xfs_qmmount = (xfs_qmmount_t) fs_noerr, .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 7134576ae7fa..32cb79752d5d 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -159,6 +159,20 @@ typedef struct xfs_qoff_logformat { #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ +/* + * Quota Accounting/Enforcement flags + */ +#define XFS_ALL_QUOTA_ACCT \ + (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) +#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) + +#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) +#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) +#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) + /* * Incore only flags for quotaoff - these bits get cleared when quota(s) * are in the process of getting turned off. These flags are in m_qflags but @@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops { f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); +extern int xfs_mount_reset_sbqflags(struct xfs_mount *); extern struct bhv_vfsops xfs_qmops; From 0c147f9a864f043e6f93a4bb3519c1166419bd74 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Mon, 5 Sep 2005 08:24:49 +1000 Subject: [PATCH 25/36] [XFS] Check if there is first behavior before calling VOP_RECLAIM from linvfs_clear_inode(). The behavior may go away in VOP_INACTIVE. 
SGI-PV: 941000 SGI-Modid: xfs-linux:xfs-kern:197355a Signed-off-by: Felix Blyakher Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9b40a2799f7e..cd3f8b3270ac 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -400,9 +400,11 @@ linvfs_clear_inode( vp->v_flag &= ~VMODIFIED; VN_UNLOCK(vp, 0); - VOP_RECLAIM(vp, error); - if (error) - panic("vn_purge: cannot reclaim"); + if (vp->v_fbhv) { + VOP_RECLAIM(vp, error); + if (error) + panic("vn_purge: cannot reclaim"); + } ASSERT(vp->v_fbhv == NULL); From 526c420c44b45b11e25a98f37702cc3044ba9bdc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 5 Sep 2005 08:25:06 +1000 Subject: [PATCH 26/36] [XFS] add handlers to fix xfs_flock_t alignment issues in compat ioctls SGI-PV: 938899 SGI-Modid: xfs-linux:xfs-kern:197403a Signed-off-by: Eric Sandeen Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl32.c | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0f8f1384eb36..4636b7f86f1f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -47,8 +47,52 @@ #include "xfs_vnode.h" #include "xfs_dfrag.h" +#define _NATIVE_IOC(cmd, type) \ + _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) + #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct xfs_flock64_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} xfs_flock64_32_t; + +#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) +#define XFS_IOC_FREESP_32 _IOW 
('X', 11, struct xfs_flock64_32) +#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) +#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) +#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) +#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) + +/* just account for different alignment */ +STATIC unsigned long +xfs_ioctl32_flock( + unsigned long arg) +{ + xfs_flock64_32_t __user *p32 = (void __user *)arg; + xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || + copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || + copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || + copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || + copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || + copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || + copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) + return -EFAULT; + + return (unsigned long)p; +} + #else typedef struct xfs_fsop_bulkreq32 { @@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) /* not handled case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_OPEN_BY_HANDLE: case XFS_IOC_FSSETDM_BY_HANDLE: @@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_ERROR_CLEARALL: break; -#ifndef BROKEN_X86_ALIGNMENT - /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ +#ifdef BROKEN_X86_ALIGNMENT + /* xfs_flock_t has wrong u32 vs u64 alignment */ + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: 
+ arg = xfs_ioctl32_flock(arg); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + break; + +#else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: @@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: + break; + + /* xfs_bstat_t still has wrong u32 vs u64 alignment */ case XFS_IOC_SWAPEXT: break; From 53937c52c3f1dff6100174f50a85c068f16713ae Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 08:27:50 +1000 Subject: [PATCH 27/36] [XFS] Manage spinlock differences between kernel versions a bit. SGI-PV: 904196 SGI-Modid: xfs-linux:xfs-kern:23563a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/spin.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h index bcf60a0b8df0..0039504069a5 100644 --- a/fs/xfs/linux-2.6/spin.h +++ b/fs/xfs/linux-2.6/spin.h @@ -45,6 +45,9 @@ typedef spinlock_t lock_t; #define SPLDECL(s) unsigned long s +#ifndef DEFINE_SPINLOCK +#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED +#endif #define spinlock_init(lock, name) spin_lock_init(lock) #define spinlock_destroy(lock) From 02ba71de98d5eee63e82cc2d88f9ea8430810a9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:28:02 +1000 Subject: [PATCH 28/36] [XFS] allow a null behaviour pointer in linvfs_clear_inode SGI-PV: 940531 SGI-Modid: xfs-linux:xfs-kern:197782a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cd3f8b3270ac..910e43bfc95b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -387,14 +387,17 @@ linvfs_clear_inode( vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); - ASSERT(vp->v_fbhv 
!= NULL); - XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_INC(vn_reclaim); XFS_STATS_DEC(vn_active); - VOP_INACTIVE(vp, NULL, cache); + /* + * This can happen because xfs_iget_core calls xfs_idestroy if we + * find an inode with di_mode == 0 but without IGET_CREATE set. + */ + if (vp->v_fbhv) + VOP_INACTIVE(vp, NULL, cache); VN_LOCK(vp); vp->v_flag &= ~VMODIFIED; From 0f9fffbcc1817c655d6dd40960ae2e0086b0f64f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:28:16 +1000 Subject: [PATCH 29/36] [XFS] remove some dead code from pagebuf SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:197783a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 30 ------------------------------ fs/xfs/linux-2.6/xfs_buf.h | 7 ------- 2 files changed, 37 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index fba40cbdbcf1..f43689754dae 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -699,25 +699,6 @@ xfs_buf_read_flags( return NULL; } -/* - * Create a skeletal pagebuf (no pages associated with it). - */ -xfs_buf_t * -pagebuf_lookup( - xfs_buftarg_t *target, - loff_t ioff, - size_t isize, - page_buf_flags_t flags) -{ - xfs_buf_t *pb; - - pb = pagebuf_allocate(flags); - if (pb) { - _pagebuf_initialize(pb, target, ioff, isize, flags); - } - return pb; -} - /* * If we are not low on memory then do the readahead in a deadlock * safe manner. @@ -891,17 +872,6 @@ pagebuf_rele( PB_TRACE(pb, "rele", pb->pb_relse); - /* - * pagebuf_lookup buffers are not hashed, not delayed write, - * and don't have their own release routines. Special case. 
- */ - if (unlikely(!hash)) { - ASSERT(!pb->pb_relse); - if (atomic_dec_and_test(&pb->pb_hold)) - xfs_buf_free(pb); - return; - } - if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { int do_free = 1; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 3f8f69a66aea..c322e9d71f3d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -206,13 +206,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ #define xfs_buf_read(target, blkno, len, flags) \ xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) -extern xfs_buf_t *pagebuf_lookup( - xfs_buftarg_t *, - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ - /* PBF_FORCEIO, */ - extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ /* no memory or disk address */ size_t len, From efa092f3d4c60be7e81de515db9f06e5f8426afc Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Mon, 5 Sep 2005 08:29:01 +1000 Subject: [PATCH 30/36] [XFS] Fixes a bug in the quota code when allocating a new dquot record which can cause an extent hole to be filled and a free extent to be processed. In this case, we make a few mistakes: forget to pass back the transaction, forget to put a hold on the buffer and forget to add the buf to the new transaction. 
SGI-PV: 940366 SGI-Modid: xfs-linux:xfs-kern:23594a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot.c | 43 +++++++++++++++++++++++++++++++++------- fs/xfs/xfs_trans.h | 1 + fs/xfs/xfs_trans_buf.c | 23 +++++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 46ce1e3ce1d6..e2e8d35fa4d0 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk( */ STATIC int xfs_qm_dqalloc( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_mount_t *mp, xfs_dquot_t *dqp, xfs_inode_t *quotip, @@ -433,6 +433,7 @@ xfs_qm_dqalloc( xfs_bmbt_irec_t map; int nmaps, error, committed; xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; ASSERT(tp != NULL); xfs_dqtrace_entry(dqp, "DQALLOC"); @@ -492,10 +493,32 @@ xfs_qm_dqalloc( xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), dqp->dq_flags & XFS_DQ_ALLTYPES, bp); - if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. 
+ */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { goto error1; } + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + *O_bpp = bp; return 0; @@ -514,7 +537,7 @@ xfs_qm_dqalloc( */ STATIC int xfs_qm_dqtobp( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dquot_t *dqp, xfs_disk_dquot_t **O_ddpp, xfs_buf_t **O_bpp, @@ -528,6 +551,7 @@ xfs_qm_dqtobp( xfs_disk_dquot_t *ddq; xfs_dqid_t id; boolean_t newdquot; + xfs_trans_t *tp = (tpp ? *tpp : NULL); mp = dqp->q_mount; id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); @@ -579,9 +603,10 @@ xfs_qm_dqtobp( return (ENOENT); ASSERT(tp); - if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, + if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, dqp->q_fileoffset, &bp))) return (error); + tp = *tpp; newdquot = B_TRUE; } else { /* @@ -645,7 +670,7 @@ xfs_qm_dqtobp( /* ARGSUSED */ STATIC int xfs_qm_dqread( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dqid_t id, xfs_dquot_t *dqp, /* dquot to get filled in */ uint flags) @@ -653,15 +678,19 @@ xfs_qm_dqread( xfs_disk_dquot_t *ddqp; xfs_buf_t *bp; int error; + xfs_trans_t *tp; + + ASSERT(tpp); /* * get a pointer to the on-disk dquot and the buffer containing it * dqp already knows its own type (GROUP/USER). 
*/ xfs_dqtrace_entry(dqp, "DQREAD"); - if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { return (error); } + tp = *tpp; /* copy everything from disk dquot to the incore dquot */ memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); @@ -740,7 +769,7 @@ xfs_qm_idtodq( * Read it from disk; xfs_dqread() takes care of * all the necessary initialization of dquot's fields (locks, etc) */ - if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { /* * This can happen if quotas got turned off (ESRCH), * or if the dquot didn't exist on disk and we ask to diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9ee5eeee8026..a263aec8b3a6 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -999,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 144da7a85466..e733293dd7f4 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -713,6 +713,29 @@ xfs_trans_bhold(xfs_trans_t *tp, xfs_buf_item_trace("BHOLD", bip); } +/* + * Cancel the previous buffer hold request made on this buffer + * for this transaction. 
+ */ +void +xfs_trans_bhold_release(xfs_trans_t *tp, + xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); + ASSERT(atomic_read(&bip->bli_refcount) > 0); + ASSERT(bip->bli_flags & XFS_BLI_HOLD); + bip->bli_flags &= ~XFS_BLI_HOLD; + xfs_buf_item_trace("BHOLD RELEASE", bip); +} + /* * This is called to mark bytes first through last inclusive of the given * buffer as needing to be logged when the transaction is committed. From ba403ab43e896c57f32995ccba9a6bd6ec8dd1b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:33:00 +1000 Subject: [PATCH 31/36] [XFS] Retry linux inode cache lookup if we found a stale inode. This fixes crashes under high nfs load SGI-PV: 941429 SGI-Modid: xfs-linux:xfs-kern:197929a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_iget.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index fa796910f3aa..0d9ae8fb4138 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -30,6 +30,8 @@ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ */ +#include + #include "xfs.h" #include "xfs_macros.h" @@ -507,14 +509,13 @@ xfs_iget( XFS_STATS_INC(xs_ig_attempts); +retry: if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { bhv_desc_t *bdp; xfs_inode_t *ip; - int newnode; vp = LINVFS_GET_VP(inode); if (inode->i_state & I_NEW) { -inode_allocate: vn_initialize(inode); error = xfs_iget_core(vp, mp, tp, ino, flags, lock_flags, ipp, bno); @@ -525,22 +526,25 @@ inode_allocate: iput(inode); } } else { - if (is_bad_inode(inode)) { + /* + * If the inode is not fully constructed due to + * filehandle mismatches wait for the inode to
go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. + */ + if (is_bad_inode(inode) || + ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), + &xfs_vnodeops)) == NULL)) { iput(inode); - return EIO; + delay(1); + goto retry; } - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - if (bdp == NULL) { - XFS_STATS_INC(xs_ig_dup); - goto inode_allocate; - } ip = XFS_BHVTOI(bdp); if (lock_flags != 0) xfs_ilock(ip, lock_flags); - newnode = (ip->i_d.di_mode == 0); - if (newnode) - xfs_iocore_inode_reinit(ip); XFS_STATS_INC(xs_ig_found); *ipp = ip; error = 0; From 2f926587512869ebf6bc820bd5f030e127aae774 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 5 Sep 2005 08:33:35 +1000 Subject: [PATCH 32/36] [XFS] Fix racy access to pb_flags. pagebuf_rele() modified pb_flags after the pagebuf had been unlocked if the buffer was delwri. At high load, this could result in a race when the superblock was being synced that would result the flags being incorrect and the iodone functions being executed incorrectly. This then leads to iclog callback failures or AIL list corruptions resulting in filesystem shutdowns. 
SGI-PV: 923981 SGI-Modid: xfs-linux:xfs-kern:23616a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 66 +++++++++++++++++++++++++++++--------- fs/xfs/linux-2.6/xfs_buf.h | 3 +- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f43689754dae..e6340906342c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -590,8 +590,10 @@ found: PB_SET_OWNER(pb); } - if (pb->pb_flags & PBF_STALE) + if (pb->pb_flags & PBF_STALE) { + ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); pb->pb_flags &= PBF_MAPPED; + } PB_TRACE(pb, "got_lock", 0); XFS_STATS_INC(pb_get_locked); return (pb); @@ -872,6 +874,17 @@ pagebuf_rele( PB_TRACE(pb, "rele", pb->pb_relse); + /* + * pagebuf_lookup buffers are not hashed, not delayed write, + * and don't have their own release routines. Special case. + */ + if (unlikely(!hash)) { + ASSERT(!pb->pb_relse); + if (atomic_dec_and_test(&pb->pb_hold)) + xfs_buf_free(pb); + return; + } + if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { int do_free = 1; @@ -883,22 +896,23 @@ pagebuf_rele( do_free = 0; } - if (pb->pb_flags & PBF_DELWRI) { - pb->pb_flags |= PBF_ASYNC; - atomic_inc(&pb->pb_hold); - pagebuf_delwri_queue(pb, 0); - do_free = 0; - } else if (pb->pb_flags & PBF_FS_MANAGED) { + if (pb->pb_flags & PBF_FS_MANAGED) { do_free = 0; } if (do_free) { + ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0); list_del_init(&pb->pb_hash_list); spin_unlock(&hash->bh_lock); pagebuf_free(pb); } else { spin_unlock(&hash->bh_lock); } + } else { + /* + * Catch reference count leaks + */ + ASSERT(atomic_read(&pb->pb_hold) >= 0); } } @@ -976,13 +990,24 @@ pagebuf_lock( * pagebuf_unlock * * pagebuf_unlock releases the lock on the buffer object created by - * pagebuf_lock or pagebuf_cond_lock (not any - * pinning of underlying pages created by pagebuf_pin). 
+ * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages + * created by pagebuf_pin). + * + * If the buffer is marked delwri but is not queued, do so before we + * unlock the buffer as we need to set flags correctly. We also need to + * take a reference for the delwri queue because the unlocker is going to + * drop theirs and they don't know we just queued it. */ void pagebuf_unlock( /* unlock buffer */ xfs_buf_t *pb) /* buffer to unlock */ { + if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { + atomic_inc(&pb->pb_hold); + pb->pb_flags |= PBF_ASYNC; + pagebuf_delwri_queue(pb, 0); + } + PB_CLEAR_OWNER(pb); up(&pb->pb_sema); PB_TRACE(pb, "unlock", 0); @@ -1486,6 +1511,11 @@ again: ASSERT(btp == bp->pb_target); if (!(bp->pb_flags & PBF_FS_MANAGED)) { spin_unlock(&hash->bh_lock); + /* + * Catch superblock reference count leaks + * immediately + */ + BUG_ON(bp->pb_bn == 0); delay(100); goto again; } @@ -1661,17 +1691,20 @@ pagebuf_delwri_queue( int unlock) { PB_TRACE(pb, "delwri_q", (long)unlock); - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == + (PBF_DELWRI|PBF_ASYNC)); spin_lock(&pbd_delwrite_lock); /* If already in the queue, dequeue and place at tail */ if (!list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); if (unlock) { atomic_dec(&pb->pb_hold); } list_del(&pb->pb_list); } + pb->pb_flags |= _PBF_DELWRI_Q; list_add_tail(&pb->pb_list, &pbd_delwrite_queue); pb->pb_queuetime = jiffies; spin_unlock(&pbd_delwrite_lock); @@ -1688,10 +1721,11 @@ pagebuf_delwri_dequeue( spin_lock(&pbd_delwrite_lock); if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); list_del_init(&pb->pb_list); dequeued = 1; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); spin_unlock(&pbd_delwrite_lock); if (dequeued) @@ -1770,7 +1804,7 @@ xfsbufd( break; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &=
~(PBF_DELWRI|_PBF_DELWRI_Q); pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } @@ -1820,15 +1854,13 @@ xfs_flush_buftarg( if (pb->pb_target != target) continue; - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); if (pagebuf_ispin(pb)) { pincount++; continue; } - pb->pb_flags &= ~PBF_DELWRI; - pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } spin_unlock(&pbd_delwrite_lock); @@ -1837,12 +1869,14 @@ xfs_flush_buftarg( * Dropped the delayed write list lock, now walk the temporary list */ list_for_each_entry_safe(pb, n, &tmp, pb_list) { + pagebuf_lock(pb); + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); + pb->pb_flags |= PBF_WRITE; if (wait) pb->pb_flags &= ~PBF_ASYNC; else list_del_init(&pb->pb_list); - pagebuf_lock(pb); pagebuf_iostrategy(pb); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index c322e9d71f3d..4b7fe3b5e460 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */ _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ + _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ } page_buf_flags_t; #define PBF_UPDATE (PBF_READ | PBF_WRITE) @@ -337,8 +338,6 @@ extern void pagebuf_trace( - - /* These are just for xfs_syncsub... 
it sets an internal variable * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t */ From 4df08c52582be558e12316ae60bf077ca8f17a1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:34:18 +1000 Subject: [PATCH 33/36] [XFS] Switch kernel thread handling to the kthread_ API SGI-PV: 942063 SGI-Modid: xfs-linux:xfs-kern:198388a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 24 ++++++++---------------- fs/xfs/linux-2.6/xfs_super.c | 36 +++++++++--------------------------- fs/xfs/linux-2.6/xfs_vfs.c | 1 - fs/xfs/linux-2.6/xfs_vfs.h | 2 -- 4 files changed, 17 insertions(+), 46 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e6340906342c..655bf4a78afe 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "xfs_linux.h" @@ -1742,9 +1743,7 @@ pagebuf_runall_queues( } /* Defines for pagebuf daemon */ -STATIC DECLARE_COMPLETION(xfsbufd_done); STATIC struct task_struct *xfsbufd_task; -STATIC int xfsbufd_active; STATIC int xfsbufd_force_flush; STATIC int xfsbufd_force_sleep; @@ -1770,14 +1769,8 @@ xfsbufd( xfs_buftarg_t *target; xfs_buf_t *pb, *n; - /* Set up the thread */ - daemonize("xfsbufd"); current->flags |= PF_MEMALLOC; - xfsbufd_task = current; - xfsbufd_active = 1; - barrier(); - INIT_LIST_HEAD(&tmp); do { if (unlikely(freezing(current))) { @@ -1825,9 +1818,9 @@ xfsbufd( purge_addresses(); xfsbufd_force_flush = 0; - } while (xfsbufd_active); + } while (!kthread_should_stop()); - complete_and_exit(&xfsbufd_done, 0); + return 0; } /* @@ -1910,9 +1903,11 @@ xfs_buf_daemons_start(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); - if (error < 0) + xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); + if (IS_ERR(xfsbufd_task)) { + error = PTR_ERR(xfsbufd_task); goto 
out_destroy_xfsdatad_workqueue; + } return 0; out_destroy_xfsdatad_workqueue: @@ -1929,10 +1924,7 @@ xfs_buf_daemons_start(void) STATIC void xfs_buf_daemons_stop(void) { - xfsbufd_active = 0; - barrier(); - wait_for_completion(&xfsbufd_done); - + kthread_stop(xfsbufd_task); destroy_workqueue(xfslogd_workqueue); destroy_workqueue(xfsdatad_workqueue); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 910e43bfc95b..0da87bfc9999 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -72,6 +72,7 @@ #include #include #include +#include STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; @@ -516,25 +517,16 @@ xfssyncd( { long timeleft; vfs_t *vfsp = (vfs_t *) arg; - struct list_head tmp; struct vfs_sync_work *work, *n; + LIST_HEAD (tmp); - daemonize("xfssyncd"); - - vfsp->vfs_sync_work.w_vfs = vfsp; - vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; - vfsp->vfs_sync_task = current; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - - INIT_LIST_HEAD(&tmp); timeleft = (xfs_syncd_centisecs * HZ) / 100; for (;;) { set_current_state(TASK_INTERRUPTIBLE); timeleft = schedule_timeout(timeleft); /* swsusp */ try_to_freeze(); - if (vfsp->vfs_flag & VFS_UMOUNT) + if (kthread_should_stop()) break; spin_lock(&vfsp->vfs_sync_lock); @@ -563,10 +555,6 @@ xfssyncd( } } - vfsp->vfs_sync_task = NULL; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - return 0; } @@ -574,13 +562,11 @@ STATIC int linvfs_start_syncd( vfs_t *vfsp) { - int pid; - - pid = kernel_thread(xfssyncd, (void *) vfsp, - CLONE_VM | CLONE_FS | CLONE_FILES); - if (pid < 0) - return -pid; - wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; + vfsp->vfs_sync_work.w_vfs = vfsp; + vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd"); + if (IS_ERR(vfsp->vfs_sync_task)) + return -PTR_ERR(vfsp->vfs_sync_task); return 0; } @@ -588,11 +574,7 @@ STATIC void linvfs_stop_syncd( vfs_t 
*vfsp) { - vfsp->vfs_flag |= VFS_UMOUNT; - wmb(); - - wake_up_process(vfsp->vfs_sync_task); - wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); + kthread_stop(vfsp->vfs_sync_task); } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index 669c61644959..34cc902ec119 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -251,7 +251,6 @@ vfs_allocate( void ) bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); INIT_LIST_HEAD(&vfsp->vfs_sync_list); spin_lock_init(&vfsp->vfs_sync_lock); - init_waitqueue_head(&vfsp->vfs_wait_sync_task); init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); return vfsp; } diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7ee1f714e9ba..f0ab574fb47a 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -65,7 +65,6 @@ typedef struct vfs { spinlock_t vfs_sync_lock; /* work item list lock */ int vfs_sync_seq; /* sync thread generation no. */ wait_queue_head_t vfs_wait_single_sync_task; - wait_queue_head_t vfs_wait_sync_task; } vfs_t; #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ @@ -96,7 +95,6 @@ typedef enum { #define VFS_RDONLY 0x0001 /* read-only vfs */ #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ -#define VFS_UMOUNT 0x0008 /* unmount in progress */ #define VFS_END 0x0008 /* max flag */ #define SYNC_ATTR 0x0001 /* sync attributes */ From a3c476d8a19ded7c5f1e17ea07df377764d9d1d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:40:49 +1000 Subject: [PATCH 34/36] [XFS] replace "extern inline" with "static inline" Patch from Adrian Bunk , thanks a lot! 
SGI-PV: 942227 SGI-Modid: xfs-linux:xfs-kern:198642a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 4b7fe3b5e460..67c19f799232 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -444,7 +444,7 @@ extern void pagebuf_trace( #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) -extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) +static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) { if (bp->pb_flags & PBF_MAPPED) return XFS_BUF_PTR(bp) + offset; From c31e887807a3eab26614ee142629ba447cbcc0dc Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 10:06:55 +1000 Subject: [PATCH 35/36] [XFS] Fix incorrect use of BMAPI_READ in unwritten extent handling (luckily just cosmetic). SGI-PV: 942232 SGI-Modid: xfs-linux-melb:xfs-kern:23718a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/xfs_iomap.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ea615e2f476d..c6c077978fe3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -804,7 +804,7 @@ xfs_page_state_convert( continue; if (!iomp) { err = xfs_map_blocks(inode, offset, len, &iomap, - BMAPI_READ|BMAPI_IGNSTATE); + BMAPI_WRITE|BMAPI_IGNSTATE); if (err) { goto error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 44999d557d8e..d0f5be63cddb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -226,13 +226,12 @@ xfs_iomap( xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); lockmode = XFS_LCK_MAP_SHARED(mp, io); bmapi_flags = XFS_BMAPI_ENTIRE; - if (flags & BMAPI_IGNSTATE) - bmapi_flags |= XFS_BMAPI_IGSTATE; break; case BMAPI_WRITE: xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); lockmode = 
XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; - bmapi_flags = 0; + if (flags & BMAPI_IGNSTATE) + bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; XFS_ILOCK(mp, io, lockmode); break; case BMAPI_ALLOCATE: From cde410a99d0dd38eb218be884d02034fcdf5125b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 11:47:01 +1000 Subject: [PATCH 36/36] [XFS] Sort out some cosmetic differences between XFS trees. SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:23719a Signed-off-by: Nathan Scott --- fs/xfs/Makefile | 151 +------------------------------- fs/xfs/Makefile-linux-2.6 | 141 +++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_iops.c | 9 +- fs/xfs/linux-2.6/xfs_linux.h | 12 ++- fs/xfs/quota/Makefile | 1 + fs/xfs/quota/Makefile-linux-2.6 | 53 +++++++++++ fs/xfs/support/debug.c | 1 + fs/xfs/xfs_vfsops.c | 10 +-- 8 files changed, 217 insertions(+), 161 deletions(-) create mode 100644 fs/xfs/Makefile-linux-2.6 create mode 100644 fs/xfs/quota/Makefile create mode 100644 fs/xfs/quota/Makefile-linux-2.6 diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d3ff78354638..49e3e7e5e3dc 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -1,150 +1 @@ -# -# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. 
Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write the Free Software Foundation, Inc., 59 -# Temple Place - Suite 330, Boston MA 02111-1307, USA. -# -# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, -# Mountain View, CA 94043, or: -# -# http://www.sgi.com -# -# For further information regarding this notice, see: -# -# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ -# - -EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char - -ifeq ($(CONFIG_XFS_DEBUG),y) - EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG - EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING -endif -ifeq ($(CONFIG_XFS_TRACE),y) - EXTRA_CFLAGS += -DXFS_ALLOC_TRACE - EXTRA_CFLAGS += -DXFS_ATTR_TRACE - EXTRA_CFLAGS += -DXFS_BLI_TRACE - EXTRA_CFLAGS += -DXFS_BMAP_TRACE - EXTRA_CFLAGS += -DXFS_BMBT_TRACE - EXTRA_CFLAGS += -DXFS_DIR_TRACE - EXTRA_CFLAGS += -DXFS_DIR2_TRACE - EXTRA_CFLAGS += -DXFS_DQUOT_TRACE - EXTRA_CFLAGS += -DXFS_ILOCK_TRACE - EXTRA_CFLAGS += -DXFS_LOG_TRACE - EXTRA_CFLAGS += -DXFS_RW_TRACE - EXTRA_CFLAGS += -DPAGEBUF_TRACE - EXTRA_CFLAGS += -DXFS_VNODE_TRACE -endif - -obj-$(CONFIG_XFS_FS) += xfs.o - -xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ - xfs_dquot.o \ - xfs_dquot_item.o \ - xfs_trans_dquot.o \ - xfs_qm_syscalls.o \ - xfs_qm_bhv.o \ - xfs_qm.o) -ifeq ($(CONFIG_XFS_QUOTA),y) -xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o -endif - -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o -xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o -xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o -xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o - - -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_behavior.o \ - xfs_bit.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - 
xfs_btree.o \ - xfs_buf_item.o \ - xfs_da_btree.o \ - xfs_dir.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dir_leaf.o \ - xfs_error.o \ - xfs_extfree_item.o \ - xfs_fsops.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_iget.o \ - xfs_inode.o \ - xfs_inode_item.o \ - xfs_iocore.o \ - xfs_iomap.o \ - xfs_itable.o \ - xfs_dfrag.o \ - xfs_log.o \ - xfs_log_recover.o \ - xfs_macros.o \ - xfs_mount.o \ - xfs_rename.o \ - xfs_trans.o \ - xfs_trans_ail.o \ - xfs_trans_buf.o \ - xfs_trans_extfree.o \ - xfs_trans_inode.o \ - xfs_trans_item.o \ - xfs_utils.o \ - xfs_vfsops.o \ - xfs_vnodeops.o \ - xfs_rw.o \ - xfs_dmops.o \ - xfs_qmops.o - -xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o - -# Objects in linux-2.6/ -xfs-y += $(addprefix linux-2.6/, \ - kmem.o \ - xfs_aops.o \ - xfs_buf.o \ - xfs_file.o \ - xfs_fs_subr.o \ - xfs_globals.o \ - xfs_ioctl.o \ - xfs_iops.o \ - xfs_lrw.o \ - xfs_super.o \ - xfs_vfs.o \ - xfs_vnode.o) - -# Objects in support/ -xfs-y += $(addprefix support/, \ - debug.o \ - move.o \ - qsort.o \ - uuid.o) - -xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o - +include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 new file mode 100644 index 000000000000..fbfcbe5a7cda --- /dev/null +++ b/fs/xfs/Makefile-linux-2.6 @@ -0,0 +1,141 @@ +# +# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char + +XFS_LINUX := linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG + EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_ALLOC_TRACE + EXTRA_CFLAGS += -DXFS_ATTR_TRACE + EXTRA_CFLAGS += -DXFS_BLI_TRACE + EXTRA_CFLAGS += -DXFS_BMAP_TRACE + EXTRA_CFLAGS += -DXFS_BMBT_TRACE + EXTRA_CFLAGS += -DXFS_DIR_TRACE + EXTRA_CFLAGS += -DXFS_DIR2_TRACE + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_ILOCK_TRACE + EXTRA_CFLAGS += -DXFS_LOG_TRACE + EXTRA_CFLAGS += -DXFS_RW_TRACE + EXTRA_CFLAGS += -DPAGEBUF_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_FS) += xfs.o +obj-$(CONFIG_XFS_QUOTA) += quota/ + +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o +xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o +xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o +xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o + + +xfs-y += xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ 
+ xfs_attr_leaf.o \ + xfs_behavior.o \ + xfs_bit.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_buf_item.o \ + xfs_da_btree.o \ + xfs_dir.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dir_leaf.o \ + xfs_error.o \ + xfs_extfree_item.o \ + xfs_fsops.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_iget.o \ + xfs_inode.o \ + xfs_inode_item.o \ + xfs_iocore.o \ + xfs_iomap.o \ + xfs_itable.o \ + xfs_dfrag.o \ + xfs_log.o \ + xfs_log_recover.o \ + xfs_macros.o \ + xfs_mount.o \ + xfs_rename.o \ + xfs_trans.o \ + xfs_trans_ail.o \ + xfs_trans_buf.o \ + xfs_trans_extfree.o \ + xfs_trans_inode.o \ + xfs_trans_item.o \ + xfs_utils.o \ + xfs_vfsops.o \ + xfs_vnodeops.o \ + xfs_rw.o \ + xfs_dmops.o \ + xfs_qmops.o + +xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o + +# Objects in linux/ +xfs-y += $(addprefix $(XFS_LINUX)/, \ + kmem.o \ + xfs_aops.o \ + xfs_buf.o \ + xfs_file.o \ + xfs_fs_subr.o \ + xfs_globals.o \ + xfs_ioctl.o \ + xfs_iops.o \ + xfs_lrw.o \ + xfs_super.o \ + xfs_vfs.o \ + xfs_vnode.o) + +# Objects in support/ +xfs-y += $(addprefix support/, \ + debug.o \ + move.o \ + uuid.o) + +xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o + diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index d237cc5be767..77708a8c9f87 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -423,9 +423,14 @@ linvfs_follow_link( return NULL; } -static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +STATIC void +linvfs_put_link( + struct dentry *dentry, + struct nameidata *nd, + void *p) { - char *s = nd_get_link(nd); + char *s = nd_get_link(nd); + if (!IS_ERR(s)) kfree(s); } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 1c63fd3118d7..68c5d885ed9c 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -64,7 +64,6 @@ #include #include -#include #include #include #include @@ 
-255,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define qsort(a,n,s,fn) sort(a,n,s,fn,NULL) +/* + * Various platform dependent calls that don't fit anywhere else + */ #define xfs_stack_trace() dump_stack() - #define xfs_itruncate_data(ip, off) \ (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) +#define xfs_statvfs_fsid(statp, mp) \ + ({ u64 id = huge_encode_dev((mp)->m_dev); \ + __kernel_fsid_t *fsid = &(statp)->f_fsid; \ + (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) /* Move the kernel do_div definition off to one side */ @@ -372,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) return(x * y); } -#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) - #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile new file mode 100644 index 000000000000..7a4f725b2824 --- /dev/null +++ b/fs/xfs/quota/Makefile @@ -0,0 +1 @@ +include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6 new file mode 100644 index 000000000000..8b7b676718b9 --- /dev/null +++ b/fs/xfs/quota/Makefile-linux-2.6 @@ -0,0 +1,53 @@ +# +# Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. 
Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DDEBUG + #EXTRA_CFLAGS += -DQUOTADEBUG +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o + +xfs_quota-y += xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o + +xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 4ed7b6928cd7..4e1a5ec22fa3 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -31,6 +31,7 @@ */ #include "debug.h" +#include "spin.h" #include #include diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d4b9545c2b5c..f1a904e23ade 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -795,7 +795,6 @@ xfs_statvfs( xfs_mount_t *mp; xfs_sb_t *sbp; unsigned long s; - u64 id; mp = XFS_BHVTOM(bdp); sbp = &(mp->m_sb); @@ -823,9 +822,7 @@ xfs_statvfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); XFS_SB_UNLOCK(mp, s); - id = huge_encode_dev(mp->m_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); + xfs_statvfs_fsid(statp, mp); statp->f_namelen 
= MAXNAMELEN - 1; return 0; @@ -1505,7 +1502,10 @@ xfs_syncsub( * eventually kicked out of the cache. */ if (flags & SYNC_REFCACHE) { - xfs_refcache_purge_some(mp); + if (flags & SYNC_WAIT) + xfs_refcache_purge_mp(mp); + else + xfs_refcache_purge_some(mp); } /*