diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 3349c9a1e845..ff065578969f 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -139,6 +139,8 @@ xfs_attr_get( args.value = value; args.valuelen = *valuelenp; + /* Entirely possible to look up a name which doesn't exist */ + args.op_flags = XFS_DA_OP_OKNOENT; lock_mode = xfs_ilock_attr_map_shared(ip); if (!xfs_inode_hasattr(ip)) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 3264d81488db..cd2201f5ab52 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1822,6 +1822,7 @@ xfs_da3_path_shift( struct xfs_da_args *args; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; + struct xfs_buf *bp; xfs_dablk_t blkno = 0; int level; int error; @@ -1866,20 +1867,24 @@ xfs_da3_path_shift( */ for (blk++, level++; level < path->active; blk++, level++) { /* - * Release the old block. - * (if it's dirty, trans won't actually let go) + * Read the next child block into a local buffer. + */ + error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp, + args->whichfork); + if (error) + return error; + + /* + * Release the old block (if it's dirty, the trans doesn't + * actually let go) and swap the local buffer into the path + * structure. This ensures failure of the above read doesn't set + * a NULL buffer in an active slot in the path. */ if (release) xfs_trans_brelse(args->trans, blk->bp); - - /* - * Read the next child block. - */ blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, dp, blkno, -1, - &blk->bp, args->whichfork); - if (error) - return error; + blk->bp = bp; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 74bcbabfa523..b14bbd6bb05f 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote { typedef struct xfs_attr_leafblock { xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced and definining them can actually make gcc optimize away + * accesses to the 'entries' array above index 0 so don't do that. + * + * xfs_attr_leaf_name_local_t namelist; + * xfs_attr_leaf_name_remote_t valuelist; + */ } xfs_attr_leafblock_t; /* diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index e0ba97610f01..9de401d297e5 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -362,6 +362,7 @@ xfs_dir_lookup( struct xfs_da_args *args; int rval; int v; /* type-checking value */ + int lock_mode; ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_lookup); @@ -387,6 +388,7 @@ xfs_dir_lookup( if (ci_name) args->op_flags |= XFS_DA_OP_CILOOKUP; + lock_mode = xfs_ilock_data_map_shared(dp); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_lookup(args); goto out_check_rval; @@ -419,6 +421,7 @@ out_check_rval: } } out_free: + xfs_iunlock(dp, lock_mode); kmem_free(args); return rval; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 6a57fdbc63ef..824131e71bc5 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify( return; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): - xfs_dir3_data_verify(bp); + bp->b_ops = &xfs_dir3_data_buf_ops; + bp->b_ops->verify_read(bp); return; default: xfs_buf_ioerror(bp, -EFSCORRUPTED); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 21009dbdc21d..47425140f343 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -186,9 +186,6 @@ xfs_mount_validate_sb( if (xfs_sb_version_hassparseinodes(sbp)) { uint32_t align; - xfs_alert(mp, - "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!"); - align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize >> sbp->sb_blocklog; if (sbp->sb_inoalignmt != align) { diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 098cd78fe708..a989a9c7edb7 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -171,6 +171,7 @@ xfs_dir2_block_getdents( int wantoff; /* starting block offset */ xfs_off_t cook; struct xfs_da_geometry *geo = args->geo; + int lock_mode; /* * If the block number in the offset is out of range, we're done. @@ -178,7 +179,9 @@ xfs_dir2_block_getdents( if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir3_block_read(NULL, dp, &bp); + xfs_iunlock(dp, lock_mode); if (error) return error; @@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents( * current buffer, need to get another one. */ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { + int lock_mode; + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, &curoff, &bp); + xfs_iunlock(dp, lock_mode); if (error || !map_info->map_valid) break; @@ -653,7 +659,6 @@ xfs_readdir( struct xfs_da_args args = { NULL }; int rval; int v; - uint lock_mode; trace_xfs_readdir(dp); @@ -666,7 +671,7 @@ xfs_readdir( args.dp = dp; args.geo = dp->i_mount->m_dir_geo; - lock_mode = xfs_ilock_data_map_shared(dp); + xfs_ilock(dp, XFS_IOLOCK_SHARED); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_getdents(&args, ctx); else if ((rval = xfs_dir2_isblock(&args, &v))) @@ -675,7 +680,7 @@ xfs_readdir( rval = xfs_dir2_block_getdents(&args, ctx); else rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); - xfs_iunlock(dp, lock_mode); + xfs_iunlock(dp, XFS_IOLOCK_SHARED); return rval; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 6964d7ceba96..30cb3afb67f0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk( d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_flags = type; if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index db4acc1c3e73..de2c2376242b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -317,24 +317,33 @@ xfs_file_read_iter( return -EIO; /* - * Locking is a bit tricky here. If we take an exclusive lock - * for direct IO, we effectively serialise all new concurrent - * read IO to this file and block it behind IO that is currently in - * progress because IO in progress holds the IO lock shared. We only - * need to hold the lock exclusive to blow away the page cache, so - * only take lock exclusively if the page cache needs invalidation. - * This allows the normal direct IO case of no page cache pages to - * proceeed concurrently without serialisation. + * Locking is a bit tricky here. If we take an exclusive lock for direct + * IO, we effectively serialise all new concurrent read IO to this file + * and block it behind IO that is currently in progress because IO in + * progress holds the IO lock shared. We only need to hold the lock + * exclusive to blow away the page cache, so only take lock exclusively + * if the page cache needs invalidation. This allows the normal direct + * IO case of no page cache pages to proceeed concurrently without + * serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + /* + * The generic dio code only flushes the range of the particular + * I/O. Because we take an exclusive lock here, this whole + * sequence is considerably more expensive for us. This has a + * noticeable performance impact for any file with cached pages, + * even when outside of the range of the particular I/O. + * + * Hence, amortize the cost of the lock against a full file + * flush and reduce the chances of repeated iolock cycles going + * forward. + */ if (inode->i_mapping->nrpages) { - ret = filemap_write_and_wait_range( - VFS_I(ip)->i_mapping, - pos, pos + size - 1); + ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; @@ -345,9 +354,7 @@ xfs_file_read_iter( * we fail to invalidate a page, but this should never * happen on XFS. Warn if it does fail. */ - ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, - pos >> PAGE_CACHE_SHIFT, - (pos + size - 1) >> PAGE_CACHE_SHIFT); + ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); WARN_ON_ONCE(ret); ret = 0; } @@ -733,19 +740,19 @@ xfs_file_dio_aio_write( pos = iocb->ki_pos; end = pos + count - 1; + /* + * See xfs_file_read_iter() for why we do a full-file flush here. + */ if (mapping->nrpages) { - ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, end); + ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (ret) goto out; /* - * Invalidate whole pages. This can return an error if - * we fail to invalidate a page, but this should never - * happen on XFS. Warn if it does fail. + * Invalidate whole pages. This can return an error if we fail + * to invalidate a page, but this should never happen on XFS. + * Warn if it does fail. */ - ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, - pos >> PAGE_CACHE_SHIFT, - end >> PAGE_CACHE_SHIFT); + ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); WARN_ON_ONCE(ret); ret = 0; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9b3438a7680f..ee3aaa0a5317 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -250,7 +250,7 @@ xfs_growfs_data_private( agf->agf_freeblks = cpu_to_be32(tmpsize); agf->agf_longest = cpu_to_be32(tmpsize); if (xfs_sb_version_hascrc(&mp->m_sb)) - uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -273,7 +273,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) { agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); agfl->agfl_seqno = cpu_to_be32(agno); - uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); } agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); @@ -309,7 +309,7 @@ xfs_growfs_data_private( agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_sb_version_hascrc(&mp->m_sb)) - uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); if (xfs_sb_version_hasfinobt(&mp->m_sb)) { agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); agi->agi_free_level = cpu_to_be32(1); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d8230ba1b471..30555f8fd44b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -164,7 +164,7 @@ xfs_ilock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); @@ -212,7 +212,7 @@ xfs_ilock_nowait( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) { if (!mrtryupdate(&ip->i_iolock)) @@ -281,7 +281,7 @@ xfs_iunlock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); ASSERT(lock_flags != 0); if (lock_flags & XFS_IOLOCK_EXCL) @@ -362,32 +362,52 @@ int xfs_lots_retries; int xfs_lock_delays; #endif +#ifdef CONFIG_LOCKDEP +static bool +xfs_lockdep_subclass_ok( + int subclass) +{ + return subclass < MAX_LOCKDEP_SUBCLASSES; +} +#else +#define xfs_lockdep_subclass_ok(subclass) (true) +#endif + /* * Bump the subclass so xfs_lock_inodes() acquires each lock with a different - * value. This shouldn't be called for page fault locking, but we also need to - * ensure we don't overrun the number of lockdep subclasses for the iolock or - * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. + * value. This can be called for any type of inode lock combination, including + * parent locking. Care must be taken to ensure we don't overrun the subclass + * storage fields in the class mask we build. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { + int class = 0; + + ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP | + XFS_ILOCK_RTSUM))); + ASSERT(xfs_lockdep_subclass_ok(subclass)); + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); + ASSERT(xfs_lockdep_subclass_ok(subclass + + XFS_IOLOCK_PARENT_VAL)); + class += subclass << XFS_IOLOCK_SHIFT; + if (lock_mode & XFS_IOLOCK_PARENT) + class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; } if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << - XFS_MMAPLOCK_SHIFT; + ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS); + class += subclass << XFS_MMAPLOCK_SHIFT; } - if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) { + ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS); + class += subclass << XFS_ILOCK_SHIFT; + } - return lock_mode; + return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class; } /* @@ -399,6 +419,11 @@ xfs_lock_inumorder(int lock_mode, int subclass) * transaction (such as truncate). This can result in deadlock since the long * running trans might need to wait for the inode we just locked in order to * push the tail and free space in the log. + * + * xfs_lock_inodes() can only be used to lock one type of lock at a time - + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we + * lock more than one at a time, lockdep will report false positives saying we + * have violated locking orders. */ void xfs_lock_inodes( @@ -409,8 +434,29 @@ xfs_lock_inodes( int attempts = 0, i, j, try_lock; xfs_log_item_t *lp; - /* currently supports between 2 and 5 inodes */ + /* + * Currently supports between 2 and 5 inodes with exclusive locking. We + * support an arbitrary depth of locking here, but absolute limits on + * inodes depend on the the type of locking and the limits placed by + * lockdep annotations in xfs_lock_inumorder. These are all checked by + * the asserts. + */ ASSERT(ips && inodes >= 2 && inodes <= 5); + ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | + XFS_ILOCK_EXCL)); + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED))); + ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || + inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || + inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || + inodes <= XFS_ILOCK_MAX_SUBCLASS + 1); + + if (lock_mode & XFS_IOLOCK_EXCL) { + ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL))); + } else if (lock_mode & XFS_MMAPLOCK_EXCL) + ASSERT(!(lock_mode & XFS_ILOCK_EXCL)); try_lock = 0; i = 0; @@ -629,30 +675,29 @@ xfs_lookup( { xfs_ino_t inum; int error; - uint lock_mode; trace_xfs_lookup(dp, name); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - lock_mode = xfs_ilock_data_map_shared(dp); + xfs_ilock(dp, XFS_IOLOCK_SHARED); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); - xfs_iunlock(dp, lock_mode); - if (error) - goto out; + goto out_unlock; error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); if (error) goto out_free_name; + xfs_iunlock(dp, XFS_IOLOCK_SHARED); return 0; out_free_name: if (ci_name) kmem_free(ci_name->name); -out: +out_unlock: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); *ipp = NULL; return error; } @@ -787,7 +832,7 @@ xfs_ialloc( if (ip->i_d.di_version == 3) { ASSERT(ip->i_d.di_ino == ino); - ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid)); + ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid)); ip->i_d.di_crc = 0; ip->i_d.di_changecount = 1; ip->i_d.di_lsn = 0; @@ -1149,7 +1194,8 @@ xfs_create( goto out_trans_cancel; - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | + XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); unlock_dp_on_error = true; xfs_bmap_init(&free_list, &first_block); @@ -1185,7 +1231,7 @@ xfs_create( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); unlock_dp_on_error = false; error = xfs_dir_createname(tp, dp, name, ip->i_ino, @@ -1258,7 +1304,7 @@ xfs_create( xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) - xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return error; } @@ -1403,10 +1449,11 @@ xfs_link( if (error) goto error_return; + xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); /* * If we are using project inheritance, we only allow hard link @@ -2510,9 +2557,10 @@ xfs_remove( goto out_trans_cancel; } + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* @@ -2893,6 +2941,12 @@ xfs_rename( * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. */ + if (!new_parent) + xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); + else + xfs_lock_two_inodes(src_dp, target_dp, + XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); + xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* @@ -2900,9 +2954,9 @@ xfs_rename( * we can rely on either trans_commit or trans_cancel to unlock * them. */ - xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); if (new_parent) - xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8f22d20368d8..ca9e11989cbd 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * Flags for lockdep annotations. * * XFS_LOCK_PARENT - for directory operations that require locking a - * parent directory inode and a child entry inode. The parent gets locked - * with this flag so it gets a lockdep subclass of 1 and the child entry - * lock will have a lockdep subclass of 0. + * parent directory inode and a child entry inode. IOLOCK requires nesting, + * MMAPLOCK does not support this class, ILOCK requires a single subclass + * to differentiate parent from child. * * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary * inodes do not participate in the normal lock order, and thus have their @@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * XFS_LOCK_INUMORDER - for locking several inodes at the some time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. - * So the first lock acquired will have a lockdep subclass of 4, the - * second lock will have a lockdep subclass of 5, and so on. It is - * the responsibility of the class builder to shift this to the correct - * portion of the lock_mode lockdep mask. + * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly + * limited to the subclasses we can represent via nesting. We need at least + * 5 inodes nest depth for the ILOCK through rename, and we also have to support + * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP + * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all + * 8 subclasses supported by lockdep. + * + * This also means we have to number the sub-classes in the lowest bits of + * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep + * mask and we can't use bit-masking to build the subclasses. What a mess. + * + * Bit layout: + * + * Bit Lock Region + * 16-19 XFS_IOLOCK_SHIFT dependencies + * 20-23 XFS_MMAPLOCK_SHIFT dependencies + * 24-31 XFS_ILOCK_SHIFT dependencies + * + * IOLOCK values + * + * 0-3 subclass value + * 4-7 PARENT subclass values + * + * MMAPLOCK values + * + * 0-3 subclass value + * 4-7 unused + * + * ILOCK values + * 0-4 subclass values + * 5 PARENT subclass (not nestable) + * 6 RTBITMAP subclass (not nestable) + * 7 RTSUM subclass (not nestable) + * */ -#define XFS_LOCK_PARENT 1 -#define XFS_LOCK_RTBITMAP 2 -#define XFS_LOCK_RTSUM 3 -#define XFS_LOCK_INUMORDER 4 +#define XFS_IOLOCK_SHIFT 16 +#define XFS_IOLOCK_PARENT_VAL 4 +#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) +#define XFS_IOLOCK_DEP_MASK 0x000f0000 +#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) -#define XFS_IOLOCK_SHIFT 16 -#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) +#define XFS_MMAPLOCK_SHIFT 20 +#define XFS_MMAPLOCK_NUMORDER 0 +#define XFS_MMAPLOCK_MAX_SUBCLASS 3 +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 -#define XFS_MMAPLOCK_SHIFT 20 +#define XFS_ILOCK_SHIFT 24 +#define XFS_ILOCK_PARENT_VAL 5 +#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1) +#define XFS_ILOCK_RTBITMAP_VAL 6 +#define XFS_ILOCK_RTSUM_VAL 7 +#define XFS_ILOCK_DEP_MASK 0xff000000 +#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_SHIFT 24 -#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) - -#define XFS_IOLOCK_DEP_MASK 0x000f0000 -#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 -#define XFS_ILOCK_DEP_MASK 0xff000000 -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ +#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \ XFS_MMAPLOCK_DEP_MASK | \ XFS_ILOCK_DEP_MASK) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 2e40f5e3cdf2..512a0945d52a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn( */ goto recover_immediately; case XFS_SB_MAGIC: + /* + * superblock uuids are magic. We may or may not have a + * sb_meta_uuid on disk, but it will be set in the in-core + * superblock. We set the uuid pointer for verification + * according to the superblock feature mask to ensure we check + * the relevant UUID in the superblock. + */ lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); - uuid = &((struct xfs_dsb *)blk)->sb_uuid; + if (xfs_sb_version_hasmetauuid(&mp->m_sb)) + uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; + else + uuid = &((struct xfs_dsb *)blk)->sb_uuid; break; default: break; } if (lsn != (xfs_lsn_t)-1) { - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) goto recover_immediately; return lsn; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1fb16562c159..f98ce83b7bc4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1528,6 +1528,10 @@ xfs_fs_fill_super( } } + if (xfs_sb_version_hassparseinodes(&mp->m_sb)) + xfs_alert(mp, + "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!"); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 05c44bf51b5f..996481eeb491 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -240,7 +240,8 @@ xfs_symlink( if (error) goto out_trans_cancel; - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | + XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); unlock_dp_on_error = true; /* @@ -288,7 +289,7 @@ xfs_symlink( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); unlock_dp_on_error = false; /* @@ -421,7 +422,7 @@ out_release_inode: xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) - xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return error; }