diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index ce84ffd0264c..0f0df2759b09 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -35,6 +35,7 @@ #include "xfs_error.h" #include "xfs_trace.h" +struct workqueue_struct *xfs_alloc_wq; #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) @@ -68,7 +69,7 @@ xfs_alloc_lookup_eq( * Lookup the first record greater than or equal to [bno, len] * in the btree given by cur. */ -STATIC int /* error */ +int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -2207,7 +2208,7 @@ xfs_alloc_read_agf( * group or loop over the allocation groups to find the result. */ int /* error */ -xfs_alloc_vextent( +__xfs_alloc_vextent( xfs_alloc_arg_t *args) /* allocation argument structure */ { xfs_agblock_t agsize; /* allocation group size */ @@ -2417,6 +2418,37 @@ error0: return error; } +static void +xfs_alloc_vextent_worker( + struct work_struct *work) +{ + struct xfs_alloc_arg *args = container_of(work, + struct xfs_alloc_arg, work); + unsigned long pflags; + + /* we are in a transaction context here */ + current_set_flags_nested(&pflags, PF_FSTRANS); + + args->result = __xfs_alloc_vextent(args); + complete(args->done); + + current_restore_flags_nested(&pflags, PF_FSTRANS); +} + + +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + DECLARE_COMPLETION_ONSTACK(done); + + args->done = &done; + INIT_WORK(&args->work, xfs_alloc_vextent_worker); + queue_work(xfs_alloc_wq, &args->work); + wait_for_completion(&done); + return args->result; +} + /* * Free an extent. 
* Just break up the extent address and hand off to xfs_free_ag_extent diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 2f52b924be79..3a7e7d8f8ded 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -25,6 +25,8 @@ struct xfs_perag; struct xfs_trans; struct xfs_busy_extent; +extern struct workqueue_struct *xfs_alloc_wq; + /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ @@ -119,6 +121,9 @@ typedef struct xfs_alloc_arg { char isfl; /* set if is freelist blocks - !acctg */ char userdata; /* set if this is user data */ xfs_fsblock_t firstblock; /* io first block allocated */ + struct completion *done; + struct work_struct work; + int result; } xfs_alloc_arg_t; /* @@ -243,6 +248,13 @@ xfs_alloc_lookup_le( xfs_extlen_t len, /* length of extent */ int *stat); /* success/failure */ +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + int /* error */ xfs_alloc_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 08b9ac644c31..65d61b948ead 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -853,6 +853,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) { int newsize, forkoff, retval; + trace_xfs_attr_sf_addname(args); + retval = xfs_attr_shortform_lookup(args); if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { return(retval); @@ -896,6 +898,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) xfs_dabuf_t *bp; int retval, error, committed, forkoff; + trace_xfs_attr_leaf_addname(args); + /* * Read the (only) block in the attribute list in. 
*/ @@ -920,6 +924,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) xfs_da_brelse(args->trans, bp); return(retval); } + + trace_xfs_attr_leaf_replace(args); + args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; @@ -1090,6 +1097,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) xfs_dabuf_t *bp; int error, committed, forkoff; + trace_xfs_attr_leaf_removename(args); + /* * Remove the attribute. */ @@ -1223,6 +1232,8 @@ xfs_attr_node_addname(xfs_da_args_t *args) xfs_mount_t *mp; int committed, retval, error; + trace_xfs_attr_node_addname(args); + /* * Fill in bucket of arguments/results/context to carry around. */ @@ -1249,6 +1260,9 @@ restart: } else if (retval == EEXIST) { if (args->flags & ATTR_CREATE) goto out; + + trace_xfs_attr_node_replace(args); + args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; @@ -1480,6 +1494,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) xfs_dabuf_t *bp; int retval, error, committed, forkoff; + trace_xfs_attr_node_removename(args); + /* * Tie a string around our finger to remind us where we are. 
*/ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d25eafd4d28d..76d93dc953e1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -235,6 +235,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args) xfs_inode_t *dp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_create(args); + dp = args->dp; ASSERT(dp != NULL); ifp = dp->i_afp; @@ -268,6 +270,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) xfs_inode_t *dp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_add(args); + dp = args->dp; mp = dp->i_mount; dp->i_d.di_forkoff = forkoff; @@ -337,6 +341,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) xfs_mount_t *mp; xfs_inode_t *dp; + trace_xfs_attr_sf_remove(args); + dp = args->dp; mp = dp->i_mount; base = sizeof(xfs_attr_sf_hdr_t); @@ -405,6 +411,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) int i; xfs_ifork_t *ifp; + trace_xfs_attr_sf_lookup(args); + ifp = args->dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; @@ -476,6 +484,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) xfs_dabuf_t *bp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_to_leaf(args); + dp = args->dp; ifp = dp->i_afp; sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; @@ -775,6 +785,8 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) char *tmpbuffer; int error, i; + trace_xfs_attr_leaf_to_sf(args); + dp = args->dp; tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); ASSERT(tmpbuffer != NULL); @@ -848,6 +860,8 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) xfs_dablk_t blkno; int error; + trace_xfs_attr_leaf_to_node(args); + dp = args->dp; bp1 = bp2 = NULL; error = xfs_da_grow_inode(args, &blkno); @@ -911,6 +925,8 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) xfs_dabuf_t *bp; int error; + trace_xfs_attr_leaf_create(args); + dp = args->dp; ASSERT(dp != NULL); error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, @@ -948,6 +964,8 @@ 
xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, xfs_dablk_t blkno; int error; + trace_xfs_attr_leaf_split(state->args); + /* * Allocate space for a new leaf node. */ @@ -977,10 +995,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * * Insert the "new" entry in the correct block. */ - if (state->inleaf) + if (state->inleaf) { + trace_xfs_attr_leaf_add_old(state->args); error = xfs_attr_leaf_add(oldblk->bp, state->args); - else + } else { + trace_xfs_attr_leaf_add_new(state->args); error = xfs_attr_leaf_add(newblk->bp, state->args); + } /* * Update last hashval in each block since we added the name. @@ -1001,6 +1022,8 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_map_t *map; int tablesize, entsize, sum, tmp, i; + trace_xfs_attr_leaf_add(args); + leaf = bp->data; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT((args->index >= 0) @@ -1128,8 +1151,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); /* - * Copy the attribute name and value into the new space. - * * For "remote" attribute values, simply note that we need to * allocate space for the "remote" value. We can't actually * allocate the extents in this transaction, and we can't decide @@ -1265,6 +1286,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); args = state->args; + trace_xfs_attr_leaf_rebalance(args); + /* * Check ordering of blocks, reverse if it makes things simpler. * @@ -1810,6 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_mount_t *mp; char *tmpbuffer; + trace_xfs_attr_leaf_unbalance(state->args); + /* * Set up environment. 
*/ @@ -1919,6 +1944,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) int probe, span; xfs_dahash_t hashval; + trace_xfs_attr_leaf_lookup(args); + leaf = bp->data; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) @@ -2445,6 +2472,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) char *name; #endif /* DEBUG */ + trace_xfs_attr_leaf_clearflag(args); /* * Set up the operation. */ @@ -2509,6 +2537,8 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) xfs_dabuf_t *bp; int error; + trace_xfs_attr_leaf_setflag(args); + /* * Set up the operation. */ @@ -2565,6 +2595,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) char *name1, *name2; #endif /* DEBUG */ + trace_xfs_attr_leaf_flipflags(args); + /* * Read the block containing the "old" attr */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3548c6f75593..85e7e327bcd8 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5124,6 +5124,15 @@ xfs_bunmapi( cur->bc_private.b.flags = 0; } else cur = NULL; + + if (isrt) { + /* + * Synchronize by locking the bitmap inode. + */ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + } + extno = 0; while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && (nexts == 0 || extno < nexts)) { diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 77c74257c2a3..7f1a6f5b05a6 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -108,6 +108,8 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, int error; xfs_trans_t *tp; + trace_xfs_da_node_create(args); + tp = args->trans; error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); if (error) @@ -140,6 +142,8 @@ xfs_da_split(xfs_da_state_t *state) xfs_dabuf_t *bp; int max, action, error, i; + trace_xfs_da_split(state->args); + /* * Walk back up the tree splitting/inserting/adjusting as necessary. 
* If we need to insert and there isn't room, split the node, then @@ -178,10 +182,12 @@ xfs_da_split(xfs_da_state_t *state) state->extravalid = 1; if (state->inleaf) { state->extraafter = 0; /* before newblk */ + trace_xfs_attr_leaf_split_before(state->args); error = xfs_attr_leaf_split(state, oldblk, &state->extrablk); } else { state->extraafter = 1; /* after newblk */ + trace_xfs_attr_leaf_split_after(state->args); error = xfs_attr_leaf_split(state, newblk, &state->extrablk); } @@ -300,6 +306,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_mount_t *mp; xfs_dir2_leaf_t *leaf; + trace_xfs_da_root_split(state->args); + /* * Copy the existing (incorrect) block from the root node position * to a free space somewhere. @@ -380,6 +388,8 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int newcount, error; int useextra; + trace_xfs_da_node_split(state->args); + node = oldblk->bp->data; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); @@ -466,6 +476,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, int count, tmp; xfs_trans_t *tp; + trace_xfs_da_node_rebalance(state->args); + node1 = blk1->bp->data; node2 = blk2->bp->data; /* @@ -574,6 +586,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, xfs_da_node_entry_t *btree; int tmp; + trace_xfs_da_node_add(state->args); + node = oldblk->bp->data; ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); @@ -619,6 +633,8 @@ xfs_da_join(xfs_da_state_t *state) xfs_da_state_blk_t *drop_blk, *save_blk; int action, error; + trace_xfs_da_join(state->args); + action = 0; drop_blk = &state->path.blk[ state->path.active-1 ]; save_blk = &state->altpath.blk[ state->path.active-1 ]; @@ -723,6 +739,8 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) xfs_dabuf_t *bp; int error; + trace_xfs_da_root_join(state->args); + args = 
state->args; ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); @@ -941,6 +959,8 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) xfs_da_node_entry_t *btree; int tmp; + trace_xfs_da_node_remove(state->args); + node = drop_blk->bp->data; ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); ASSERT(drop_blk->index >= 0); @@ -984,6 +1004,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, int tmp; xfs_trans_t *tp; + trace_xfs_da_node_unbalance(state->args); + drop_node = drop_blk->bp->data; save_node = save_blk->bp->data; ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); @@ -1230,6 +1252,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, /* * Link new block in before existing block. */ + trace_xfs_da_link_before(args); new_info->forw = cpu_to_be32(old_blk->blkno); new_info->back = old_info->back; if (old_info->back) { @@ -1251,6 +1274,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, /* * Link new block in after existing block. */ + trace_xfs_da_link_after(args); new_info->forw = old_info->forw; new_info->back = cpu_to_be32(old_blk->blkno); if (old_info->forw) { @@ -1348,6 +1372,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Unlink the leaf block from the doubly linked chain of leaves. 
*/ if (be32_to_cpu(save_info->back) == drop_blk->blkno) { + trace_xfs_da_unlink_back(args); save_info->back = drop_info->back; if (drop_info->back) { error = xfs_da_read_buf(args->trans, args->dp, @@ -1365,6 +1390,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_buf_done(bp); } } else { + trace_xfs_da_unlink_forward(args); save_info->forw = drop_info->forw; if (drop_info->forw) { error = xfs_da_read_buf(args->trans, args->dp, @@ -1652,6 +1678,8 @@ xfs_da_grow_inode( int count; int error; + trace_xfs_da_grow_inode(args); + if (args->whichfork == XFS_DATA_FORK) { bno = args->dp->i_mount->m_dirleafblk; count = args->dp->i_mount->m_dirblkfsbs; @@ -1690,6 +1718,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, xfs_dir2_leaf_t *dead_leaf2; xfs_dahash_t dead_hash; + trace_xfs_da_swap_lastblock(args); + dead_buf = *dead_bufp; dead_blkno = *dead_blknop; tp = args->trans; @@ -1878,6 +1908,8 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, xfs_trans_t *tp; xfs_mount_t *mp; + trace_xfs_da_shrink_inode(args); + dp = args->dp; w = args->whichfork; tp = args->trans; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 286a051f12cf..1ad3a4b8ca40 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -37,9 +37,9 @@ STATIC int xfs_trim_extents( struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_fsblock_t start, - xfs_fsblock_t end, - xfs_fsblock_t minlen, + xfs_daddr_t start, + xfs_daddr_t end, + xfs_daddr_t minlen, __uint64_t *blocks_trimmed) { struct block_device *bdev = mp->m_ddev_targp->bt_bdev; @@ -67,7 +67,7 @@ xfs_trim_extents( /* * Look up the longest btree in the AGF and start with it. */ - error = xfs_alloc_lookup_le(cur, 0, + error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); if (error) goto out_del_cursor; @@ -77,8 +77,10 @@ xfs_trim_extents( * enough to be worth discarding. 
*/ while (i) { - xfs_agblock_t fbno; - xfs_extlen_t flen; + xfs_agblock_t fbno; + xfs_extlen_t flen; + xfs_daddr_t dbno; + xfs_extlen_t dlen; error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); if (error) @@ -86,10 +88,18 @@ xfs_trim_extents( XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); + /* + * use daddr format for all range/len calculations as that is + * the format the range/len variables are supplied in by + * userspace. + */ + dbno = XFS_AGB_TO_DADDR(mp, agno, fbno); + dlen = XFS_FSB_TO_BB(mp, flen); + /* * Too small? Give up. */ - if (flen < minlen) { + if (dlen < minlen) { trace_xfs_discard_toosmall(mp, agno, fbno, flen); goto out_del_cursor; } @@ -99,8 +109,7 @@ xfs_trim_extents( * supposed to discard skip it. Do not bother to trim * down partially overlapping ranges for now. */ - if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || - XFS_AGB_TO_FSB(mp, agno, fbno) > end) { + if (dbno + dlen < start || dbno > end) { trace_xfs_discard_exclude(mp, agno, fbno, flen); goto next_extent; } @@ -115,10 +124,7 @@ xfs_trim_extents( } trace_xfs_discard_extent(mp, agno, fbno, flen); - error = -blkdev_issue_discard(bdev, - XFS_AGB_TO_DADDR(mp, agno, fbno), - XFS_FSB_TO_BB(mp, flen), - GFP_NOFS, 0); + error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); if (error) goto out_del_cursor; *blocks_trimmed += flen; @@ -137,6 +143,15 @@ out_put_perag: return error; } +/* + * trim a range of the filesystem. + * + * Note: the parameters passed from userspace are byte ranges into the + * filesystem which does not match to the format we use for filesystem block + * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format + * is a linear address range. Hence we need to use DADDR based conversions and + * comparisons for determining the correct offset and regions to trim. 
+ */ int xfs_ioc_trim( struct xfs_mount *mp, @@ -145,7 +160,7 @@ xfs_ioc_trim( struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; unsigned int granularity = q->limits.discard_granularity; struct fstrim_range range; - xfs_fsblock_t start, end, minlen; + xfs_daddr_t start, end, minlen; xfs_agnumber_t start_agno, end_agno, agno; __uint64_t blocks_trimmed = 0; int error, last_error = 0; @@ -159,22 +174,22 @@ xfs_ioc_trim( /* * Truncating down the len isn't actually quite correct, but using - * XFS_B_TO_FSB would mean we trivially get overflows for values + * BBTOB would mean we trivially get overflows for values * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ - start = XFS_B_TO_FSBT(mp, range.start); - end = start + XFS_B_TO_FSBT(mp, range.len) - 1; - minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); + start = BTOBB(range.start); + end = start + BTOBBT(range.len) - 1; + minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (start >= mp->m_sb.sb_dblocks) + if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) return -XFS_ERROR(EINVAL); - if (end > mp->m_sb.sb_dblocks - 1) - end = mp->m_sb.sb_dblocks - 1; + if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) + end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; - start_agno = XFS_FSB_TO_AGNO(mp, start); - end_agno = XFS_FSB_TO_AGNO(mp, end); + start_agno = xfs_daddr_to_agno(mp, start); + end_agno = xfs_daddr_to_agno(mp, end); for (agno = start_agno; agno <= end_agno; agno++) { error = -xfs_trim_extents(mp, agno, start, end, minlen, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 4be16a0cbe5a..1155208fa830 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1065,7 +1065,7 @@ out: return -ENOMEM; } -void __exit +void xfs_qm_exit(void) { kmem_zone_destroy(xfs_qm_dqtrxzone); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 
a98cb4524e6c..bcc6c249b2c7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -289,7 +289,7 @@ xfs_iget_cache_hit( if (lock_flags != 0) xfs_ilock(ip, lock_flags); - xfs_iflags_clear(ip, XFS_ISTALE); + xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); XFS_STATS_INC(xs_ig_found); return 0; @@ -314,6 +314,7 @@ xfs_iget_cache_miss( struct xfs_inode *ip; int error; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); + int iflags; ip = xfs_inode_alloc(mp, ino); if (!ip) @@ -358,8 +359,11 @@ xfs_iget_cache_miss( * memory barrier that ensures this detection works correctly at lookup * time. */ + iflags = XFS_INEW; + if (flags & XFS_IGET_DONTCACHE) + iflags |= XFS_IDONTCACHE; ip->i_udquot = ip->i_gdquot = NULL; - xfs_iflags_set(ip, XFS_INEW); + xfs_iflags_set(ip, iflags); /* insert the new inode */ spin_lock(&pag->pag_ici_lock); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f123dbe6d42a..7fee3387e1c8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -387,10 +387,11 @@ xfs_set_projid(struct xfs_inode *ip, #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) +#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */ /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during - * inode lookup. Thi prevents unintended behaviour on the new inode from + * inode lookup. This prevents unintended behaviour on the new inode from * ocurring. 
*/ #define XFS_IRECLAIM_RESET_FLAGS \ @@ -553,6 +554,7 @@ do { \ */ #define XFS_IGET_CREATE 0x1 #define XFS_IGET_UNTRUSTED 0x2 +#define XFS_IGET_DONTCACHE 0x4 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index f588320dc4b9..91f8ff547ab3 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -209,6 +209,7 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; + fmode_t fmode; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -228,26 +229,21 @@ xfs_open_by_handle( hreq->oflags |= O_LARGEFILE; #endif - /* Put open permission in namei format. */ permflag = hreq->oflags; - if ((permflag+1) & O_ACCMODE) - permflag++; - if (permflag & O_TRUNC) - permflag |= 2; - + fmode = OPEN_FMODE(permflag); if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && - (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + (fmode & FMODE_WRITE) && IS_APPEND(inode)) { error = -XFS_ERROR(EPERM); goto out_dput; } - if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { error = -XFS_ERROR(EACCES); goto out_dput; } /* Can't write directories. 
*/ - if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { error = -XFS_ERROR(EISDIR); goto out_dput; } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9720c54bbed0..acc2bf264dab 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -75,7 +75,8 @@ xfs_bulkstat_one_int( return XFS_ERROR(ENOMEM); error = xfs_iget(mp, NULL, ino, - XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip); + (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), + XFS_ILOCK_SHARED, &ip); if (error) { *stat = BULKSTAT_RV_NOTHING; goto out_free; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 98a9cb5ffd17..6db1fef38bff 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -726,8 +726,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) .lv_iovecp = &reg, }; - /* remove inited flag */ + /* remove inited flag, and account for space used */ tic->t_flags = 0; + tic->t_curr_res -= sizeof(magic); error = xlog_write(log, &vec, tic, &lsn, NULL, XLOG_UNMOUNT_TRANS); /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7c75c7374d5a..8ecad5bad66c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3161,37 +3161,26 @@ xlog_recover_process_iunlinks( */ continue; } + /* + * Unlock the buffer so that it can be acquired in the normal + * course of the transaction to truncate and free each inode. + * Because we are not racing with anyone else here for the AGI + * buffer, we don't even need to hold it locked to read the + * initial unlinked bucket entries out of the buffer. We keep + * buffer reference though, so that it stays pinned in memory + * while we need the buffer. + */ agi = XFS_BUF_TO_AGI(agibp); + xfs_buf_unlock(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* - * Release the agi buffer so that it can - * be acquired in the normal course of the - * transaction to truncate and free the inode. 
- */ - xfs_buf_relse(agibp); - agino = xlog_recover_process_one_iunlink(mp, agno, agino, bucket); - - /* - * Reacquire the agibuffer and continue around - * the loop. This should never fail as we know - * the buffer was good earlier on. - */ - error = xfs_read_agi(mp, NULL, agno, &agibp); - ASSERT(error == 0); - agi = XFS_BUF_TO_AGI(agibp); } } - - /* - * Release the buffer for the current agi so we can - * go on to the next one. - */ - xfs_buf_relse(agibp); + xfs_buf_rele(agibp); } mp->m_dmevmask = mp_dmevmask; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 87323f1ded64..ca4f31534a0a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -183,6 +183,7 @@ error_cancel: oblocks = map.br_startoff + map.br_blockcount; } return 0; + error: return error; } @@ -2139,11 +2140,9 @@ xfs_rtfree_extent( xfs_buf_t *sumbp; /* summary file block buffer */ mp = tp->t_mountp; - /* - * Synchronize by locking the bitmap inode. - */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + + ASSERT(mp->m_rbmip->i_itemp != NULL); + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); #if defined(__KERNEL__) && defined(DEBUG) /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 912442cf0f82..dab9a5f6dfd6 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -950,6 +950,22 @@ xfs_fs_evict_inode( xfs_inactive(ip); } +/* + * We do an unlocked check for XFS_IDONTCACHE here because we are already + * serialised against cache hits here via the inode->i_lock and igrab() in + * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be + * racing with us, and it avoids needing to grab a spinlock here for every inode + * we drop the final reference on. 
+ */ +STATIC int +xfs_fs_drop_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + + return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); +} + STATIC void xfs_free_fsname( struct xfs_mount *mp) @@ -1433,6 +1449,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .evict_inode = xfs_fs_evict_inode, + .drop_inode = xfs_fs_drop_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, @@ -1606,12 +1623,28 @@ xfs_init_workqueues(void) xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); if (!xfs_syncd_wq) return -ENOMEM; + + /* + * The allocation workqueue can be used in memory reclaim situations + * (writepage path), and parallelism is only limited by the number of + * AGs in all the filesystems mounted. Hence use the default large + * max_active value for this workqueue. + */ + xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); + if (!xfs_alloc_wq) + goto out_destroy_syncd; + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); + return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { + destroy_workqueue(xfs_alloc_wq); destroy_workqueue(xfs_syncd_wq); } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 75eb54af4d58..06838c42b2a0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -627,16 +627,19 @@ DECLARE_EVENT_CLASS(xfs_namespace_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, dp_ino) + __field(int, namelen) __dynamic_array(char, name, name->len) ), TP_fast_assign( __entry->dev = VFS_I(dp)->i_sb->s_dev; __entry->dp_ino = dp->i_ino; + __entry->namelen = name->len; memcpy(__get_str(name), name->name, name->len); ), - TP_printk("dev %d:%d dp ino 0x%llx name %s", + TP_printk("dev %d:%d dp ino 0x%llx name %.*s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dp_ino, + __entry->namelen, __get_str(name)) ) @@ -658,6 +661,8 @@ 
TRACE_EVENT(xfs_rename, __field(dev_t, dev) __field(xfs_ino_t, src_dp_ino) __field(xfs_ino_t, target_dp_ino) + __field(int, src_namelen) + __field(int, target_namelen) __dynamic_array(char, src_name, src_name->len) __dynamic_array(char, target_name, target_name->len) ), @@ -665,15 +670,20 @@ TRACE_EVENT(xfs_rename, __entry->dev = VFS_I(src_dp)->i_sb->s_dev; __entry->src_dp_ino = src_dp->i_ino; __entry->target_dp_ino = target_dp->i_ino; + __entry->src_namelen = src_name->len; + __entry->target_namelen = target_name->len; memcpy(__get_str(src_name), src_name->name, src_name->len); - memcpy(__get_str(target_name), target_name->name, target_name->len); + memcpy(__get_str(target_name), target_name->name, + target_name->len); ), TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" - " src name %s target name %s", + " src name %.*s target name %.*s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_dp_ino, __entry->target_dp_ino, + __entry->src_namelen, __get_str(src_name), + __entry->target_namelen, __get_str(target_name)) ) @@ -1408,7 +1418,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); -DECLARE_EVENT_CLASS(xfs_dir2_class, +DECLARE_EVENT_CLASS(xfs_da_class, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), TP_STRUCT__entry( @@ -1443,7 +1453,7 @@ DECLARE_EVENT_CLASS(xfs_dir2_class, ) #define DEFINE_DIR2_EVENT(name) \ -DEFINE_EVENT(xfs_dir2_class, name, \ +DEFINE_EVENT(xfs_da_class, name, \ TP_PROTO(struct xfs_da_args *args), \ TP_ARGS(args)) DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); @@ -1472,6 +1482,64 @@ DEFINE_DIR2_EVENT(xfs_dir2_node_replace); DEFINE_DIR2_EVENT(xfs_dir2_node_removename); DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); +#define DEFINE_ATTR_EVENT(name) \ +DEFINE_EVENT(xfs_da_class, name, \ + TP_PROTO(struct xfs_da_args *args), \ + TP_ARGS(args)) +DEFINE_ATTR_EVENT(xfs_attr_sf_add); +DEFINE_ATTR_EVENT(xfs_attr_sf_addname); 
+DEFINE_ATTR_EVENT(xfs_attr_sf_create); +DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); +DEFINE_ATTR_EVENT(xfs_attr_sf_remove); +DEFINE_ATTR_EVENT(xfs_attr_sf_removename); +DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); + +DEFINE_ATTR_EVENT(xfs_attr_leaf_add); +DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old); +DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new); +DEFINE_ATTR_EVENT(xfs_attr_leaf_addname); +DEFINE_ATTR_EVENT(xfs_attr_leaf_create); +DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup); +DEFINE_ATTR_EVENT(xfs_attr_leaf_replace); +DEFINE_ATTR_EVENT(xfs_attr_leaf_removename); +DEFINE_ATTR_EVENT(xfs_attr_leaf_split); +DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before); +DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after); +DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag); +DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag); +DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags); +DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf); +DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node); +DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance); +DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance); + +DEFINE_ATTR_EVENT(xfs_attr_node_addname); +DEFINE_ATTR_EVENT(xfs_attr_node_lookup); +DEFINE_ATTR_EVENT(xfs_attr_node_replace); +DEFINE_ATTR_EVENT(xfs_attr_node_removename); + +#define DEFINE_DA_EVENT(name) \ +DEFINE_EVENT(xfs_da_class, name, \ + TP_PROTO(struct xfs_da_args *args), \ + TP_ARGS(args)) +DEFINE_DA_EVENT(xfs_da_split); +DEFINE_DA_EVENT(xfs_da_join); +DEFINE_DA_EVENT(xfs_da_link_before); +DEFINE_DA_EVENT(xfs_da_link_after); +DEFINE_DA_EVENT(xfs_da_unlink_back); +DEFINE_DA_EVENT(xfs_da_unlink_forward); +DEFINE_DA_EVENT(xfs_da_root_split); +DEFINE_DA_EVENT(xfs_da_root_join); +DEFINE_DA_EVENT(xfs_da_node_add); +DEFINE_DA_EVENT(xfs_da_node_create); +DEFINE_DA_EVENT(xfs_da_node_split); +DEFINE_DA_EVENT(xfs_da_node_remove); +DEFINE_DA_EVENT(xfs_da_node_rebalance); +DEFINE_DA_EVENT(xfs_da_node_unbalance); +DEFINE_DA_EVENT(xfs_da_swap_lastblock); +DEFINE_DA_EVENT(xfs_da_grow_inode); +DEFINE_DA_EVENT(xfs_da_shrink_inode); + DECLARE_EVENT_CLASS(xfs_dir2_space_class, TP_PROTO(struct 
xfs_da_args *args, int idx), TP_ARGS(args, idx),