From 1f981697432daea3c0abf938b39174dcfb29340e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 26 Jul 2012 11:26:36 +0100 Subject: [PATCH 01/27] GFS2: Merge two nearly identical xattr functions There were two functions in the xattr code which were nearly identical, the only difference being that one was copy data into the unstuffed xattrs and the other was copying data out from it. This patch merges the two functions such that the code which deal with iteration over the unstuffed xattrs is no longer duplicated. Signed-off-by: Steven Whitehouse --- fs/gfs2/xattr.c | 94 ++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 64 deletions(-) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 27a0b4a901f5..5404ed1582ff 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) } /** - * ea_get_unstuffed - actually copies the unstuffed data into the - * request buffer + * ea_iter_unstuffed - copies the unstuffed xattr data to/from the + * request buffer * @ip: The GFS2 inode * @ea: The extended attribute header structure - * @data: The data to be copied + * @din: The data to be copied in + * @dout: The data to be copied out (one of din,dout will be NULL) * * Returns: errno */ -static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, - char *data) +static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + const char *din, char *dout) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head **bh; @@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; + unsigned char *pos; + unsigned cp_size; bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); if (!bh) @@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, goto out; } - memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); + pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); + cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; + + if (dout) { + memcpy(dout, pos, cp_size); + dout += sdp->sd_jbsize; + } + + if (din) { + gfs2_trans_add_bh(ip->i_gl, bh[x], 1); + memcpy(pos, din, cp_size); + din += sdp->sd_jbsize; + } amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; - brelse(bh[x]); } @@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, memcpy(data, GFS2_EA2DATA(el->el_ea), len); return len; } - ret = ea_get_unstuffed(ip, el->el_ea, data); + ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); if (ret < 0) return ret; return len; @@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name, size, flags, type); } + static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, char *data) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); - unsigned int x; - int error; + int ret; - bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); - if (!bh) - return -ENOMEM; + ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); + if (ret) + return ret; - error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); - if (error) - goto out; - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, - bh + x); - if (error) { - while (x--) - brelse(bh[x]); - goto fail; - } - dataptrs++; - } - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_wait(sdp, bh[x]); - if (error) { - for (; x < nptrs; x++) - brelse(bh[x]); - goto fail; - } - if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { - for (; x < nptrs; x++) - brelse(bh[x]); - error = -EIO; - goto fail; - } - - gfs2_trans_add_bh(ip->i_gl, bh[x], 1); - - memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data, - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); - - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; - - brelse(bh[x]); - } - -out: - kfree(bh); - return error; - -fail: + ret = gfs2_iter_unstuffed(ip, ea, data, NULL); gfs2_trans_end(sdp); - kfree(bh); - return error; + + return ret; } int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) From 71f890f7f758f340215d48fed5223f9cce05b652 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 30 Jul 2012 14:53:19 +0100 Subject: [PATCH 02/27] GFS2: Remove rs_requested field from reservations The rs_requested field is left over from the original allocation code, however this should have been a parameter passed to the various functions from gfs2_inplace_reserve() and not a member of the reservation structure as the value is not required after the initial allocation. This also helps simplify the code since we no longer need to set the rs_requested to zero. Also the gfs2_inplace_release() function can also be simplified since the reservation structure will always be defined when it is called, and the only remaining task is to unlock the rgrp if required. It can also now be called unconditionally too, resulting in a further simplification. Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 9 +++++---- fs/gfs2/file.c | 4 ++-- fs/gfs2/incore.h | 3 --- fs/gfs2/inode.c | 9 +++------ fs/gfs2/quota.c | 9 +++++---- fs/gfs2/rgrp.c | 35 ++++++++--------------------------- fs/gfs2/rgrp.h | 8 -------- fs/gfs2/trans.h | 7 +++---- fs/gfs2/xattr.c | 2 +- 9 files changed, 27 insertions(+), 59 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d386..00eaa83871b7 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(mapping->host); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + unsigned requested = 0; int alloc_required; int error = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; @@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, requested); if (error) goto out_qunlock; } @@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; if (alloc_required) - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (gfs2_mb_reserved(ip)) - gfs2_inplace_release(ip); + gfs2_inplace_release(ip); if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 382000ffac1f..30e21997a1a1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -845,7 +845,7 @@ retry: &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(ip); + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc5..52078a161ecd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -250,9 +250,6 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during inplace_reserve (step 2): */ - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - /* components used during get_local_rgrp (step 3): */ struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 753af3d86bbc..f2709ea887da 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -737,10 +737,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - /* Check if we reserved space in the rgrp. Function link_dinode may - not, depending on whether alloc is required. */ - if (gfs2_mb_reserved(dip)) - gfs2_inplace_release(dip); + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); @@ -897,7 +894,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip) + + gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -1378,7 +1375,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip) + + gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c2..420bc3805ccc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_holder *ghs, i_gh; unsigned int qx, x; struct gfs2_quota_data *qd; + unsigned reserved; loff_t offset; unsigned int nalloc = 0, blocks; int error; @@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - error = gfs2_inplace_reserve(ip, 1 + - (nalloc * (data_blocks + ind_blocks))); + reserved = 1 + (nalloc * (data_blocks + ind_blocks)); + error = gfs2_inplace_reserve(ip, reserved); if (error) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; + blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip, blocks); if (error) goto out_i; - blocks += gfs2_rg_blocks(ip); + blocks += gfs2_rg_blocks(ip, blocks); } /* Some quotas span block boundaries and can update two blocks, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c9ed814eeb6f..a2b43bb83499 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -539,7 +539,6 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) E.g. We can't set rs_rgd to NULL because the rgd glock is held and dequeued through this pointer. Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; Can't: rs->rs_rgd = NULL;*/ rs->rs_bi = NULL; rs->rs_biblk = 0; @@ -1350,7 +1349,7 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1422,8 +1421,7 @@ do_search: what we can. If there's not enough, keep looking. */ if (nonzero == NULL) rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= - ip->i_res->rs_requested) + else if ((nonzero - ptr) * GFS2_NBBY >= requested) rsv_bytes = (nonzero - ptr); if (rsv_bytes) { @@ -1461,17 +1459,16 @@ skip: * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_blkreserv *rs = ip->i_res; - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; /* Look for a multi-block reservation. */ if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip) != BFITNOENT) + rg_mblk_search(rgd, ip, requested) != BFITNOENT) return 1; - if (unclaimed_blocks(rgd) >= rs->rs_requested) + if (unclaimed_blocks(rgd) >= requested) return 1; return 0; @@ -1562,7 +1559,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; @@ -1606,7 +1602,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) case 0: if (gfs2_rs_active(rs)) { if (unclaimed_blocks(rs->rs_rgd) + - rs->rs_free >= rs->rs_requested) { + rs->rs_free >= requested) { ip->i_rgd = rs->rs_rgd; return 0; } @@ -1616,7 +1612,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) and look for a suitable rgrp. */ gfs2_rs_deltree(rs); } - if (try_rgrp_fit(rs->rs_rgd, ip)) { + if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) gfs2_rgrp_bh_get(rs->rs_rgd); ip->i_rgd = rs->rs_rgd; @@ -1656,8 +1652,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) error = -ENOSPC; out: - if (error) - rs->rs_requested = 0; return error; } @@ -1672,15 +1666,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - if (!rs) - return; - - if (!rs->rs_free) - gfs2_rs_deltree(rs); - if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - rs->rs_requested = 0; } /** @@ -2021,12 +2008,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, int error; struct gfs2_bitmap *bi; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (ip->i_res->rs_requested == 0) - return -ECANCELED; - /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ca6e26729b86..0b0e9cc7e3d9 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -73,14 +73,6 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is "inplace" reserved: */ -static inline int gfs2_mb_reserved(struct gfs2_inode *ip) -{ - if (ip->i_res && ip->i_res->rs_requested) - return 1; - return 0; -} - /* This is how to tell if a multi-block reservation is in the rgrp tree: */ static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) { diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 41f42cdccbb8..bf2ae9aeee7a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -28,11 +28,10 @@ struct gfs2_glock; /* reserve either the number of blocks to be allocated plus the rg header * block, or all of the blocks in the rg, whichever is smaller */ -static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) +static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - const struct gfs2_blkreserv *rs = ip->i_res; - if (rs && rs->rs_requested < ip->i_rgd->rd_length) - return rs->rs_requested + 1; + if (requested < ip->i_rgd->rd_length) + return requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 5404ed1582ff..db330e5518cd 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -739,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + gfs2_rg_blocks(ip) + + blks + gfs2_rg_blocks(ip, blks) + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; From 4a993fb1503d11496974bd86c0b7123f63d9c8a2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Jul 2012 15:21:20 +0100 Subject: [PATCH 03/27] GFS2: Add structure to contain rgrp, bitmap, offset tuple This patch introduces a new structure, gfs2_rbm, which is a tuple of a resource group, a bitmap within the resource group and an offset within that bitmap. This is designed to make manipulating these sets of variables easier. There is also a new helper function which converts this representation back to a disk block address. In addition, the rbtree nodes which are used for the reservations were not being correctly initialised, which is now fixed. Also, the tracing was not passing through the inode where it should have been. That is mostly fixed aside from one corner case. This needs to be revisited since there can also be a NULL rgrp in some cases which results in the device being incorrect in the trace. This is intended to be the first step towards cleaning up some of the allocation code, and some further bug fixes. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- fs/gfs2/incore.h | 15 +++- fs/gfs2/rgrp.c | 175 ++++++++++++++++++++----------------------- fs/gfs2/rgrp.h | 16 ++-- fs/gfs2/super.c | 2 +- fs/gfs2/trace_gfs2.h | 10 +-- 6 files changed, 105 insertions(+), 115 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9fa..1fd3ae237bdd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 52078a161ecd..d5e254604c72 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -102,6 +102,17 @@ struct gfs2_rgrpd { u32 rd_rs_cnt; /* count of current reservations */ }; +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ + u32 offset; /* The offset is bitmap relative */ +}; + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -251,13 +262,11 @@ struct gfs2_blkreserv { atomic_t rs_sizehint; /* hint of the write size */ /* components used during get_local_rgrp (step 3): */ - struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ + struct gfs2_rbm rs_rbm; struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ /* components used during block searches and assignments (step 4): */ - struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ - u32 rs_biblk; /* start block relative to the bi */ u32 rs_free; /* how many blocks are still free */ /* ancillary quota stuff */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index a2b43bb83499..eaa41885a00d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -192,7 +192,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -487,6 +487,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + rb_init_node(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -499,8 +501,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,40 +510,28 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + rb_init_node(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -549,17 +539,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -571,7 +560,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); @@ -596,7 +585,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1284,7 +1273,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; spin_lock(&rgd->rd_rsspin); @@ -1312,8 +1301,8 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, /* Do our reservation work */ rs = ip->i_res; rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; + rs->rs_rbm.offset = biblk; + rs->rs_rbm.bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1564,34 +1553,34 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) goto out; } if (gfs2_rs_active(rs)) { - begin = rs->rs_rgd; + begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rgd = begin = ip->i_rgd; + rs->rs_rbm.rgd = begin = ip->i_rgd; } else { - rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (rs->rs_rgd == NULL) + if (rs->rs_rbm.rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 1; error = 0; } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { /* If the rgrp already is maxed out for contenders, we can eliminate it as a "first pass" without even requesting the rgrp glock. */ error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rbm.rgd); if (error) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; @@ -1601,36 +1590,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) switch (error) { case 0: if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rgd) + + if (unclaimed_blocks(rs->rs_rbm.rgd) + rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rgd; + ip->i_rgd = rs->rs_rbm.rgd; return 0; } /* We have a multi-block reservation, but the rgrp doesn't have enough free blocks to satisfy the request. Free the reservation and look for a suitable rgrp. */ - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); } - if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { + if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - ip->i_rgd = rs->rs_rgd; + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + ip->i_rgd = rs->rs_rbm.rgd; return 0; } - if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - try_rgrp_unlink(rs->rs_rgd, &last_unlinked, + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); - rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rgd != begin) /* If we didn't wrap */ + rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); + rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ break; flags &= ~LM_FLAG_TRY; @@ -1776,11 +1765,11 @@ do_search: if (rs == NULL) break; - BUG_ON(rs->rs_bi != bi); + BUG_ON(rs->rs_rbm.bi != bi); biblk = BFITNOENT; /* This should jump to the first block after the reservation. */ - goal = rs->rs_biblk + rs->rs_free; + goal = rs->rs_rbm.offset + rs->rs_free; if (goal >= bi->bi_len * GFS2_NBBY) break; } @@ -1805,9 +1794,7 @@ skip: /** * gfs2_alloc_extent - allocate an extent from a given bitmap - * @rgd: the resource group descriptor - * @bi: the bitmap within the rgrp - * @blk: the block within the bitmap + * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode * @n: The extent length * @@ -1815,9 +1802,12 @@ skip: * Set the found bits to @new_state to change block's allocation state. * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, - u32 blk, bool dinode, unsigned int *n) +static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, + unsigned int *n) { + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rbm->bi; + u32 blk = rbm->offset; const unsigned int elen = *n; u32 goal, rgblk; const u8 *buffer = NULL; @@ -1956,21 +1946,21 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, unsigned int *nblocks) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; struct gfs2_bitmap *bi; - u64 start_block = gfs2_rs_startblk(rs); + u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); const unsigned int elen = *nblocks; - bi = rs->rs_bi; + bi = rs->rs_rbm.bi; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) + bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_biblk++; + rs->rs_rbm.offset++; rs->rs_free--; BUG_ON(!rgd->rd_reserved); @@ -1980,7 +1970,7 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); return start_block; } @@ -2001,40 +1991,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rgrpd *rgd; + struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal, blk; /* block, within the rgrp scope */ + u32 goal; /* block, within the rgrp scope */ u64 block; /* block, within the file system scope */ int error; - struct gfs2_bitmap *bi; /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); - rgd = ip->i_res->rs_rgd; + rbm.rgd = ip->i_res->rs_rbm.rgd; block = claim_reserved_blks(ip, dinode, nblocks); if (*nblocks) goto found_blocks; } - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal - rbm.rgd->rd_data0; else - goal = rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) { + if (rbm.offset == BFITNOENT) { printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(&rbm, dinode, nblocks); found_blocks: ndata = *nblocks; if (dinode) @@ -2052,22 +2039,22 @@ found_blocks: brelse(dibh); } } - if (rgd->rd_free < *nblocks) { + if (rbm.rgd->rd_free < *nblocks) { printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; } - rgd->rd_free -= *nblocks; + rbm.rgd->rd_free -= *nblocks; if (dinode) { - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; + rbm.rgd->rd_dinodes++; + *generation = rbm.rgd->rd_igeneration++; if (*generation == 0) - *generation = rgd->rd_igeneration++; + *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2081,14 +2068,14 @@ found_blocks: gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, ip->i_inode.i_gid); - rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, rgd, block, *nblocks, + rbm.rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: - gfs2_rgrp_error(rgd); + gfs2_rgrp_error(rbm.rgd); return -EIO; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 0b0e9cc7e3d9..c98f6af07e1c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); @@ -73,22 +73,16 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is in the rgrp tree: */ -static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) +/* This is how to tell if a reservation is in the rgrp tree: */ +static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) { - if (rs && rs->rs_bi) - return 1; - return 0; + return rs && !RB_EMPTY_NODE(&rs->rs_node); } + static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) { return (bi->bi_start * GFS2_NBBY) + blk; } -static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) -{ - return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; -} - #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a14..3cbac6803038 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1557,7 +1557,7 @@ out_truncate: out_unlock: /* Error path for case 1 */ if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a25c252fe412..b947aa4dfca4 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -526,12 +526,12 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; + __entry->dev = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0; + __entry->rd_addr = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0; + __entry->rd_free_clone = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0; + __entry->rd_reserved = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0; __entry->inum = ip ? ip->i_no_addr : 0; - __entry->start = gfs2_rs_startblk(rs); + __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), From 5b924ae2dcb1cc5e78445a0cedb5a3673bb5ad8a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Aug 2012 20:35:05 +0100 Subject: [PATCH 04/27] GFS2: Replace rgblk_search with gfs2_rbm_find This is part of a series of patches which are introducing the gfs2_rbm structure throughout the block allocation code. The main aim of this part is to create a search function which can deal directly with struct gfs2_rbm. In this case it specifies the initial position at which to start the search and also the point at which the search terminates. The net result of this is to clean up the search code and make it rather more readable, and the various possible exceptions which may occur during the search are partitioned into their own functions. There are some bug fixes too. We should not be checking the reservations while allocating extents - the time for that is when we are searching for where to put the extent, not when we've already made that decision. Also, rgblk_search had two uses, and in only one of those cases did it make sense to check for reservations. This is fixed in the new gfs2_rbm_find function, which has a cleaner interface. The reservation checking has been improved by always checking for contiguous reservations, and returning the first free block after all contiguous reservations. This is done under the spin lock to ensure consistancy of the tree. The allocation of extents is now in all cases done by the existing allocation code, and if there is an active reservation, that is updated after the fact. Again this is done under the spin lock, since it entails changing the lookup key for the reservation in question. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 7 + fs/gfs2/rgrp.c | 460 +++++++++++++++++++++++++---------------------- 2 files changed, 257 insertions(+), 210 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d5e254604c72..99d7c64b5091 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -113,6 +113,13 @@ static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; } +static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, + const struct gfs2_rbm *rbm2) +{ + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + (rbm1->offset == rbm2->offset); +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index eaa41885a00d..bd3b926949d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -67,10 +67,6 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); - /** * gfs2_setbit - Set a bit in the bitmaps * @rgd: the resource group descriptor @@ -201,36 +197,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) return 0; } -/** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -1306,9 +1272,6 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ @@ -1463,6 +1426,199 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, return 0; } +/** + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @ip: Ignore any reservations for this inode + * + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved. + */ + +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + const struct gfs2_inode *ip) +{ + struct gfs2_blkreserv *rs; + struct rb_node *n; + int rc; + + spin_lock(&rgd->rd_rsspin); + n = rb_first(&rgd->rd_rstree); + while (n) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(block, 1, rs); + if (rc < 0) + n = n->rb_left; + else if (rc > 0) + n = n->rb_right; + else + break; + } + + if (n) { + while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + n = rb_next(&rs->rs_node); + if (n == NULL) + break; + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + } + } + + spin_unlock(&rgd->rd_rsspin); + return block; +} + +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + return 0; + } + } + + return -E2BIG; +} + +/** + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block. + * + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error + */ + +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip) +{ + u64 block = gfs2_rbm_to_block(rbm); + u64 nblock; + int ret; + + nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + if (nblock == block) + return 0; + ret = gfs2_rbm_from_block(rbm, nblock); + if (ret < 0) + return ret; + return 1; +} + +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + * around until we've reached the starting point. + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + * has no free blocks in it. + * + * Returns: 0 on success, -ENOSPC if there is no block of the requested state + */ + +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, + const struct gfs2_inode *ip, bool nowrap) +{ + struct buffer_head *bh; + struct gfs2_bitmap *initial_bi; + u32 initial_offset; + u32 offset; + u8 *buffer; + int index; + int n = 0; + int iters = rbm->rgd->rd_length; + int ret; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + while(1) { + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; + WARN_ON(!buffer_uptodate(bh)); + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; +find_next: + initial_offset = rbm->offset; + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto bitmap_full; + rbm->offset = offset; + if (ip == NULL) + return 0; + + initial_bi = rbm->bi; + ret = gfs2_reservation_check_and_update(rbm, ip); + if (ret == 0) + return 0; + if (ret > 0) { + n += (rbm->bi - initial_bi); + goto find_next; + } + return ret; + +bitmap_full: /* Mark bitmap as full and fall through */ + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) + break; + n++; + if (n >= iters) + break; + } + + return -ENOSPC; +} + /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp @@ -1475,34 +1631,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { - u32 goal = 0, block; - u64 no_addr; + u64 block; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_bitmap *bi; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; - while (goal < rgd->rd_data) { + while (1) { down_write(&sdp->sd_log_flush_lock); - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); up_write(&sdp->sd_log_flush_lock); - if (block == BFITNOENT) + if (error == -ENOSPC) + break; + if (WARN_ON_ONCE(error)) break; - block = gfs2_bi2rgd_blk(bi, block); - /* rgblk_search can return a block < goal, so we need to - keep it marching forward. */ - no_addr = block + rgd->rd_data0; - goal = max(block + 1, goal + 1); - if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) + block = gfs2_rbm_to_block(&rbm); + if (gfs2_rbm_from_block(&rbm, block + 1)) + break; + if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) continue; - if (no_addr == skip) + if (block == skip) continue; - *last_unlinked = no_addr; + *last_unlinked = block; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); if (error) continue; @@ -1692,105 +1847,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) return type; } -/** - * rgblk_search - find a block in @state - * @rgd: the resource group descriptor - * @goal: the goal block within the RG (start here to search for avail block) - * @state: GFS2_BLKST_XXX the before-allocation state to find - * @rbi: address of the pointer to the bitmap containing the block found - * - * Walk rgrp's bitmap to find bits that represent a block in @state. - * - * This function never fails, because we wouldn't call it unless we - * know (from reservation results, etc.) that a block is available. - * - * Scope of @goal is just within rgrp, not the whole filesystem. - * Scope of @returned block is just within bitmap, not the whole filesystem. - * - * Returns: the block number found relative to the bitmap rbi - */ - -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, - struct gfs2_bitmap **rbi) -{ - struct gfs2_bitmap *bi = NULL; - const u32 length = rgd->rd_length; - u32 biblk = BFITNOENT; - unsigned int buf, x; - const u8 *buffer = NULL; - - *rbi = NULL; - /* Find bitmap block that contains bits for goal block */ - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - /* Search (up to entire) bitmap in this rgrp for allocatable block. - "x <= length", instead of "x < length", because we typically start - the search in the middle of a bit block, but if we can't find an - allocatable block anywhere else, we want to be able wrap around and - search in the first part of our first-searched bit block. */ - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags) && - (state == GFS2_BLKST_FREE)) - goto skip; - - /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone - bitmaps, so we must search the originals for that. */ - buffer = bi->bi_bh->b_data + bi->bi_offset; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - - while (1) { - struct gfs2_blkreserv *rs; - u32 rgblk; - - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (biblk == BFITNOENT) - break; - /* Check if this block is reserved() */ - rgblk = gfs2_bi2rgd_blk(bi, biblk); - rs = rs_find(rgd, rgblk); - if (rs == NULL) - break; - - BUG_ON(rs->rs_rbm.bi != bi); - biblk = BFITNOENT; - /* This should jump to the first block after the - reservation. */ - goal = rs->rs_rbm.offset + rs->rs_free; - if (goal >= bi->bi_len * GFS2_NBBY) - break; - } - if (biblk != BFITNOENT) - break; - - if ((goal == 0) && (state == GFS2_BLKST_FREE)) - set_bit(GBF_FULL, &bi->bi_flags); - - /* Try next bitmap block (wrap back to rgrp header if at end) */ -skip: - buf++; - buf %= length; - goal = 0; - } - - if (biblk != BFITNOENT) - *rbi = bi; - - return biblk; -} /** * gfs2_alloc_extent - allocate an extent from a given bitmap @@ -1809,9 +1865,8 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, struct gfs2_bitmap *bi = rbm->bi; u32 blk = rbm->offset; const unsigned int elen = *n; - u32 goal, rgblk; + u32 goal; const u8 *buffer = NULL; - struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1824,10 +1879,6 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; - rgblk = gfs2_bi2rgd_blk(bi, goal); - rs = rs_find(rgd, rgblk); - if (rs) /* Oops, we bumped into someone's reservation */ - break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1933,46 +1984,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * claim_reserved_blks - Claim previously reserved blocks - * @ip: the inode that's claiming the reservation - * @dinode: 1 if this block is a dinode block, otherwise data block - * @nblocks: desired extent length + * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation + * @ip: The inode we have just allocated blocks for + * @rbm: The start of the allocated blocks + * @len: The extent length * - * Lay claim to previously reserved blocks. - * Returns: Starting block number of the blocks claimed. - * Sets *nblocks to the actual extent length allocated. + * Adjusts a reservation after an allocation has taken place. If the + * reservation does not match the allocation, or if it is now empty + * then it is removed. */ -static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, - unsigned int *nblocks) + +static void gfs2_adjust_reservation(struct gfs2_inode *ip, + const struct gfs2_rbm *rbm, unsigned len) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - struct gfs2_bitmap *bi; - u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); - const unsigned int elen = *nblocks; + struct gfs2_rgrpd *rgd = rbm->rgd; + unsigned rlen; + u64 block; + int ret; - bi = rs->rs_rbm.bi; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - - for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_rbm.offset++; - rs->rs_free--; - - BUG_ON(!rgd->rd_reserved); - rgd->rd_reserved--; - dinode = false; + spin_lock(&rgd->rd_rsspin); + if (gfs2_rs_active(rs)) { + if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { + block = gfs2_rbm_to_block(rbm); + ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + rlen = min(rs->rs_free, len); + rs->rs_free -= rlen; + rgd->rd_reserved -= rlen; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + if (rs->rs_free && !ret) + goto out; + } + __rs_deltree(ip, rs); } - - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); - if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(ip, rs); - - return start_block; +out: + spin_unlock(&rgd->rd_rsspin); } /** @@ -1993,36 +2039,30 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal; /* block, within the rgrp scope */ + u64 goal; u64 block; /* block, within the file system scope */ int error; - /* If we have a reservation, claim blocks from it. */ - if (gfs2_rs_active(ip->i_res)) { - BUG_ON(!ip->i_res->rs_free); - rbm.rgd = ip->i_res->rs_rbm.rgd; - block = claim_reserved_blks(ip, dinode, nblocks); - if (*nblocks) - goto found_blocks; - } - - if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal - rbm.rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; else - goal = rbm.rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (rbm.offset == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + if (error) { + fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } block = gfs2_alloc_extent(&rbm, dinode, nblocks); -found_blocks: + if (gfs2_rs_active(ip->i_res)) + gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) ndata--; From 3983903a712e74548fa08ef25d68e55b8e4349c6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:10:30 +0100 Subject: [PATCH 05/27] GFS2: Update gfs2_get_block_type() to use rbm Use the new gfs2_rbm_from_block() function to replace an open coded version of the same code. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bd3b926949d0..0c1be38f8370 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1824,27 +1824,14 @@ void gfs2_inplace_release(struct gfs2_inode *ip) static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) { - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_block, buf_block; - unsigned int buf; - unsigned char type; + struct gfs2_rbm rbm = { .rgd = rgd, }; + int ret; - length = rgd->rd_length; - rgrp_block = block - rgd->rd_data0; + ret = gfs2_rbm_from_block(&rbm, block); + WARN_ON_ONCE(ret != 0); - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } - - gfs2_assert(rgd->rd_sbd, buf < length); - buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; - - type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, buf_block); - - return type; + return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len, rbm.offset); } From 3b1d0b9d0b6f4b76293c8f30cc95aa946bd34150 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:23:28 +0100 Subject: [PATCH 06/27] GFS2: Update rgblk_free() to use rbm Replace open coded version with a call to gfs2_rbm_from_block() Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0c1be38f8370..06476b34a1b9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1890,46 +1890,30 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { - struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_blk, buf_blk; - unsigned int buf; + struct gfs2_rbm rbm; - rgd = gfs2_blk2rgrpd(sdp, bstart, 1); - if (!rgd) { + rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); + if (!rbm.rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); return NULL; } - length = rgd->rd_length; - - rgrp_blk = bstart - rgd->rd_data0; - while (blen--) { - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + gfs2_rbm_from_block(&rbm, bstart); + bstart++; + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - - gfs2_assert(rgd->rd_sbd, buf < length); - - buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; - rgrp_blk++; - - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len); - } - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); } - return rgd; + return rbm.rgd; } /** From 24d634e8f3b43fe2eb7c7d66567de7aba8edc308 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Sun, 5 Aug 2012 22:04:08 -0700 Subject: [PATCH 07/27] GFS2: Use RB_CLEAR_NODE() rather than rb_init_node() gfs2 calls RB_EMPTY_NODE() to check if nodes are not on an rbtree. The corresponding initialization function is RB_CLEAR_NODE(). rb_init_node() was never clearly defined and is going away. Signed-off-by: Michel Lespinasse Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 06476b34a1b9..7ce22d8c489b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -453,7 +453,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; - rb_init_node(&res->rs_node); + RB_CLEAR_NODE(&res->rs_node); down_write(&ip->i_rw_mutex); if (ip->i_res) @@ -486,7 +486,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) rgd = rs->rs_rbm.rgd; trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); - rb_init_node(&rs->rs_node); + RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; From 5d50d5324612d28c47b9361e5424f13a19c888cd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Aug 2012 13:47:12 +0100 Subject: [PATCH 08/27] GFS2: Fix case where reservation finished at end of rgrp One corner case which the original patch failed to take into account was when there is a reservation which ended such that the following block was one beyond the end of the rgrp in question. This extra test fixes that case. Signed-off-by: Steven Whitehouse Reported-by: Bob Peterson Tested-by: Bob Peterson --- fs/gfs2/rgrp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7ce22d8c489b..c17029a92b8f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1596,6 +1596,12 @@ find_next: n += (rbm->bi - initial_bi); goto find_next; } + if (ret == -E2BIG) { + index = 0; + rbm->offset = 0; + n += (rbm->bi - initial_bi); + goto res_covered_end_of_rgrp; + } return ret; bitmap_full: /* Mark bitmap as full and fall through */ @@ -1608,6 +1614,7 @@ next_bitmap: /* Find next bitmap in the rgrp */ index++; if (index == rbm->rgd->rd_length) index = 0; +res_covered_end_of_rgrp: rbm->bi = &rbm->rgd->rd_bits[index]; if ((index == 0) && nowrap) break; From 8d8b752a0f55282498b918f6a28f87ee2bcf19c3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 7 Aug 2012 13:28:17 -0400 Subject: [PATCH 09/27] GFS2: rbm code cleanup This patch fixes a few small rbm related things. First, it fixes a corner case where the rbm needs to switch bitmaps and wasn't adjusting its buffer pointer. Second, there's a white space issue fixed. Third, the logic in function gfs2_rbm_from_block was optimized a bit. Lastly, a check for goal block overflows was added to function gfs2_alloc_blocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c17029a92b8f..c26711882a6b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -467,7 +467,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -1493,16 +1493,18 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; for (x = 0; x < rbm->rgd->rd_length; x++) { rbm->bi = rbm->rgd->rd_bits + x; if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - return 0; + break; } } - return -E2BIG; + return 0; } /** @@ -1579,7 +1581,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; -find_next: initial_offset = rbm->offset; offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) @@ -1594,7 +1595,7 @@ find_next: return 0; if (ret > 0) { n += (rbm->bi - initial_bi); - goto find_next; + goto next_iter; } if (ret == -E2BIG) { index = 0; @@ -1619,6 +1620,7 @@ res_covered_end_of_rgrp: if ((index == 0) && nowrap) break; n++; +next_iter: if (n >= iters) break; } @@ -2028,6 +2030,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; + if ((goal < rbm.rgd->rd_data0) || + (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) + rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); + gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); From 8e711e100fd63b9cbf095030d45fd5ee2fa4c995 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:42 -0500 Subject: [PATCH 10/27] GFS2: change function gfs2_direct_IO to use a normal gfs2_glock_dq This patch changes function gfs2_direct_IO so that it uses a normal call to gfs2_glock_dq rather than a call to a multiple-dq of one item. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 00eaa83871b7..01c4975da4bc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1024,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); out: - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return rv; } From 4abb6ad9eae0aebfcec4f188a408447cc4ea1cb4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:43 -0500 Subject: [PATCH 11/27] GFS2: inline __gfs2_glock_schedule_for_reclaim Since function gfs2_glock_schedule_for_reclaim is only two significant lines, we can eliminate it, simplifying the code and making it more readable. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0d..67f3e42d4bd2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -185,20 +185,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } -/** - * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - * If the glock is demotable, then we add it (or move it) to the end - * of the glock LRU list. - */ - -static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - if (demote_ok(gl)) - gfs2_glock_add_to_lru(gl); -} - /** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put @@ -1121,8 +1107,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) From 07a790494260e102eb42840537af82e84d2ab766 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:44 -0500 Subject: [PATCH 12/27] GFS2: Combine functions gfs2_glock_wait and wait_on_holder Function gfs2_glock_wait only called function wait_on_holder and returned its return code, so they were combined for readability. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 67f3e42d4bd2..5c8790960735 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -869,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) return 0; } -static void wait_on_holder(struct gfs2_holder *gh) +/** + * gfs2_glock_wait - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long time1 = jiffies; @@ -880,6 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); + return gh->gh_error; } static void wait_on_demote(struct gfs2_glock *gl) @@ -915,19 +923,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, trace_gfs2_demote_rq(gl); } -/** - * gfs2_glock_wait - wait on a glock acquisition - * @gh: the glock holder - * - * Returns: 0 on success - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - wait_on_holder(gh); - return gh->gh_error; -} - void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; From 81e1d45061d09f296d7664af70d3334840d123a0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:45 -0500 Subject: [PATCH 13/27] GFS2: Combine functions gfs2_glock_dq_wait and wait_on_demote Function gfs2_glock_dq_wait called two-line function wait_on_demote, so they were combined. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5c8790960735..fca6a873dcf3 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -890,12 +890,6 @@ int gfs2_glock_wait(struct gfs2_holder *gh) return gh->gh_error; } -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); -} - /** * handle_callback - process a demote request * @gl: the glock @@ -1123,7 +1117,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** From e5dc76b9afcfb936818261d65f6f9e1f2c346d59 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:46 -0500 Subject: [PATCH 14/27] GFS2: Eliminate redundant calls to may_grant Function add_to_queue was checking may_grant for the passed-in holder for every iteration of its gh2 loop. Now it only checks it once at the beginning to see if a try lock is futile. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fca6a873dcf3..e6c2fd53cab2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -954,7 +954,7 @@ __acquires(&gl->gl_spin) struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_lock = 0; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) @@ -962,7 +962,7 @@ __acquires(&gl->gl_spin) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) - try_lock = 1; + try_futile = !may_grant(gl, gh); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } @@ -971,9 +971,8 @@ __acquires(&gl->gl_spin) if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) goto trap_recursive; - if (try_lock && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && - !may_grant(gl, gh)) { + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: gh->gh_error = GLR_TRYFAILED; gfs2_holder_wake(gh); From 29c05b205d4d30248982d5167bb142146db89bd3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:47 -0500 Subject: [PATCH 15/27] GFS2: Eliminate unnecessary check for state > 3 in bitfit Function gfs2_bitfit was checking for state > 3, but that's impossible since it is only called from rgblk_search, which receives only GFS2_BLKST_ constants. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c26711882a6b..47d2346575ad 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -228,8 +228,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); From c04a2ef3a8b51cf58d8ff18bb4d6af17252f0fb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:14:57 +0100 Subject: [PATCH 16/27] GFS2: Use rbm for gfs2_testbit() Change the arguments to gfs2_testbit() so that it now just takes an rbm specifying the position of the two bit entry to return. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 72 ++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 47d2346575ad..3a288cec5af0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -117,30 +117,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - gfs2_assert(rgd->rd_sbd, byte < end); - - cur_state = (*byte >> bit) & GFS2_BIT_MASK; - - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -1837,8 +1828,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) ret = gfs2_rbm_from_block(&rbm, block); WARN_ON_ONCE(ret != 0); - return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len, rbm.offset); + return gfs2_testbit(&rbm); } @@ -1846,42 +1836,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode - * @n: The extent length + * @n: The extent length (value/result) * - * Add the found bitmap buffer to the transaction. + * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. - * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, +static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, unsigned int *n) { - struct gfs2_rgrpd *rgd = rbm->rgd; - struct gfs2_bitmap *bi = rbm->bi; - u32 blk = rbm->offset; + struct gfs2_rbm pos = { .rgd = rbm->rgd, }; const unsigned int elen = *n; - u32 goal; - const u8 *buffer = NULL; + u64 block; + int ret; - *n = 0; - buffer = bi->bi_bh->b_data + bi->bi_offset; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_clone, bi, blk, + *n = 1; + block = gfs2_rbm_to_block(rbm); + gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - (*n)++; - goal = blk; + block++; while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) + ret = gfs2_rbm_from_block(&pos, block); + WARN_ON(ret); + if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); + gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); (*n)++; + block++; } - blk = gfs2_bi2rgd_blk(bi, blk); - rgd->rd_last_alloc = blk + *n - 1; - return rgd->rd_data0 + blk; } /** @@ -2042,7 +2025,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goto rgrp_error; } - block = gfs2_alloc_extent(&rbm, dinode, nblocks); + gfs2_alloc_extent(&rbm, dinode, nblocks); + block = gfs2_rbm_to_block(&rbm); if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; From 3e6339dd2850353781513dec42a8d56690e9e1db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:37:51 +0100 Subject: [PATCH 17/27] GFS2: Use rbm for gfs2_setbit() Use the rbm structure for gfs2_setbit() in order to simplify the arguments to the function. We have to add a bool to control whether the clone bitmap should be updated (if it exists) but otherwise it is a more or less direct substitution. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3a288cec5af0..55a2651666c9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -69,47 +69,42 @@ static const char valid_change[16] = { /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -1852,8 +1847,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); - gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); @@ -1861,7 +1855,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); - gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); + gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; } @@ -1900,7 +1894,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, rbm.bi->bi_len); } gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); - gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); + gfs2_setbit(&rbm, false, new_state); } return rbm.rgd; From 2b9731e8bb1bdc6b9da149f4e482a071747207f3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 20 Aug 2012 17:07:49 +0100 Subject: [PATCH 18/27] GFS2: Fix ->show_options() for statfs slow The ->show_options() function for GFS2 was not correctly displaying the value when statfs slow in in use. Signed-off-by: Steven Whitehouse Reported-by: Milos Jakubicek --- fs/gfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3cbac6803038..79cac7057691 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) val = sdp->sd_tune.gt_statfs_quantum; if (val != 30) seq_printf(s, ",statfs_quantum=%d", val); + else if (sdp->sd_tune.gt_statfs_slow) + seq_puts(s, ",statfs_quantum=0"); val = sdp->sd_tune.gt_quota_quantum; if (val != 60) seq_printf(s, ",quota_quantum=%d", val); From 137834a696fd51ef8c710a0ad854b585027c7df0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 13:43:40 +0100 Subject: [PATCH 19/27] GFS2: Fall back to ignoring reservations, if there are no other blocks left When we get to the stage of allocating blocks, we know that the resource group in question must contain enough free blocks, otherwise gfs2_inplace_reserve() would have failed. So if we are left with only free blocks which are reserved, then we must use those. This can happen if another node has sneeked in and use some blocks reserved on this node, for example. Generally this will happen very rarely and only when the resouce group is nearly full. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 55a2651666c9..30c864e70298 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2012,6 +2012,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + if (error == -ENOSPC) { + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + } + /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, From 9e733d3923fb0e4eeae7b827019332d246576a22 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 15:37:59 +0100 Subject: [PATCH 20/27] GFS2: Improve block reservation tracing This patch improves the tracing of block reservations by removing some corner cases and also providing more useful detail in the traces. A new field is added to the reservation structure to contain the inode number. This is used since in certain contexts it is not possible to access the inode itself to obtain this information. As a result we can then display the inode number for all tracepoints and also in case we dump the resource group. The "del" tracepoint operation has been removed. This could be called with the reservation rgrp set to NULL. That resulted in not printing the device number, and thus making the information largely useless anyway. Also, the conditional on the rgrp being NULL can then be removed from the tracepoint. After this change, all the block reservation tracepoint calls will be called with the rgrp information. The existing ins,clm and tdel calls to the block reservation tracepoint are sufficient to track the entire life of the block reservation. In gfs2_block_alloc() the error detection is updated to print out the inode number of the problematic inode. This can then be compared against the information in the glock dump,tracepoints, etc. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 6 ++---- fs/gfs2/rgrp.c | 22 ++++++++++------------ fs/gfs2/trace_gfs2.h | 18 ++++++++---------- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 99d7c64b5091..6aaa07c7c731 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -268,13 +268,11 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during get_local_rgrp (step 3): */ - struct gfs2_rbm rs_rbm; struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ - - /* components used during block searches and assignments (step 4): */ + struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ + u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 30c864e70298..87ee0b70f818 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -448,10 +448,11 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -468,7 +469,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) return; rgd = rs->rs_rbm.rgd; - trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); @@ -511,7 +512,6 @@ void gfs2_rs_delete(struct gfs2_inode *ip) down_write(&ip->i_rw_mutex); if (ip->i_res) { gfs2_rs_deltree(ip, ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -1253,6 +1253,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rs->rs_free = amount; rs->rs_rbm.offset = biblk; rs->rs_rbm.bi = bi; + rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1260,7 +1261,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ spin_unlock(&rgd->rd_rsspin); - trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + trace_gfs2_rs(rs, TRACE_RS_INSERT); return rs; } @@ -1966,7 +1967,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, rlen = min(rs->rs_free, len); rs->rs_free -= rlen; rgd->rd_reserved -= rlen; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_free && !ret) goto out; } @@ -2005,10 +2006,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - if ((goal < rbm.rgd->rd_data0) || - (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) - rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); - gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); @@ -2019,7 +2016,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { - fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + (unsigned long long)ip->i_no_addr, error, *nblocks, test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index b947aa4dfca4..bbdc78af60ca 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc, /* Keep track of multi-block reservations as they are allocated/freed */ TRACE_EVENT(gfs2_rs, - TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, - u8 func), + TP_PROTO(const struct gfs2_blkreserv *rs, u8 func), - TP_ARGS(ip, rs, func), + TP_ARGS(rs, func), TP_STRUCT__entry( __field( dev_t, dev ) @@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0; - __entry->inum = ip ? ip->i_no_addr : 0; + __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->inum = rs->rs_inum; __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " - "f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, From c743ffd09fa7d3464c6f74767a3ae2ca5dc3ebf7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Sat, 25 Aug 2012 18:21:47 +0100 Subject: [PATCH 21/27] GFS2: Fix unclaimed_blocks() wrapping bug and clean up When rgd->rd_free_clone is less than rgd->rd_reserved, the unclaimed_blocks() calculation would wrap and produce incorrect results. This patch checks for this condition when this function is called from gfs2_mblk_search() In addition, the use of this particular function in other places in the code has been dropped by means of a general clean up of gfs2_inplace_reserve(). This function is now much easier to follow. Also the setting of the rgd->rd_last_alloc field is corrected. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 193 ++++++++++++++++++++++--------------------------- 1 file changed, 88 insertions(+), 105 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 87ee0b70f818..886954126704 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1231,7 +1231,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, BUG_ON(!ip->i_res); BUG_ON(gfs2_rs_active(rs)); /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); @@ -1276,17 +1276,16 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) /** * rg_mblk_search - find a group of multiple free blocks * @rgd: the resource group descriptor - * @rs: the block reservation * @ip: pointer to the inode for which we're reserving blocks + * @requested: number of blocks required for this allocation * * This is very similar to rgblk_search, except we're looking for whole * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing * on aligned dwords for speed's sake. * - * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1299,11 +1298,16 @@ static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigne u32 best_rs_bytes, unclaimed; int best_rs_blocks; + if ((rgd->rd_free_clone < rgd->rd_reserved) || + (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + return; + /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) goal = ip->i_goal - rgd->rd_data0; else goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; /* Convert scope of "goal" from rgrp-wide to within @@ -1366,10 +1370,8 @@ do_search: BUG_ON(blk >= bi->bi_len * GFS2_NBBY); rs = rs_insert(bi, ip, blk, rsv_bytes * GFS2_NBBY); - if (IS_ERR(rs)) - return PTR_ERR(rs); if (rs) - return 0; + return; } ptr += ALIGN(search_bytes, sizeof(u64)); } @@ -1380,35 +1382,6 @@ skip: buf %= length; goal = 0; } - - return BFITNOENT; -} - -/** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @ip: the inode - * - * If there's room for the requested blocks to be allocated from the RG: - * This will try to get a multi-block reservation first, and if that doesn't - * fit, it will take what it can. - * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) - */ - -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) -{ - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) - return 0; - /* Look for a multi-block reservation. */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip, requested) != BFITNOENT) - return 1; - if (unclaimed_blocks(rgd) >= requested) - return 1; - - return 0; } /** @@ -1678,6 +1651,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ + struct gfs2_rgrpd *rgd = *pos; + + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == NULL) + rgd = gfs2_rgrpd_get_next(NULL); + *pos = rgd; + if (rgd != begin) /* If we didn't wrap */ + return true; + return false; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1697,10 +1683,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } + if (gfs2_assert_warn(sdp, requested)) + return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ @@ -1713,84 +1697,82 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) return -EBADSLT; while (loops < 3) { - rg_locked = 0; + rg_locked = 1; - if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { - /* If the rgrp already is maxed out for contenders, - we can eliminate it as a "first pass" without even - requesting the rgrp glock. */ - error = GLR_TRYFAILED; - } else { + if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + rg_locked = 0; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (!error && sdp->sd_args.ar_rgrplvb) { + if (error == GLR_TRYFAILED) + goto next_rgrp; + if (unlikely(error)) + return error; + if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); - if (error) { + if (unlikely(error)) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; } } } - switch (error) { - case 0: - if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rbm.rgd) + - rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - /* We have a multi-block reservation, but the - rgrp doesn't have enough free blocks to - satisfy the request. Free the reservation - and look for a suitable rgrp. */ - gfs2_rs_deltree(ip, rs); - } - if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, - ip->i_no_addr); - } - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); - rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ - break; - flags &= ~LM_FLAG_TRY; - loops++; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - goto out; - } else if (loops == 2) - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - break; - default: - goto out; + /* Skip unuseable resource groups */ + if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + goto skip_rgrp; + + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + + /* Get a reservation if we don't already have one */ + if (!gfs2_rs_active(rs)) + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + + /* Skip rgrps when we can't get a reservation on first pass */ + if (!gfs2_rs_active(rs) && (loops < 1)) + goto check_rgrp; + + /* If rgrp has enough free space, use it */ + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + ip->i_rgd = rs->rs_rbm.rgd; + return 0; } - } - error = -ENOSPC; -out: - return error; + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(ip, rs); +check_rgrp: + /* Check for unlinked inodes which can be reclaimed */ + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + ip->i_no_addr); +skip_rgrp: + /* Unlock rgrp if required */ + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); +next_rgrp: + /* Find the next rgrp, and continue looking */ + if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + continue; + + /* If we've scanned all the rgrps, but found no free blocks + * then this checks for some less likely conditions before + * trying again. + */ + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; + } + /* Flushing the log may release space */ + if (loops == 2) + gfs2_log_flush(sdp, NULL); + } + + return -ENOSPC; } /** @@ -2024,6 +2006,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); + rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; From 0688a5ecea61a36ba12d17a18ab9f8712145cfa2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 28 Aug 2012 08:45:56 -0400 Subject: [PATCH 22/27] GFS2: Stop block extents at the end of bitmaps This patch stops multiple block allocations if a nonzero return code is received from gfs2_rbm_from_block. Without this patch, if enough pressure is put on the file system, you get a kernel warning quickly followed by: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] gfs2_alloc_blocks+0x2c8/0x880 [gfs2] With this patch, things run normally. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 886954126704..defb8265ce52 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1834,8 +1834,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - WARN_ON(ret); - if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); gfs2_setbit(&pos, true, GFS2_BLKST_USED); From 56aa72d0fcc9c4a3af4d0111d8d7f336b63adff9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 5 Sep 2012 16:55:11 -0400 Subject: [PATCH 23/27] GFS2: Get rid of I_MUTEX_QUOTA usage GFS2 uses i_mutex on its system quota inode to synchronize writes to quota file. Since this is an internal inode to GFS2 (not part of directory hiearchy or visible by user) we are safe to define locking rules for it. So let's just get it its own locking class to make it clear. Signed-off-by: Jan Kara Signed-off-by: J. Bruce Fields Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 8 ++++++++ fs/gfs2/quota.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420ef..e443966c8106 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -766,6 +767,7 @@ fail: return error; } +static struct lock_class_key gfs2_quota_imutex_key; static int init_inodes(struct gfs2_sbd *sdp, int undo) { @@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + /* + * i_mutex on quota files is special. Since this inode is hidden system + * file, we are safe to define locking ourselves. + */ + lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 420bc3805ccc..4021deca61ef 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -782,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); + mutex_lock(&ip->i_inode.i_mutex); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); From ff7f4cb461163967a9dbb8c569e2447b7520654f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 10 Sep 2012 10:03:50 +0100 Subject: [PATCH 24/27] GFS2: Consolidate free block searching functions With the recently added block reservation code, an additional function was added to search for free blocks. This had a restriction of only being able to search for aligned extents of free blocks. As a result the allocation patterns when reserving blocks were suboptimal when the existing allocation of blocks for an inode was not aligned to the same boundary. This patch resolves that problem by adding the ability for gfs2_rbm_find to search for extents of a particular minimum size. We can then use gfs2_rbm_find for both looking for reservations, and also looking for free blocks on an individual basis when we actually come to do the allocation later on. As a result we only need a single set of code to deal with both situations. The function gfs2_rbm_from_block() is moved up rgrp.c so that it occurs before all of its callers. Many thanks are due to Bob for helping track down the final issue in this patch. That fix to the rb_tree traversal and to not share block reservations from a dirctory to its children is included here. Signed-off-by: Steven Whitehouse Signed-off-by: Bob Peterson --- fs/gfs2/incore.h | 1 - fs/gfs2/inode.c | 11 +- fs/gfs2/rgrp.c | 367 +++++++++++++++++++++++++---------------------- fs/gfs2/rgrp.h | 6 - 4 files changed, 195 insertions(+), 190 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6aaa07c7c731..3d469d37345e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -99,7 +99,6 @@ struct gfs2_rgrpd { #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ - u32 rd_rs_cnt; /* count of current reservations */ }; struct gfs2_rbm { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index f2709ea887da..381893ceefa4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - /* The newly created inode needs a reservation so it can allocate - xattrs. At the same time, we want new blocks allocated to the new - dinode to be as contiguous as possible. Since we allocated the - dinode block under the directory's reservation, we transfer - ownership of that reservation to the new inode. The directory - doesn't need a reservation unless it needs a new allocation. */ - ip->i_res = dip->i_res; - dip->i_res = NULL; + error = gfs2_rs_alloc(ip); + if (error) + goto fail_gunlock2; error = gfs2_acl_create(dip, inode); if (error) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index defb8265ce52..b933cdcda7f4 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,6 +64,10 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + + /** * gfs2_setbit - Set a bit in the bitmaps * @rbm: The position of the bit to set @@ -234,6 +235,130 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; } +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + /** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor @@ -472,8 +597,6 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ @@ -1208,179 +1331,85 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + + BUG_ON(gfs2_rs_active(rs)); spin_lock(&rgd->rd_rsspin); newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); - BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ - while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_rbm.offset = biblk; - rs->rs_rbm.bi = bi; - rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); - return rs; } /** - * unclaimed_blocks - return number of blocks that aren't spoken for - */ -static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) -{ - return rgd->rd_free_clone - rgd->rd_reserved; -} - -/** - * rg_mblk_search - find a group of multiple free blocks + * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks * @requested: number of blocks required for this allocation * - * This is very similar to rgblk_search, except we're looking for whole - * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing - * on aligned dwords for speed's sake. - * */ -static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_bitmap *bi = rgd->rd_bits; - const u32 length = rgd->rd_length; - u32 blk; - unsigned int buf, x, search_bytes; - u8 *buffer = NULL; - u8 *ptr, *end, *nonzero; - u32 goal, rsv_bytes; - struct gfs2_blkreserv *rs; - u32 best_rs_bytes, unclaimed; - int best_rs_blocks; + struct gfs2_rbm rbm = { .rgd = rgd, }; + u64 goal; + struct gfs2_blkreserv *rs = ip->i_res; + u32 extlen; + u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + int ret; - if ((rgd->rd_free_clone < rgd->rd_reserved) || - (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) return; /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; + goal = rgd->rd_last_alloc + rgd->rd_data0; - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within - found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; + if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) + return; -do_search: - best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), - (RGRP_RSRV_MINBLKS * rgd->rd_length)); - best_rs_bytes = (best_rs_blocks * - (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / - GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ - best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); - unclaimed = unclaimed_blocks(rgd); - if (best_rs_bytes * GFS2_NBBY > unclaimed) - best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; - - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags)) - goto skip; - - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - else - buffer = bi->bi_bh->b_data + bi->bi_offset; - - /* We have to keep the reservations aligned on u64 boundaries - otherwise we could get situations where a byte can't be - used because it's after a reservation, but a free bit still - is within the reservation's area. */ - ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); - end = (buffer + bi->bi_len); - while (ptr < end) { - rsv_bytes = 0; - if ((ptr + best_rs_bytes) <= end) - search_bytes = best_rs_bytes; - else - search_bytes = end - ptr; - BUG_ON(!search_bytes); - nonzero = memchr_inv(ptr, 0, search_bytes); - /* If the lot is all zeroes, reserve the whole size. If - there's enough zeroes to satisfy the request, use - what we can. If there's not enough, keep looking. */ - if (nonzero == NULL) - rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= requested) - rsv_bytes = (nonzero - ptr); - - if (rsv_bytes) { - blk = ((ptr - buffer) * GFS2_NBBY); - BUG_ON(blk >= bi->bi_len * GFS2_NBBY); - rs = rs_insert(bi, ip, blk, - rsv_bytes * GFS2_NBBY); - if (rs) - return; - } - ptr += ALIGN(search_bytes, sizeof(u64)); - } -skip: - /* Try next bitmap block (wrap back to rgrp header - if at end) */ - buf++; - buf %= length; - goal = 0; + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + if (ret == 0) { + rs->rs_rbm = rbm; + rs->rs_free = extlen; + rs->rs_inum = ip->i_no_addr; + rs_insert(ip); } } @@ -1388,6 +1417,7 @@ skip: * gfs2_next_unreserved_block - Return next block that is not reserved * @rgd: The resource group * @block: The starting block + * @length: The required length * @ip: Ignore any reservations for this inode * * If the block does not appear in any reservation, then return the @@ -1397,6 +1427,7 @@ skip: */ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + u32 length, const struct gfs2_inode *ip) { struct gfs2_blkreserv *rs; @@ -1404,10 +1435,10 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, int rc; spin_lock(&rgd->rd_rsspin); - n = rb_first(&rgd->rd_rstree); + n = rgd->rd_rstree.rb_node; while (n) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(block, 1, rs); + rc = rs_cmp(block, length, rs); if (rc < 0) n = n->rb_left; else if (rc > 0) @@ -1417,9 +1448,9 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; - n = rb_next(&rs->rs_node); + n = n->rb_right; if (n == NULL) break; rs = rb_entry(n, struct gfs2_blkreserv, rs_node); @@ -1430,44 +1461,11 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, return block; } -/** - * gfs2_rbm_from_block - Set the rbm based upon rgd and block number - * @rbm: The rbm with rgd already set correctly - * @block: The block number (filesystem relative) - * - * This sets the bi and offset members of an rbm based on a - * resource group and a filesystem relative block number. The - * resource group must be set in the rbm on entry, the bi and - * offset members will be set by this function. - * - * Returns: 0 on success, or an error code - */ - -static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) -{ - u64 rblock = block - rbm->rgd->rd_data0; - u32 goal = (u32)rblock; - int x; - - if (WARN_ON_ONCE(rblock > UINT_MAX)) - return -EINVAL; - if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) - return -E2BIG; - - for (x = 0; x < rbm->rgd->rd_length; x++) { - rbm->bi = rbm->rgd->rd_bits + x; - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - break; - } - } - - return 0; -} - /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a @@ -1479,15 +1477,33 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip) + const struct gfs2_inode *ip, + u32 minext) { u64 block = gfs2_rbm_to_block(rbm); + u32 extlen = 1; u64 nblock; int ret; - nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + /* + * If we have a minimum extent length, then skip over any extent + * which is less than the min extent length in size. + */ + if (minext) { + extlen = gfs2_free_extlen(rbm, minext); + nblock = block + extlen; + if (extlen < minext) + goto fail; + } + + /* + * Check the extent which has been found against the reservations + * and skip if parts of it are already reserved + */ + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); if (nblock == block) return 0; +fail: ret = gfs2_rbm_from_block(rbm, nblock); if (ret < 0) return ret; @@ -1498,6 +1514,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find + * @minext: The requested extent length (0 for a single block) * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. @@ -1509,7 +1526,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; @@ -1548,7 +1565,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, return 0; initial_bi = rbm->bi; - ret = gfs2_reservation_check_and_update(rbm, ip); + ret = gfs2_reservation_check_and_update(rbm, ip, minext); if (ret == 0) return 0; if (ret > 0) { @@ -1608,7 +1625,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip while (1) { down_write(&sdp->sd_log_flush_lock); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; @@ -1988,11 +2005,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); if (error == -ENOSPC) { gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index c98f6af07e1c..24077958dcf6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -79,10 +79,4 @@ static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) return rs && !RB_EMPTY_NODE(&rs->rs_node); } - -static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; -} - #endif /* __RGRP_DOT_H__ */ From 3701530aed9711fcfe9b29d3de0f7a27c8dbc8ae Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Sep 2012 09:40:31 -0400 Subject: [PATCH 25/27] GFS2: Fix infinite loop in rbm_find This patch fixes an infinite loop in gfs2_rbm_find that was introduced by the previous patch. The problem occurred when the length was less than 3 but the rbm block was byte-aligned, causing it to improperly return a extent length of zero, which caused it to spin. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse Tested-by: Bob Peterson Tested-by: Barry Marson --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b933cdcda7f4..3cc402ce6fea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -329,6 +329,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) goto out; + n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { start = rbm.bi->bi_bh->b_data; From a0b4df294387c777c3091a43e9970572e10261b1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 17 Sep 2012 21:50:31 -0500 Subject: [PATCH 26/27] GFS2: fix s_writers.counter imbalance in gfs2_ail_empty_gl gfs2_ail_empty_gl() contains an "inline version" of gfs2_trans_begin(), so it needs an explicit sb_start_intwrite() as well, to balance the sb_end_intwrite() which will be called by gfs2_trans_end(). With this, xfstest 068 passes on lock_nolock local gfs2. Without it, we reach a writer count of -1 and get stuck. Signed-off-by: Eric Sandeen Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf3784187..32cc4fde975c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); + sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; From 2216db70c95a9610f5279b8da53bec614d98270f Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 20 Sep 2012 09:52:58 -0500 Subject: [PATCH 27/27] GFS2: Write out dirty inode metadata in delayed deletes If a dirty GFS2 inode was being deleted but was in use by another node, its metadata was not getting written out before GFS2 checked for dirty buffers in gfs2_ail_flush(). GFS2 was relying on inode_go_sync() to write out the metadata when the other node tried to free the file, but it failed the error check before it got that far. This patch writes out the metadata before calling gfs2_ail_flush() Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 79cac7057691..a8d90f2f576c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1545,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode) out_truncate: gfs2_log_flush(sdp, ip->i_gl); + if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + filemap_fdatawrite(metamapping); + filemap_fdatawait(metamapping); + } write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0);