diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 79e898cb5562..041b6948aecc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4717,6 +4717,66 @@ error0: return error; } +/* + * When a delalloc extent is split (e.g., due to a hole punch), the original + * indlen reservation must be shared across the two new extents that are left + * behind. + * + * Given the original reservation and the worst case indlen for the two new + * extents (as calculated by xfs_bmap_worst_indlen()), split the original + * reservation fairly across the two new extents. If necessary, steal available + * blocks from a deleted extent to make up a reservation deficiency (e.g., if + * ores == 1). The number of stolen blocks is returned. The availability and + * subsequent accounting of stolen blocks is the responsibility of the caller. + */ +static xfs_filblks_t +xfs_bmap_split_indlen( + xfs_filblks_t ores, /* original res. */ + xfs_filblks_t *indlen1, /* ext1 worst indlen */ + xfs_filblks_t *indlen2, /* ext2 worst indlen */ + xfs_filblks_t avail) /* stealable blocks */ +{ + xfs_filblks_t len1 = *indlen1; + xfs_filblks_t len2 = *indlen2; + xfs_filblks_t nres = len1 + len2; /* new total res. */ + xfs_filblks_t stolen = 0; + + /* + * Steal as many blocks as we can to try and satisfy the worst case + * indlen for both new extents. + */ + while (nres > ores && avail) { + nres--; + avail--; + stolen++; + } + + /* + * The only blocks available are those reserved for the original + * extent and what we can steal from the extent being removed. + * If this still isn't enough to satisfy the combined + * requirements for the two new extents, skim blocks off of each + * of the new reservations until they match what is available. + */ + while (nres > ores) { + if (len1) { + len1--; + nres--; + } + if (nres == ores) + break; + if (len2) { + len2--; + nres--; + } + } + + *indlen1 = len1; + *indlen2 = len2; + + return stolen; +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). @@ -4981,28 +5041,29 @@ xfs_bmap_del_extent( XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); } else { + xfs_filblks_t stolen; ASSERT(whichfork == XFS_DATA_FORK); - temp = xfs_bmap_worst_indlen(ip, temp); + + /* + * Distribute the original indlen reservation across the + * two new extents. Steal blocks from the deleted extent + * if necessary. Stealing blocks simply fudges the + * fdblocks accounting in xfs_bunmapi(). + */ + temp = xfs_bmap_worst_indlen(ip, got.br_blockcount); + temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount); + stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2, + del->br_blockcount); + da_new = temp + temp2 - stolen; + del->br_blockcount -= stolen; + + /* + * Set the reservation for each extent. Warn if either + * is zero as this can lead to delalloc problems. + */ + WARN_ON_ONCE(!temp || !temp2); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } } trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_insert(ip, *idx + 1, 1, &new, state); @@ -5293,31 +5354,7 @@ xfs_bunmapi( goto nodelete; } } - if (wasdel) { - ASSERT(startblockval(del.br_startblock) > 0); - /* Update realtime/data freespace, unreserve quota */ - if (isrt) { - xfs_filblks_t rtexts; - rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_frextents(mp, (int64_t)rtexts); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_RTBLKS); - } else { - xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, - false); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_REGBLKS); - } - ip->i_delayed_blks -= del.br_blockcount; - if (cur) - cur->bc_private.b.flags |= - XFS_BTCUR_BPRV_WASDEL; - } else if (cur) - cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; /* * If it's the case where the directory code is running * with no block reservation, and the deleted block is in @@ -5339,11 +5376,45 @@ xfs_bunmapi( error = -ENOSPC; goto error0; } + + /* + * Unreserve quota and update realtime free space, if + * appropriate. If delayed allocation, update the inode delalloc + * counter now and wait to update the sb counters as + * xfs_bmap_del_extent() might need to borrow some blocks. + */ + if (wasdel) { + ASSERT(startblockval(del.br_startblock) > 0); + if (isrt) { + xfs_filblks_t rtexts; + + rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, (int64_t)rtexts); + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_RTBLKS); + } else { + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_REGBLKS); + } + ip->i_delayed_blks -= del.br_blockcount; + if (cur) + cur->bc_private.b.flags |= + XFS_BTCUR_BPRV_WASDEL; + } else if (cur) + cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; + + if (!isrt && wasdel) + xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false); + bno = del.br_startoff - 1; nodelete: /* diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 63ee03db796c..75a557432d0f 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -2235,6 +2235,9 @@ xfs_dir2_node_trim_free( dp = args->dp; tp = args->trans; + + *rvalp = 0; + /* * Read the freespace block. */ @@ -2255,7 +2258,6 @@ xfs_dir2_node_trim_free( */ if (freehdr.nused > 0) { xfs_trans_brelse(tp, bp); - *rvalp = 0; return 0; } /* diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 75a39a8257ee..7ba680e572fc 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1474,6 +1474,7 @@ xfs_vm_write_failed( loff_t from = pos & (PAGE_CACHE_SIZE - 1); loff_t to = from + len; struct buffer_head *bh, *head; + struct xfs_mount *mp = XFS_I(inode)->i_mount; /* * The request pos offset might be 32 or 64 bit, this is all fine @@ -1515,7 +1516,8 @@ xfs_vm_write_failed( if (!buffer_delay(bh) && !buffer_unwritten(bh)) continue; - if (!buffer_new(bh) && block_offset < i_size_read(inode)) + if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) && + block_offset < i_size_read(inode)) continue; if (buffer_delay(bh)) @@ -1555,6 +1557,7 @@ xfs_vm_write_begin( pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; int status; + struct xfs_mount *mp = XFS_I(mapping->host)->i_mount; ASSERT(len <= PAGE_CACHE_SIZE); @@ -1563,6 +1566,8 @@ xfs_vm_write_begin( return -ENOMEM; status = __block_write_begin(page, pos, len, xfs_get_blocks); + if (xfs_mp_fail_writes(mp)) + status = -EIO; if (unlikely(status)) { struct inode *inode = mapping->host; size_t isize = i_size_read(inode); @@ -1575,6 +1580,8 @@ xfs_vm_write_begin( * allocated in this write, not blocks that were previously * written successfully. */ + if (xfs_mp_fail_writes(mp)) + isize = 0; if (pos + len > isize) { ssize_t start = max_t(ssize_t, pos, isize); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1c8611f6b464..bac6b3435591 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -147,6 +147,17 @@ typedef struct xfs_mount { * to various other kinds of pain inflicted on the pNFS server. */ __uint32_t m_generation; + +#ifdef DEBUG + /* + * DEBUG mode instrumentation to test and/or trigger delayed allocation + * block killing in the event of failed writes. When enabled, all + * buffered writes are forced to fail. All delalloc blocks in the range + * of the write (including pre-existing delalloc blocks!) are tossed as + * part of the write failure error handling sequence. + */ + bool m_fail_writes; +#endif } xfs_mount_t; /* @@ -263,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); } +#ifdef DEBUG +static inline bool +xfs_mp_fail_writes(struct xfs_mount *mp) +{ + return mp->m_fail_writes; +} +#else +static inline bool +xfs_mp_fail_writes(struct xfs_mount *mp) +{ + return 0; +} +#endif + /* * Per-ag incore structure, copies of information in agf and agi, to improve the * performance of allocation group selection. diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 641d625eb334..6ced4f143494 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -18,10 +18,13 @@ #include "xfs.h" #include "xfs_sysfs.h" +#include "xfs_format.h" #include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_stats.h" +#include "xfs_mount.h" struct xfs_sysfs_attr { struct attribute attr; @@ -45,16 +48,6 @@ to_attr(struct attribute *attr) #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr -/* - * xfs_mount kobject. This currently has no attributes and thus no need for show - * and store helpers. The mp kobject serves as the per-mount parent object that - * is identified by the fsname under sysfs. - */ - -struct kobj_type xfs_mp_ktype = { - .release = xfs_sysfs_release, -}; - STATIC ssize_t xfs_sysfs_object_show( struct kobject *kobject, @@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = { .store = xfs_sysfs_object_store, }; +/* + * xfs_mount kobject. The mp kobject also serves as the per-mount parent object + * that is identified by the fsname under sysfs. + */ + +static inline struct xfs_mount * +to_mp(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xfs_mount, m_kobj); +} + +#ifdef DEBUG + +STATIC ssize_t +fail_writes_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + struct xfs_mount *mp = to_mp(kobject); + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val == 1) + mp->m_fail_writes = true; + else if (val == 0) + mp->m_fail_writes = false; + else + return -EINVAL; + + return count; +} + +STATIC ssize_t +fail_writes_show( + struct kobject *kobject, + char *buf) +{ + struct xfs_mount *mp = to_mp(kobject); + + return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0); +} +XFS_SYSFS_ATTR_RW(fail_writes); + +#endif /* DEBUG */ + +static struct attribute *xfs_mp_attrs[] = { +#ifdef DEBUG + ATTR_LIST(fail_writes), +#endif + NULL, +}; + +struct kobj_type xfs_mp_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_mp_attrs, +}; + #ifdef DEBUG /* debug */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 748b16aff45a..20c53666cb4b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1028,6 +1028,8 @@ __xfs_trans_roll( struct xfs_trans_res tres; int error; + *committed = 0; + /* * Ensure that the inode is always logged. */ @@ -1082,6 +1084,6 @@ xfs_trans_roll( struct xfs_trans **tpp, struct xfs_inode *dp) { - int committed = 0; + int committed; return __xfs_trans_roll(tpp, dp, &committed); }