Merge branch 'xfs-misc-fixes-4.6-4' into for-next

This commit is contained in:
Dave Chinner 2016-03-15 11:44:35 +11:00
commit 2cdb958aba
6 changed files with 221 additions and 56 deletions

View File

@ -4717,6 +4717,66 @@ error0:
return error;
}
/*
 * Divide an indirect block (indlen) reservation between the two delalloc
 * extents that remain after an extent is split, e.g. by a hole punch.
 *
 * @ores is the reservation held by the original extent.  On entry, *@indlen1
 * and *@indlen2 hold the worst case indlen of each new extent (as calculated
 * by xfs_bmap_worst_indlen()); on return they hold the reservation that is
 * left for each extent.  @avail is the number of blocks that may be stolen
 * from the extent being removed.
 *
 * If the two worst case values together exceed @ores, the shortfall is first
 * covered by stealing up to @avail blocks.  Whatever shortfall remains is
 * trimmed from the two reservations one block at a time, alternating between
 * them and starting with the first.
 *
 * Returns the number of blocks stolen.  The caller is responsible for making
 * sure those blocks really are available and for accounting for them.
 *
 * NOTE(review): the alternating trim can take the smaller reservation all the
 * way to zero while the larger one still holds blocks (worst cases of 1 and 5
 * with 2 blocks available end up as 0 and 2).
 */
static xfs_filblks_t
xfs_bmap_split_indlen(
	xfs_filblks_t			ores,		/* original res. */
	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
	xfs_filblks_t			avail)		/* stealable blocks */
{
	xfs_filblks_t			first = *indlen1;
	xfs_filblks_t			second = *indlen2;
	xfs_filblks_t			wanted = first + second;
	xfs_filblks_t			deficit = 0;	/* blocks still missing */
	xfs_filblks_t			stolen;
	int				trim_first = 1;

	if (wanted > ores)
		deficit = wanted - ores;

	/*
	 * Cover as much of the deficit as possible with blocks stolen from
	 * the extent being removed.
	 */
	stolen = deficit < avail ? deficit : avail;
	deficit -= stolen;

	/*
	 * Anything still missing has to come out of the new reservations
	 * themselves.  Take one block at a time, alternating between the two
	 * extents (first extent first) and skipping a reservation that has
	 * already reached zero.
	 */
	while (deficit) {
		xfs_filblks_t		*victim = trim_first ? &first : &second;

		if (*victim) {
			(*victim)--;
			deficit--;
		}
		trim_first = !trim_first;
	}

	*indlen1 = first;
	*indlen2 = second;

	return stolen;
}
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
@ -4981,28 +5041,29 @@ xfs_bmap_del_extent(
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
} else {
xfs_filblks_t stolen;
ASSERT(whichfork == XFS_DATA_FORK);
temp = xfs_bmap_worst_indlen(ip, temp);
/*
* Distribute the original indlen reservation across the
* two new extents. Steal blocks from the deleted extent
* if necessary. Stealing blocks simply fudges the
* fdblocks accounting in xfs_bunmapi().
*/
temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
del->br_blockcount);
da_new = temp + temp2 - stolen;
del->br_blockcount -= stolen;
/*
* Set the reservation for each extent. Warn if either
* is zero as this can lead to delalloc problems.
*/
WARN_ON_ONCE(!temp || !temp2);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
temp2 = xfs_bmap_worst_indlen(ip, temp2);
new.br_startblock = nullstartblock((int)temp2);
da_new = temp + temp2;
while (da_new > da_old) {
if (temp) {
temp--;
da_new--;
xfs_bmbt_set_startblock(ep,
nullstartblock((int)temp));
}
if (da_new == da_old)
break;
if (temp2) {
temp2--;
da_new--;
new.br_startblock =
nullstartblock((int)temp2);
}
}
}
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx + 1, 1, &new, state);
@ -5293,31 +5354,7 @@ xfs_bunmapi(
goto nodelete;
}
}
if (wasdel) {
ASSERT(startblockval(del.br_startblock) > 0);
/* Update realtime/data freespace, unreserve quota */
if (isrt) {
xfs_filblks_t rtexts;
rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_frextents(mp, (int64_t)rtexts);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
false);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
if (cur)
cur->bc_private.b.flags |=
XFS_BTCUR_BPRV_WASDEL;
} else if (cur)
cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
/*
* If it's the case where the directory code is running
* with no block reservation, and the deleted block is in
@ -5339,11 +5376,45 @@ xfs_bunmapi(
error = -ENOSPC;
goto error0;
}
/*
* Unreserve quota and update realtime free space, if
* appropriate. If delayed allocation, update the inode delalloc
* counter now and wait to update the sb counters as
* xfs_bmap_del_extent() might need to borrow some blocks.
*/
if (wasdel) {
ASSERT(startblockval(del.br_startblock) > 0);
if (isrt) {
xfs_filblks_t rtexts;
rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_frextents(mp, (int64_t)rtexts);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
if (cur)
cur->bc_private.b.flags |=
XFS_BTCUR_BPRV_WASDEL;
} else if (cur)
cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
&tmp_logflags, whichfork);
logflags |= tmp_logflags;
if (error)
goto error0;
if (!isrt && wasdel)
xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
bno = del.br_startoff - 1;
nodelete:
/*

View File

@ -2235,6 +2235,9 @@ xfs_dir2_node_trim_free(
dp = args->dp;
tp = args->trans;
*rvalp = 0;
/*
* Read the freespace block.
*/
@ -2255,7 +2258,6 @@ xfs_dir2_node_trim_free(
*/
if (freehdr.nused > 0) {
xfs_trans_brelse(tp, bp);
*rvalp = 0;
return 0;
}
/*

View File

@ -1474,6 +1474,7 @@ xfs_vm_write_failed(
loff_t from = pos & (PAGE_CACHE_SIZE - 1);
loff_t to = from + len;
struct buffer_head *bh, *head;
struct xfs_mount *mp = XFS_I(inode)->i_mount;
/*
* The request pos offset might be 32 or 64 bit, this is all fine
@ -1515,7 +1516,8 @@ xfs_vm_write_failed(
if (!buffer_delay(bh) && !buffer_unwritten(bh))
continue;
if (!buffer_new(bh) && block_offset < i_size_read(inode))
if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
block_offset < i_size_read(inode))
continue;
if (buffer_delay(bh))
@ -1555,6 +1557,7 @@ xfs_vm_write_begin(
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
int status;
struct xfs_mount *mp = XFS_I(mapping->host)->i_mount;
ASSERT(len <= PAGE_CACHE_SIZE);
@ -1563,6 +1566,8 @@ xfs_vm_write_begin(
return -ENOMEM;
status = __block_write_begin(page, pos, len, xfs_get_blocks);
if (xfs_mp_fail_writes(mp))
status = -EIO;
if (unlikely(status)) {
struct inode *inode = mapping->host;
size_t isize = i_size_read(inode);
@ -1575,6 +1580,8 @@ xfs_vm_write_begin(
* allocated in this write, not blocks that were previously
* written successfully.
*/
if (xfs_mp_fail_writes(mp))
isize = 0;
if (pos + len > isize) {
ssize_t start = max_t(ssize_t, pos, isize);

View File

@ -147,6 +147,17 @@ typedef struct xfs_mount {
* to various other kinds of pain inflicted on the pNFS server.
*/
__uint32_t m_generation;
#ifdef DEBUG
/*
* DEBUG mode instrumentation to test and/or trigger delayed allocation
* block killing in the event of failed writes. When enabled, all
* buffered writes are forced to fail. All delalloc blocks in the range
* of the write (including pre-existing delalloc blocks!) are tossed as
* part of the write failure error handling sequence.
*/
bool m_fail_writes;
#endif
} xfs_mount_t;
/*
@ -263,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
/*
 * Report whether buffered writes on this mount are being forced to fail.
 * Only DEBUG builds carry the m_fail_writes flag; every other build always
 * answers false.
 */
static inline bool
xfs_mp_fail_writes(struct xfs_mount *mp)
{
#ifdef DEBUG
	return mp->m_fail_writes;
#else
	return false;
#endif
}
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.

View File

@ -18,10 +18,13 @@
#include "xfs.h"
#include "xfs_sysfs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_stats.h"
#include "xfs_mount.h"
struct xfs_sysfs_attr {
struct attribute attr;
@ -45,16 +48,6 @@ to_attr(struct attribute *attr)
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
/*
* xfs_mount kobject. This currently has no attributes and thus no need for show
* and store helpers. The mp kobject serves as the per-mount parent object that
* is identified by the fsname under sysfs.
*/
struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release,
};
STATIC ssize_t
xfs_sysfs_object_show(
struct kobject *kobject,
@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = {
.store = xfs_sysfs_object_store,
};
/*
* xfs_mount kobject. The mp kobject also serves as the per-mount parent object
* that is identified by the fsname under sysfs.
*/
/*
 * Map a generic kobject back to the xfs_mount that embeds it: to_kobj()
 * yields the enclosing xfs_kobj, which is the m_kobj member of the mount.
 */
static inline struct xfs_mount *
to_mp(struct kobject *kobject)
{
	return container_of(to_kobj(kobject), struct xfs_mount, m_kobj);
}
#ifdef DEBUG
/*
 * sysfs store handler for the fail_writes attribute.
 *
 * Parses @buf as an integer (base auto-detected by kstrtoint()).  Writing 1
 * sets m_fail_writes on the mount, writing 0 clears it; any other value is
 * rejected with -EINVAL.  A parse failure returns the kstrtoint() error.
 * On success the full @count is reported as consumed.
 */
STATIC ssize_t
fail_writes_store(
	struct kobject		*kobject,
	const char		*buf,
	size_t			count)
{
	struct xfs_mount	*mp = to_mp(kobject);
	int			error;
	int			val;

	error = kstrtoint(buf, 0, &val);
	if (error)
		return error;

	switch (val) {
	case 0:
		mp->m_fail_writes = false;
		break;
	case 1:
		mp->m_fail_writes = true;
		break;
	default:
		return -EINVAL;
	}

	return count;
}
/*
 * sysfs show handler for the fail_writes attribute: prints "1" when
 * m_fail_writes is set on the mount and "0" otherwise, followed by a newline.
 * Returns the snprintf() result.
 */
STATIC ssize_t
fail_writes_show(
	struct kobject		*kobject,
	char			*buf)
{
	struct xfs_mount	*mp = to_mp(kobject);
	int			enabled = mp->m_fail_writes ? 1 : 0;

	return snprintf(buf, PAGE_SIZE, "%d\n", enabled);
}
XFS_SYSFS_ATTR_RW(fail_writes);
#endif /* DEBUG */
/*
 * Attributes attached to the xfs_mount kobject. The array is NULL-terminated;
 * the fail_writes entry is only compiled in when DEBUG is defined, so
 * non-DEBUG builds end up with an empty list.
 */
static struct attribute *xfs_mp_attrs[] = {
#ifdef DEBUG
ATTR_LIST(fail_writes),
#endif
NULL,	/* list terminator */
};
/*
 * kobj_type for the xfs_mount kobject: released via xfs_sysfs_release(),
 * attribute show/store requests routed through xfs_sysfs_ops, and
 * xfs_mp_attrs installed as the default attribute set.
 */
struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_mp_attrs,
};
#ifdef DEBUG
/* debug */

View File

@ -1028,6 +1028,8 @@ __xfs_trans_roll(
struct xfs_trans_res tres;
int error;
*committed = 0;
/*
* Ensure that the inode is always logged.
*/
@ -1082,6 +1084,6 @@ xfs_trans_roll(
struct xfs_trans **tpp,
struct xfs_inode *dp)
{
int committed = 0;
int committed;
return __xfs_trans_roll(tpp, dp, &committed);
}