diff --git a/fs/iomap.c b/fs/iomap.c index 51a02573405e..13dd413b2b9c 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, struct page *page = data; int ret; - ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length, - NULL, iomap); + ret = __block_write_begin_int(page, pos, length, NULL, iomap); if (ret) return ret; @@ -562,7 +561,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, } while (len > 0) { - ret = iomap_apply(inode, start, len, 0, ops, &ctx, + ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx, iomap_fiemap_actor); /* inode with no (attribute) mapping will give ENOENT */ if (ret == -ENOENT) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c27344cf38e1..6b7e6eb29414 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,6 +49,8 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" +#include "xfs_rmap_btree.h" +#include "xfs_icache.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -190,8 +192,12 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ + xfs_filblks_t orig_len; mp = ip->i_mount; + + /* Calculate the worst-case size of the bmbt. */ + orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -199,12 +205,20 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) { + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; + break; + } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } + + /* Calculate the worst-case size of the rmapbt. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + + mp->m_rmap_maxlevels; + return rval; } @@ -515,7 +529,7 @@ xfs_bmap_trace_exlist( state |= BMAP_ATTRFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(cnt == xfs_iext_count(ifp)); for (idx = 0; idx < cnt; idx++) trace_xfs_extlist(ip, idx, whichfork, caller_ip); } @@ -811,7 +825,7 @@ try_another_ag: XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { @@ -1296,7 +1310,7 @@ xfs_bmap_read_extents( /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + room = xfs_iext_count(ifp); i = 0; /* * Loop over all leaf nodes. Copy information to the extent records. @@ -1361,7 +1375,7 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(i == xfs_iext_count(ifp)); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; @@ -1370,97 +1384,6 @@ error0: return -EFSCORRUPTED; } - -/* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. - */ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_multi_extents( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t lastx; /* last extent index */ - - /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. - */ - gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; - gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; - gotp->br_state = XFS_EXT_INVALID; - gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; - prevp->br_startoff = NULLFILEOFF; - - ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); - if (lastx > 0) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); - } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { - xfs_bmbt_get_all(ep, gotp); - *eofp = 0; - } else { - if (lastx > 0) { - *gotp = *prevp; - } - *eofp = 1; - ep = NULL; - } - *lastxp = lastx; - return ep; -} - -/* - * Search the extents list for the inode, for the extent containing bno. - * If bno lies in a hole, point to the next entry. If bno lies past eof, - * *eofp will be set, and *prevp will contain the last entry (null if none). - * Else, *lastxp will be set to the index of the found - * entry; *gotp will contain the entry. - */ -xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_extents( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int fork, /* data or attr fork */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - - XFS_STATS_INC(ip->i_mount, xs_look_exlist); - ifp = XFS_IFORK_PTR(ip, fork); - - ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); - - if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && - !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, - "Access to block zero in inode %llu " - "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x", - (unsigned long long)ip->i_ino, - (unsigned long long)gotp->br_startblock, - (unsigned long long)gotp->br_startoff, - (unsigned long long)gotp->br_blockcount, - gotp->br_state, *lastxp); - *lastxp = NULLEXTNUM; - *eofp = 1; - return NULL; - } - return ep; -} - /* * Returns the file-relative block number of the first unused block(s) * in the file with at least "len" logically contiguous blocks free. @@ -1497,7 +1420,7 @@ xfs_bmap_first_unused( (error = xfs_iread_extents(tp, ip, whichfork))) return error; lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); off = xfs_bmbt_get_startoff(ep); @@ -1523,44 +1446,44 @@ xfs_bmap_first_unused( */ int /* error */ xfs_bmap_last_before( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork) /* data or attr fork */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t *last_block, /* last block */ + int whichfork) /* data or attr fork */ { - xfs_fileoff_t bno; /* input file offset */ - int eof; /* hit end of file */ - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ - int error; /* error return value */ - xfs_bmbt_irec_t got; /* current extent value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last extent used */ - xfs_bmbt_irec_t prev; /* previous extent value */ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec got; + xfs_extnum_t idx; + int error; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return -EIO; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: *last_block = 0; return 0; + case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_EXTENTS: + break; + default: + return -EIO; } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - bno = *last_block - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - if (eof || xfs_bmbt_get_startoff(ep) > bno) { - if (prev.br_startoff == NULLFILEOFF) - *last_block = 0; - else - *last_block = prev.br_startoff + prev.br_blockcount; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; } - /* - * Otherwise *last_block is already the right answer. - */ + + if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) { + if (got.br_startoff <= *last_block - 1) + return 0; + } + + if (xfs_iext_get_extent(ifp, idx - 1, &got)) { + *last_block = got.br_startoff + got.br_blockcount; + return 0; + } + + *last_block = 0; return 0; } @@ -1582,7 +1505,7 @@ xfs_bmap_last_extent( return error; } - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *is_empty = 1; return 0; @@ -1735,7 +1658,7 @@ xfs_bmap_add_extent_delay_real( &bma->ip->i_d.di_nextents); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -1794,7 +1717,7 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (bma->idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); @@ -2300,7 +2223,7 @@ xfs_bmap_add_extent_unwritten_real( ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2356,7 +2279,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < xfs_iext_count(&ip->i_df) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) @@ -2836,7 +2759,7 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); @@ -2966,7 +2889,7 @@ xfs_bmap_add_extent_hole_real( ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -2992,7 +2915,7 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (bma->idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); if (isnullstartblock(right.br_startblock)) @@ -3974,9 +3897,6 @@ xfs_bmap_remap_alloc( * allocating, so skip that check by pretending to be freeing. */ error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); - if (error) - goto error0; -error0: xfs_perag_put(args.pag); if (error) trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); @@ -3999,6 +3919,39 @@ xfs_bmap_alloc( return xfs_bmap_btalloc(ap); } +/* Trim extent to fit a logical block range. */ +void +xfs_trim_extent( + struct xfs_bmbt_irec *irec, + xfs_fileoff_t bno, + xfs_filblks_t len) +{ + xfs_fileoff_t distance; + xfs_fileoff_t end = bno + len; + + if (irec->br_startoff + irec->br_blockcount <= bno || + irec->br_startoff >= end) { + irec->br_blockcount = 0; + return; + } + + if (irec->br_startoff < bno) { + distance = bno - irec->br_startoff; + if (isnullstartblock(irec->br_startblock)) + irec->br_startblock = DELAYSTARTBLOCK; + if (irec->br_startblock != DELAYSTARTBLOCK && + irec->br_startblock != HOLESTARTBLOCK) + irec->br_startblock += distance; + irec->br_startoff += distance; + irec->br_blockcount -= distance; + } + + if (end < irec->br_startoff + irec->br_blockcount) { + distance = irec->br_startoff + irec->br_blockcount - end; + irec->br_blockcount -= distance; + } +} + /* * Trim the returned map to the required bounds */ @@ -4115,12 +4068,11 @@ xfs_bmapi_read( struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; xfs_fileoff_t obno; xfs_fileoff_t end; - xfs_extnum_t lastx; + xfs_extnum_t idx; int error; - int eof; + bool eof = false; int n = 0; int whichfork = xfs_bmapi_whichfork(flags); @@ -4160,7 +4112,8 @@ xfs_bmapi_read( return error; } - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) + eof = true; end = bno + len; obno = bno; @@ -4191,10 +4144,8 @@ xfs_bmapi_read( break; /* Else go on to the next record. */ - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); - else - eof = 1; + if (!xfs_iext_get_extent(ifp, ++idx, &got)) + eof = true; } *nmap = n; return 0; @@ -4204,10 +4155,10 @@ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, + xfs_fileoff_t off, xfs_filblks_t len, + xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof) { @@ -4218,10 +4169,17 @@ xfs_bmapi_reserve_delalloc( char rt = XFS_IS_REALTIME_INODE(ip); xfs_extlen_t extsz; int error; + xfs_fileoff_t aoff = off; - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + /* + * Cap the alloc length. Keep track of prealloc so we know whether to + * tag the inode before we return. + */ + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + if (prealloc && alen >= len) + prealloc = alen - len; /* Figure out the extent size, adjust alen */ if (whichfork == XFS_COW_FORK) @@ -4229,7 +4187,12 @@ xfs_bmapi_reserve_delalloc( else extsz = xfs_get_extsz_hint(ip); if (extsz) { - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + struct xfs_bmbt_irec prev; + + if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) + prev.br_startoff = NULLFILEOFF; + + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); } @@ -4282,6 +4245,16 @@ xfs_bmapi_reserve_delalloc( */ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + /* + * Tag the inode if blocks were preallocated. Note that COW fork + * preallocation can occur at the start or end of the extent, even when + * prealloc == 0, so we must also check the aligned offset and length. + */ + if (whichfork == XFS_DATA_FORK && prealloc) + xfs_inode_set_eofblocks_tag(ip); + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); + ASSERT(got->br_startoff <= aoff); ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); ASSERT(isnullstartblock(got->br_startblock)); @@ -4319,7 +4292,7 @@ xfs_bmapi_allocate( if (bma->wasdel) { bma->length = (xfs_extlen_t)bma->got.br_blockcount; bma->offset = bma->got.br_startoff; - if (bma->idx != NULLEXTNUM && bma->idx) { + if (bma->idx) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &bma->prev); } @@ -4533,7 +4506,7 @@ xfs_bmapi_write( struct xfs_ifork *ifp; struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ xfs_fileoff_t end; /* end of mapped file region */ - int eof; /* after the end of extents */ + bool eof = false; /* after the end of extents */ int error; /* error return */ int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ @@ -4611,12 +4584,14 @@ xfs_bmapi_write( goto error0; } - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, - &bma.prev); n = 0; end = bno + len; obno = bno; + if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got)) + eof = true; + if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev)) + bma.prev.br_startoff = NULLFILEOFF; bma.tp = tp; bma.ip = ip; bma.total = total; @@ -4703,11 +4678,8 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), - &bma.got); - } else - eof = 1; + if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got)) + eof = true; } *nmap = n; @@ -4829,6 +4801,219 @@ xfs_bmap_split_indlen( return stolen; } +int +xfs_bmap_del_extent_delay( + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec new; + int64_t da_old, da_new, da_diff = 0; + xfs_fileoff_t del_endoff, got_endoff; + xfs_filblks_t got_indlen, new_indlen, stolen; + int error = 0, state = 0; + bool isrt; + + XFS_STATS_INC(mp, xs_del_exlist); + + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + da_old = startblockval(got->br_startblock); + da_new = 0; + + ASSERT(*idx >= 0); + ASSERT(*idx <= xfs_iext_count(ifp)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + + if (isrt) { + int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); + + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, rtexts); + } + + /* + * Update the inode delalloc counter now and wait to update the + * sb counters as we might have to borrow some blocks for the + * indirect block accounting. + */ + xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + ip->i_delayed_blks -= del->br_blockcount; + + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = got->br_blockcount - del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + * + * Distribute the original indlen reservation across the two new + * extents. Steal blocks from the deleted extent if necessary. + * Stealing blocks simply fudges the fdblocks accounting below. + * Warn if either of the new indlen reservations is zero as this + * can lead to delalloc problems. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + + got->br_blockcount = del->br_startoff - got->br_startoff; + got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); + + new.br_blockcount = got_endoff - del_endoff; + new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); + + WARN_ON_ONCE(!got_indlen || !new_indlen); + stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, + del->br_blockcount); + + got->br_startblock = nullstartblock((int)got_indlen); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_state = got->br_state; + new.br_startblock = nullstartblock((int)new_indlen); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + + da_new = got_indlen + new_indlen - stolen; + del->br_blockcount -= stolen; + break; + } + + ASSERT(da_old >= da_new); + da_diff = da_old - da_new; + if (!isrt) + da_diff += del->br_blockcount; + if (da_diff) + xfs_mod_fdblocks(mp, da_diff, false); + return error; +} + +void +xfs_bmap_del_extent_cow( + struct xfs_inode *ip, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec new; + xfs_fileoff_t del_endoff, got_endoff; + int state = BMAP_COWFORK; + + XFS_STATS_INC(mp, xs_del_exlist); + + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + + ASSERT(*idx >= 0); + ASSERT(*idx <= xfs_iext_count(ifp)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + ASSERT(!isnullstartblock(got->br_startblock)); + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + got->br_startblock = del->br_startblock + del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount -= del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = del->br_startoff - got->br_startoff; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_blockcount = got_endoff - del_endoff; + new.br_state = got->br_state; + new.br_startblock = del->br_startblock + del->br_blockcount; + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + break; + } +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). @@ -4876,8 +5061,7 @@ xfs_bmap_del_extent( state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); ASSERT(del->br_blockcount > 0); ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); @@ -5171,175 +5355,6 @@ done: return error; } -/* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. */ -int -xfs_bunmapi_cow( - struct xfs_inode *ip, - struct xfs_bmbt_irec *del) -{ - xfs_filblks_t da_new; - xfs_filblks_t da_old; - xfs_fsblock_t del_endblock = 0; - xfs_fileoff_t del_endoff; - int delay; - struct xfs_bmbt_rec_host *ep; - int error; - struct xfs_bmbt_irec got; - xfs_fileoff_t got_endoff; - struct xfs_ifork *ifp; - struct xfs_mount *mp; - xfs_filblks_t nblks; - struct xfs_bmbt_irec new; - /* REFERENCED */ - uint qfield; - xfs_filblks_t temp; - xfs_filblks_t temp2; - int state = BMAP_COWFORK; - int eof; - xfs_extnum_t eidx; - - mp = ip->i_mount; - XFS_STATS_INC(mp, xs_del_exlist); - - ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, - &eidx, &got, &new); - - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp; - ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(del->br_blockcount > 0); - ASSERT(got.br_startoff <= del->br_startoff); - del_endoff = del->br_startoff + del->br_blockcount; - got_endoff = got.br_startoff + got.br_blockcount; - ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - qfield = 0; - error = 0; - /* - * If deleting a real allocation, must free up the disk space. - */ - if (!delay) { - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; - } - qfield = qfield; - nblks = nblks; - - /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. - */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: - /* - * Matches the whole extent. Delete the entry. - */ - xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK); - --eidx; - break; - - case 2: - /* - * Deleting the first part of the extent. - */ - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 1: - /* - * Deleting the last part of the extent. - */ - temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 0: - /* - * Deleting the middle of the extent. - */ - temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; - new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - } else { - temp = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); - new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - xfs_iext_insert(ip, eidx + 1, 1, &new, state); - ++eidx; - break; - } - - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. - */ - ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); - - return error; -} - /* * Unmap (remove) blocks from a file. * If nexts is nonzero then the number of extents to remove is limited to @@ -5360,8 +5375,6 @@ __xfs_bunmapi( { xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_irec_t del; /* extent being deleted */ - int eof; /* is deleting at eof */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ int error; /* error return value */ xfs_extnum_t extno; /* extent number in list */ xfs_bmbt_irec_t got; /* current extent record */ @@ -5371,8 +5384,6 @@ __xfs_bunmapi( int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_irec_t prev; /* previous extent record */ xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ @@ -5403,8 +5414,7 @@ __xfs_bunmapi( if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { + if (xfs_iext_count(ifp) == 0) { *rlen = 0; return 0; } @@ -5412,18 +5422,17 @@ __xfs_bunmapi( isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); start = bno; bno = start + len - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); /* * Check to see if the given block number is past the end of the * file, back up to the last block if so... */ - if (eof) { - ep = xfs_iext_get_ext(ifp, --lastx); - xfs_bmbt_get_all(ep, &got); + if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) { + ASSERT(lastx > 0); + xfs_iext_get_extent(ifp, --lastx, &got); bno = got.br_startoff + got.br_blockcount - 1; } + logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); @@ -5454,8 +5463,7 @@ __xfs_bunmapi( if (got.br_startoff > bno) { if (--lastx < 0) break; - ep = xfs_iext_get_ext(ifp, lastx); - xfs_bmbt_get_all(ep, &got); + xfs_iext_get_extent(ifp, lastx, &got); } /* * Is the last block of this extent before the range @@ -5469,7 +5477,6 @@ __xfs_bunmapi( * Then deal with the (possibly delayed) allocated space * we found. */ - ASSERT(ep != NULL); del = got; wasdel = isnullstartblock(del.br_startblock); if (got.br_startoff < start) { @@ -5550,15 +5557,12 @@ __xfs_bunmapi( */ ASSERT(bno >= del.br_blockcount); bno -= del.br_blockcount; - if (got.br_startoff > bno) { - if (--lastx >= 0) { - ep = xfs_iext_get_ext(ifp, - lastx); - xfs_bmbt_get_all(ep, &got); - } - } + if (got.br_startoff > bno && --lastx >= 0) + xfs_iext_get_extent(ifp, lastx, &got); continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { + struct xfs_bmbt_irec prev; + /* * This one is already unwritten. * It must have a written left neighbor. @@ -5566,8 +5570,7 @@ __xfs_bunmapi( * try again. */ ASSERT(lastx > 0); - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - lastx - 1), &prev); + xfs_iext_get_extent(ifp, lastx - 1, &prev); ASSERT(prev.br_state == XFS_EXT_NORM); ASSERT(!isnullstartblock(prev.br_startblock)); ASSERT(del.br_startblock == @@ -5665,13 +5668,9 @@ nodelete: */ if (bno != (xfs_fileoff_t)-1 && bno >= start) { if (lastx >= 0) { - ep = xfs_iext_get_ext(ifp, lastx); - if (xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, - lastx); - } - xfs_bmbt_get_all(ep, &got); + xfs_iext_get_extent(ifp, lastx, &got); + if (got.br_startoff > bno && --lastx >= 0) + xfs_iext_get_extent(ifp, lastx, &got); } extno++; } @@ -5889,7 +5888,7 @@ xfs_bmse_shift_one( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); xfs_bmbt_get_all(gotp, &got); @@ -6066,7 +6065,7 @@ xfs_bmap_shift_extents( * are collapsing out, so we cannot use the count of real extents here. * Instead we have to calculate it from the incore fork. */ - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); if (total_extents == 0) { *done = 1; goto del_cursor; @@ -6126,7 +6125,7 @@ xfs_bmap_shift_extents( * count can change. Update the total and grade the next record. */ if (direction == SHIFT_LEFT) { - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); stop_extent = total_extents; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index f97db7132564..cecd094404cc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, #define XFS_BMAP_TRACE_EXLIST(ip,c,w) #endif +void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, + xfs_filblks_t len); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, @@ -221,7 +223,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); -int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); +int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, + xfs_extnum_t *idx, struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del); +void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); @@ -231,14 +237,9 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_defer_ops *dfops, enum shift_direction direction, int num_exts); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); -struct xfs_bmbt_rec_host * - xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno, - int fork, int *eofp, xfs_extnum_t *lastxp, - struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, xfs_filblks_t len, - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, - xfs_extnum_t *lastx, int eof); + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, + struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5c8e6f2ce44f..0e80993c8a59 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4826,7 +4826,7 @@ xfs_btree_calc_size( return rval; } -int +static int xfs_btree_count_blocks_helper( struct xfs_btree_cur *cur, int level, diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 613c5cf19436..5c2929f94bd3 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -199,9 +199,9 @@ xfs_defer_intake_work( struct xfs_defer_pending *dfp; list_for_each_entry(dfp, &dop->dop_intake, dfp_list) { - trace_xfs_defer_intake_work(tp->t_mountp, dfp); dfp->dfp_intent = dfp->dfp_type->create_intent(tp, dfp->dfp_count); + trace_xfs_defer_intake_work(tp->t_mountp, dfp); list_sort(tp->t_mountp, &dfp->dfp_work, dfp->dfp_type->diff_items); list_for_each(li, &dfp->dfp_work) @@ -221,21 +221,14 @@ xfs_defer_trans_abort( struct xfs_defer_pending *dfp; trace_xfs_defer_trans_abort(tp->t_mountp, dop); - /* - * If the transaction was committed, drop the intent reference - * since we're bailing out of here. The other reference is - * dropped when the intent hits the AIL. If the transaction - * was not committed, the intent is freed by the intent item - * unlock handler on abort. - */ - if (!dop->dop_committed) - return; - /* Abort intent items. */ + /* Abort intent items that don't have a done item. */ list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { trace_xfs_defer_pending_abort(tp->t_mountp, dfp); - if (!dfp->dfp_done) + if (dfp->dfp_intent && !dfp->dfp_done) { dfp->dfp_type->abort_intent(dfp->dfp_intent); + dfp->dfp_intent = NULL; + } } /* Shut down FS. */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 20a96dd5af7e..c58d72c220f5 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -93,7 +93,7 @@ xfs_ascii_ci_compname( return result; } -static struct xfs_nameops xfs_ascii_ci_nameops = { +static const struct xfs_nameops xfs_ascii_ci_nameops = { .hashname = xfs_ascii_ci_hashname, .compname = xfs_ascii_ci_compname, }; diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 3cc3cf767474..ac9a003dd29a 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk( - XFS_BB_TO_FSB(mp, bp->b_length)); + ndquots = xfs_calc_dquots_per_chunk(bp->b_length); for (i = 0; i < ndquots; i++, d++) { if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f6547fc5e016..6b7579e7b60a 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -865,7 +865,6 @@ typedef struct xfs_timestamp { * padding field for v3 inodes. */ #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8de9a3a29589..134424fac434 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -57,6 +57,17 @@ xfs_inobp_check( } #endif +bool +xfs_dinode_good_version( + struct xfs_mount *mp, + __u8 version) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return version == 3; + + return version == 1 || version == 2; +} + /* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence @@ -91,7 +102,7 @@ xfs_inode_buf_verify( dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && - XFS_DINODE_GOOD_VERSION(dip->di_version); + xfs_dinode_good_version(mp, dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 62d9d4681c8c..3cfe12a4f58a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, struct xfs_dinode *to); +bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version); + #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5dd56d3dbb3a..222e103356c6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -775,6 +775,13 @@ xfs_idestroy_fork( } } +/* Count number of incore extents based on if_bytes */ +xfs_extnum_t +xfs_iext_count(struct xfs_ifork *ifp) +{ + return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); +} + /* * Convert in-core extents to on-disk form * @@ -803,7 +810,7 @@ xfs_iextents_copy( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nrecs = xfs_iext_count(ifp); XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); ASSERT(nrecs > 0); @@ -941,7 +948,7 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(idx < xfs_iext_count(ifp)); if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; @@ -1017,7 +1024,7 @@ xfs_iext_add( int new_size; /* size of extents after adding */ xfs_extnum_t nextents; /* number of extents in file */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT((idx >= 0) && (idx <= nextents)); byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); new_size = ifp->if_bytes + byte_diff; @@ -1241,7 +1248,7 @@ xfs_iext_remove( trace_xfs_iext_remove(ip, idx, state, _RET_IP_); ASSERT(ext_diff > 0); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); if (new_size == 0) { @@ -1270,7 +1277,7 @@ xfs_iext_remove_inline( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(((nextents - ext_diff) > 0) && (nextents - ext_diff) < XFS_INLINE_EXTS); @@ -1309,7 +1316,7 @@ xfs_iext_remove_direct( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (new_size == 0) { xfs_iext_destroy(ifp); @@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct( int size; /* size of file extents */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); size = nextents * sizeof(xfs_bmbt_rec_t); @@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext( xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *idxp = 0; return NULL; @@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec( ASSERT(ifp->if_flags & XFS_IFEXTIREC); ASSERT(page_idx >= 0); - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + ASSERT(page_idx <= xfs_iext_count(ifp)); + ASSERT(page_idx < xfs_iext_count(ifp) || realloc); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; @@ -1782,7 +1789,7 @@ xfs_iext_irec_init( xfs_extnum_t nextents; /* number of extents in file */ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); @@ -1906,7 +1913,7 @@ xfs_iext_irec_compact( ASSERT(ifp->if_flags & XFS_IFEXTIREC); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { xfs_iext_destroy(ifp); @@ -1996,3 +2003,49 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* + * Lookup the extent covering bno. + * + * If there is an extent covering bno return the extent index, and store the + * expanded extent structure in *gotp, and the extent index in *idx. + * If there is no extent covering bno, but there is an extent after it (e.g. + * it lies in a hole) return that extent in *gotp and its index in *idx + * instead. + * If bno is beyond the last extent return false, and return the index after + * the last valid index in *idxp. + */ +bool +xfs_iext_lookup_extent( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t bno, + xfs_extnum_t *idxp, + struct xfs_bmbt_irec *gotp) +{ + struct xfs_bmbt_rec_host *ep; + + XFS_STATS_INC(ip->i_mount, xs_look_exlist); + + ep = xfs_iext_bno_to_ext(ifp, bno, idxp); + if (!ep) + return false; + xfs_bmbt_get_all(ep, gotp); + return true; +} + +/* + * Return true if there is an extent at index idx, and return the expanded + * extent structure at idx in that case. Else return false. + */ +bool +xfs_iext_get_extent( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp) +{ + if (idx < 0 || idx >= xfs_iext_count(ifp)) + return false; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); + return true; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index c9476f50e32d..7fb8365326d1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); struct xfs_bmbt_rec_host * xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); +xfs_extnum_t xfs_iext_count(struct xfs_ifork *); void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, struct xfs_bmbt_irec *, int); void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); @@ -181,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *); void xfs_iext_irec_compact_full(struct xfs_ifork *); void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); +bool xfs_iext_lookup_extent(struct xfs_inode *ip, + struct xfs_ifork *ifp, xfs_fileoff_t bno, + xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); +bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp); + extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 8d74870468c2..f9a1076de911 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -57,7 +57,6 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ #define NULLAGBLOCK ((xfs_agblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) -#define NULLEXTNUM ((xfs_extnum_t)-1) #define NULLCOMMITLSN ((xfs_lsn_t)-1) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 561cf1456c6c..ab266d66124d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -777,7 +777,7 @@ xfs_map_cow( { struct xfs_inode *ip = XFS_I(inode); struct xfs_bmbt_irec imap; - bool is_cow = false, need_alloc = false; + bool is_cow = false; int error; /* @@ -795,7 +795,7 @@ xfs_map_cow( * Else we need to check if there is a COW mapping at this offset. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!is_cow) @@ -805,7 +805,7 @@ xfs_map_cow( * And if the COW mapping has a delayed extent here we need to * allocate real space for it now. */ - if (need_alloc) { + if (isnullstartblock(imap.br_startblock)) { error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, &imap); if (error) @@ -1311,7 +1311,6 @@ __xfs_get_blocks( ssize_t size; int new = 0; bool is_cow = false; - bool need_alloc = false; BUG_ON(create && !direct); @@ -1337,9 +1336,11 @@ __xfs_get_blocks( end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); - if (create && direct && xfs_is_reflink_inode(ip)) - is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, - &need_alloc); + if (create && direct && xfs_is_reflink_inode(ip)) { + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); + ASSERT(!is_cow || !isnullstartblock(imap.br_startblock)); + } + if (!is_cow) { error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ENTIRE); @@ -1356,10 +1357,29 @@ __xfs_get_blocks( xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, &imap); } - ASSERT(!need_alloc); if (error) goto out_unlock; + /* + * The only time we can ever safely find delalloc blocks on direct I/O + * is a dio write to post-eof speculative preallocation. All other + * scenarios are indicative of a problem or misuse (such as mixing + * direct and mapped I/O). + * + * The file may be unmapped by the time we get here so we cannot + * reliably fail the I/O based on mapping. Instead, fail the I/O if this + * is a read or a write within eof. Otherwise, carry on but warn as a + * precuation if the file happens to be mapped. + */ + if (direct && imap.br_startblock == DELAYSTARTBLOCK) { + if (!create || offset < i_size_read(VFS_I(ip))) { + WARN_ON_ONCE(1); + error = -EIO; + goto out_unlock; + } + WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping)); + } + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || @@ -1444,8 +1464,6 @@ __xfs_get_blocks( (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); - BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); - return 0; out_unlock: diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 552465e011ec..0670a8bd5818 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -359,9 +359,7 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - xfs_bmap_count_leaves(ifp, 0, - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count); + xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count); return 0; } @@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole( ifp = XFS_IFORK_PTR(ip, whichfork); if (!moretocome && xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && - (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) + (lastx == xfs_iext_count(ifp) - 1)) out->bmv_oflags |= BMV_OF_LAST; } @@ -1792,6 +1790,7 @@ xfs_swap_extent_forks( struct xfs_ifork tempifp, *ifp, *tifp; int aforkblks = 0; int taforkblks = 0; + xfs_extnum_t nextents; __uint64_t tmp; int error; @@ -1877,14 +1876,13 @@ xfs_swap_extent_forks( switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. + /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { - ifp->if_u1.if_extents = - ifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&ip->i_df); + if (nextents <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: @@ -1896,14 +1894,13 @@ xfs_swap_extent_forks( switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. + /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { - tifp->if_u1.if_extents = - tifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&tip->i_df); + if (nextents <= XFS_INLINE_EXTS) + tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; (*target_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 1c2e52b2d926..38c9a95bda7d 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -71,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_NO_IOACCT, "NO_IOACCT" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ca2ab738fae2..d818c160451f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -249,6 +249,7 @@ xfs_file_dio_aio_read( struct xfs_inode *ip = XFS_I(inode); loff_t isize = i_size_read(inode); size_t count = iov_iter_count(to); + loff_t end = iocb->ki_pos + count - 1; struct iov_iter data; struct xfs_buftarg *target; ssize_t ret = 0; @@ -272,49 +273,21 @@ xfs_file_dio_aio_read( file_accessed(iocb->ki_filp); - /* - * Locking is a bit tricky here. If we take an exclusive lock for direct - * IO, we effectively serialise all new concurrent read IO to this file - * and block it behind IO that is currently in progress because IO in - * progress holds the IO lock shared. We only need to hold the lock - * exclusive to blow away the page cache, so only take lock exclusively - * if the page cache needs invalidation. This allows the normal direct - * IO case of no page cache pages to proceeed concurrently without - * serialisation. - */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if (mapping->nrpages) { - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + if (ret) + goto out_unlock; /* - * The generic dio code only flushes the range of the particular - * I/O. Because we take an exclusive lock here, this whole - * sequence is considerably more expensive for us. This has a - * noticeable performance impact for any file with cached pages, - * even when outside of the range of the particular I/O. - * - * Hence, amortize the cost of the lock against a full file - * flush and reduce the chances of repeated iolock cycles going - * forward. + * Invalidate whole pages. This can return an error if we fail + * to invalidate a page, but this should never happen on XFS. + * Warn if it does fail. */ - if (mapping->nrpages) { - ret = filemap_write_and_wait(mapping); - if (ret) { - xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; - } - - /* - * Invalidate whole pages. This can return an error if - * we fail to invalidate a page, but this should never - * happen on XFS. Warn if it does fail. - */ - ret = invalidate_inode_pages2(mapping); - WARN_ON_ONCE(ret); - ret = 0; - } - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } data = *to; @@ -324,8 +297,9 @@ xfs_file_dio_aio_read( iocb->ki_pos += ret; iov_iter_advance(to, ret); } - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); +out_unlock: + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } @@ -570,61 +544,49 @@ xfs_file_dio_aio_write( if ((iocb->ki_pos | count) & target->bt_logical_sectormask) return -EINVAL; - /* "unaligned" here means not aligned to a filesystem block */ + /* + * Don't take the exclusive iolock here unless the I/O is unaligned to + * the file system block size. We don't need to consider the EOF + * extension case here because xfs_file_aio_write_checks() will relock + * the inode as necessary for EOF zeroing cases and fill out the new + * inode size as appropriate. + */ if ((iocb->ki_pos & mp->m_blockmask) || - ((iocb->ki_pos + count) & mp->m_blockmask)) + ((iocb->ki_pos + count) & mp->m_blockmask)) { unaligned_io = 1; - - /* - * We don't need to take an exclusive lock unless there page cache needs - * to be invalidated or unaligned IO is being executed. We don't need to - * consider the EOF extension case here because - * xfs_file_aio_write_checks() will relock the inode as necessary for - * EOF zeroing cases and fill out the new inode size as appropriate. - */ - if (unaligned_io || mapping->nrpages) iolock = XFS_IOLOCK_EXCL; - else + } else { iolock = XFS_IOLOCK_SHARED; - xfs_rw_ilock(ip, iolock); - - /* - * Recheck if there are cached pages that need invalidate after we got - * the iolock to protect against other threads adding new pages while - * we were waiting for the iolock. - */ - if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) { - xfs_rw_iunlock(ip, iolock); - iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, iolock); } + xfs_rw_ilock(ip, iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; count = iov_iter_count(from); end = iocb->ki_pos + count - 1; - /* - * See xfs_file_dio_aio_read() for why we do a full-file flush here. - */ if (mapping->nrpages) { - ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); if (ret) goto out; + /* * Invalidate whole pages. This can return an error if we fail * to invalidate a page, but this should never happen on XFS. * Warn if it does fail. */ - ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } /* * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to flush cached pages + * otherwise demote the lock if we had to take the exclusive lock + * for other reasons in xfs_file_aio_write_checks. */ if (unaligned_io) inode_dio_wait(inode); @@ -947,134 +909,6 @@ out_unlock: return error; } -/* - * Flush all file writes out to disk. - */ -static int -xfs_file_wait_for_io( - struct inode *inode, - loff_t offset, - size_t len) -{ - loff_t rounding; - loff_t ioffset; - loff_t iendoffset; - loff_t bs; - int ret; - - bs = inode->i_sb->s_blocksize; - inode_dio_wait(inode); - - rounding = max_t(xfs_off_t, bs, PAGE_SIZE); - ioffset = round_down(offset, rounding); - iendoffset = round_up(offset + len, rounding) - 1; - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - iendoffset); - return ret; -} - -/* Hook up to the VFS reflink function */ -STATIC int -xfs_file_share_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe) -{ - struct inode *inode_in; - struct inode *inode_out; - ssize_t ret; - loff_t bs; - loff_t isize; - int same_inode; - loff_t blen; - unsigned int flags = 0; - - inode_in = file_inode(file_in); - inode_out = file_inode(file_out); - bs = inode_out->i_sb->s_blocksize; - - /* Don't touch certain kinds of inodes */ - if (IS_IMMUTABLE(inode_out)) - return -EPERM; - if (IS_SWAPFILE(inode_in) || - IS_SWAPFILE(inode_out)) - return -ETXTBSY; - - /* Reflink only works within this filesystem. */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - same_inode = (inode_in->i_ino == inode_out->i_ino); - - /* Don't reflink dirs, pipes, sockets... */ - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - return -EINVAL; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - - /* Don't share DAX file data for now. */ - if (IS_DAX(inode_in) || IS_DAX(inode_out)) - return -EINVAL; - - /* Are we going all the way to the end? */ - isize = i_size_read(inode_in); - if (isize == 0) - return 0; - if (len == 0) - len = isize - pos_in; - - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + len < pos_in || pos_out + len < pos_out || - pos_in + len > isize) - return -EINVAL; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + len > disize) - return -EINVAL; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - return -EINVAL; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) - return -EINVAL; - - /* Wait for the completion of any pending IOs on srcfile */ - ret = xfs_file_wait_for_io(inode_in, pos_in, len); - if (ret) - goto out; - ret = xfs_file_wait_for_io(inode_out, pos_out, len); - if (ret) - goto out; - - if (is_dedupe) - flags |= XFS_REFLINK_DEDUPE; - ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), - pos_out, len, flags); - if (ret < 0) - goto out; - -out: - return ret; -} - STATIC ssize_t xfs_file_copy_range( struct file *file_in, @@ -1086,7 +920,7 @@ xfs_file_copy_range( { int error; - error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, + error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); if (error) return error; @@ -1101,7 +935,7 @@ xfs_file_clone_range( loff_t pos_out, u64 len) { - return xfs_file_share_range(file_in, pos_in, file_out, pos_out, + return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); } @@ -1124,7 +958,7 @@ xfs_file_dedupe_range( if (len > XFS_MAX_DEDUPE_LEN) len = XFS_MAX_DEDUPE_LEN; - error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, + error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); if (error) return error; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 14796b744e0a..9c3e5c6ddf20 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -123,7 +123,6 @@ __xfs_inode_free( { /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!xfs_isiflocked(ip)); XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); @@ -133,6 +132,8 @@ void xfs_inode_free( struct xfs_inode *ip) { + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the @@ -981,6 +982,7 @@ restart: if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); + /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip, false); goto reclaim; } @@ -989,10 +991,10 @@ restart: goto out_ifunlock; xfs_iunpin_wait(ip); } - if (xfs_iflags_test(ip, XFS_ISTALE)) - goto reclaim; - if (xfs_inode_clean(ip)) + if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) { + xfs_ifunlock(ip); goto reclaim; + } /* * Never flush out dirty data during non-blocking reclaim, as it would @@ -1030,25 +1032,24 @@ restart: xfs_buf_relse(bp); } - xfs_iflock(ip); reclaim: + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always appears * to be reclaimed with an invalid inode number when in the free state. - * We do this as early as possible under the ILOCK and flush lock so - * that xfs_iflush_cluster() can be guaranteed to detect races with us - * here. By doing this, we guarantee that once xfs_iflush_cluster has - * locked both the XFS_ILOCK and the flush lock that it will see either - * a valid, flushable inode that will serialise correctly against the - * locks below, or it will see a clean (and invalid) inode that it can - * skip. + * We do this as early as possible under the ILOCK so that + * xfs_iflush_cluster() can be guaranteed to detect races with us here. + * By doing this, we guarantee that once xfs_iflush_cluster has locked + * XFS_ILOCK that it will see either a valid, flushable inode that will + * serialise correctly, or it will see a clean (and invalid) inode that + * it can skip. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); - xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); @@ -1580,10 +1581,15 @@ xfs_inode_free_cowblocks( struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - if (!xfs_reflink_has_real_cow_blocks(ip)) { + /* + * Just clear the tag if we have an empty cow fork or none at all. It's + * possible the inode was fully unshared since it was originally tagged. + */ + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return 0; @@ -1656,9 +1662,9 @@ void xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { - trace_xfs_inode_set_eofblocks_tag(ip); + trace_xfs_inode_set_cowblocks_tag(ip); return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, - trace_xfs_perag_set_eofblocks, + trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1666,7 +1672,7 @@ void xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { - trace_xfs_inode_clear_eofblocks_tag(ip); + trace_xfs_inode_clear_cowblocks_tag(ip); return __xfs_inode_clear_eofblocks_tag(ip, - trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); + trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d45ca72af6fb..865ad1373e5e 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -133,7 +133,7 @@ xfs_icreate_item_committing( /* * This is the ops vector shared by all buf log items. */ -static struct xfs_item_ops xfs_icreate_item_ops = { +static const struct xfs_item_ops xfs_icreate_item_ops = { .iop_size = xfs_icreate_item_size, .iop_format = xfs_icreate_item_format, .iop_pin = xfs_icreate_item_pin, diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f14c1de2549d..71e8a81c91a3 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * Synchronize processes attempting to flush the in-core inode back to disk. */ +static inline int xfs_isiflocked(struct xfs_inode *ip) +{ + return xfs_iflags_test(ip, XFS_IFLOCK); +} + extern void __xfs_iflock(struct xfs_inode *ip); static inline int xfs_iflock_nowait(struct xfs_inode *ip) @@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs_inode *ip) static inline void xfs_ifunlock(struct xfs_inode *ip) { + ASSERT(xfs_isiflocked(ip)); xfs_iflags_clear(ip, XFS_IFLOCK); smp_mb(); wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } -static inline int xfs_isiflocked(struct xfs_inode *ip) -{ - return xfs_iflags_test(ip, XFS_IFLOCK); -} - /* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9610e9c00952..d90e7811ccdd 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork( struct xfs_bmbt_rec *p; ASSERT(ip->i_df.if_u1.if_extents != NULL); - ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); + ASSERT(xfs_iext_count(&ip->i_df) > 0); p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); @@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { struct xfs_bmbt_rec *p; - ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == + ASSERT(xfs_iext_count(ip->i_afp) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c245bed3249b..a39197501a7c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr( if (attr) { if (ip->i_afp) { if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_afp->if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(ip->i_afp); else fa.fsx_nextents = ip->i_d.di_anextents; } else fa.fsx_nextents = 0; } else { if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_df.if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(&ip->i_df); else fa.fsx_nextents = ip->i_d.di_nextents; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d907eb9f8ef3..15a83813b708 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -395,11 +395,12 @@ xfs_iomap_prealloc_size( struct xfs_inode *ip, loff_t offset, loff_t count, - xfs_extnum_t idx, - struct xfs_bmbt_irec *prev) + xfs_extnum_t idx) { struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + struct xfs_bmbt_irec prev; int shift = 0; int64_t freesp; xfs_fsblock_t qblocks; @@ -419,8 +420,8 @@ xfs_iomap_prealloc_size( */ if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || - idx == 0 || - prev->br_startoff + prev->br_blockcount < offset_fsb) + !xfs_iext_get_extent(ifp, idx - 1, &prev) || + prev.br_startoff + prev.br_blockcount < offset_fsb) return mp->m_writeio_blocks; /* @@ -439,8 +440,8 @@ xfs_iomap_prealloc_size( * always extends to MAXEXTLEN rather than falling short due to things * like stripe unit/width alignment of real extents. */ - if (prev->br_blockcount <= (MAXEXTLEN >> 1)) - alloc_blocks = prev->br_blockcount << 1; + if (prev.br_blockcount <= (MAXEXTLEN >> 1)) + alloc_blocks = prev.br_blockcount << 1; else alloc_blocks = XFS_B_TO_FSB(mp, offset); if (!alloc_blocks) @@ -535,11 +536,11 @@ xfs_file_iomap_begin_delay( xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t maxbytes_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb, orig_end_fsb; + xfs_fileoff_t end_fsb; int error = 0, eof = 0; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; xfs_extnum_t idx; + xfs_fsblock_t prealloc_blocks = 0; ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!xfs_get_extsz_hint(ip)); @@ -563,9 +564,19 @@ xfs_file_iomap_begin_delay( goto out_unlock; } - xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, - &got, &prev); + eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); if (!eof && got.br_startoff <= offset_fsb) { + if (xfs_is_reflink_inode(ip)) { + bool shared; + + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), + maxbytes_fsb); + xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); + error = xfs_reflink_reserve_cow(ip, &got, &shared); + if (error) + goto out_unlock; + } + trace_xfs_iomap_found(ip, offset, count, 0, &got); goto done; } @@ -584,35 +595,32 @@ xfs_file_iomap_begin_delay( * the lower level functions are updated. */ count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = orig_end_fsb = - min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); if (eof) { - xfs_fsblock_t prealloc_blocks; - - prealloc_blocks = - xfs_iomap_prealloc_size(ip, offset, count, idx, &prev); + prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; align = xfs_eof_alignment(ip, 0); if (align) - end_fsb = roundup_64(end_fsb, align); + p_end_fsb = roundup_64(p_end_fsb, align); - end_fsb = min(end_fsb, maxbytes_fsb); - ASSERT(end_fsb > offset_fsb); + p_end_fsb = min(p_end_fsb, maxbytes_fsb); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; } } retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, &got, - &prev, &idx, eof); + end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); switch (error) { case 0: break; @@ -620,8 +628,8 @@ retry: case -EDQUOT: /* retry without any preallocation */ trace_xfs_delalloc_enospc(ip, offset, count); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; + if (prealloc_blocks) { + prealloc_blocks = 0; goto retry; } /*FALLTHRU*/ @@ -629,13 +637,6 @@ retry: goto out_unlock; } - /* - * Tag the inode as speculatively preallocated so we can reclaim this - * space on demand, if necessary. - */ - if (end_fsb != orig_end_fsb) - xfs_inode_set_eofblocks_tag(ip); - trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -961,19 +962,13 @@ xfs_file_iomap_begin( struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; - bool shared, trimmed; int nimaps = 1, error = 0; + bool shared = false, trimmed = false; unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { - error = xfs_reflink_reserve_cow_range(ip, offset, length); - if (error < 0) - return error; - } - if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { /* Reserve delalloc blocks for regular writeback. */ @@ -981,7 +976,16 @@ xfs_file_iomap_begin( iomap); } - lockmode = xfs_ilock_data_map_shared(ip); + /* + * COW writes will allocate delalloc space, so we need to make sure + * to take the lock exclusively here. + */ + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, XFS_ILOCK_EXCL); + } else { + lockmode = xfs_ilock_data_map_shared(ip); + } ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) @@ -991,16 +995,24 @@ xfs_file_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); - if (error) { - xfs_iunlock(ip, lockmode); - return error; + if (error) + goto out_unlock; + + if (flags & IOMAP_REPORT) { + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, + &trimmed); + if (error) + goto out_unlock; } - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); - if (error) { - xfs_iunlock(ip, lockmode); - return error; + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + error = xfs_reflink_reserve_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + + end_fsb = imap.br_startoff + imap.br_blockcount; + length = XFS_FSB_TO_B(mp, end_fsb) - offset; } if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { @@ -1039,6 +1051,9 @@ xfs_file_iomap_begin( if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; +out_unlock: + xfs_iunlock(ip, lockmode); + return error; } static int diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index fc7873942bea..b341f10cf481 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1009,6 +1009,7 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: + mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index a60d9e2739d1..45e50ea90769 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks( return error; } rtblks = 0; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0; idx < nextents; idx++) rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); *O_rtblks = (xfs_qcnt_t)rtblks; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5965e9455d91..becf2465dd23 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared( if (!xfs_is_reflink_inode(ip) || ISUNWRITTEN(irec) || irec->br_startblock == HOLESTARTBLOCK || - irec->br_startblock == DELAYSTARTBLOCK) { + irec->br_startblock == DELAYSTARTBLOCK || + isnullstartblock(irec->br_startblock)) { *shared = false; return 0; } @@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared( } } -/* Create a CoW reservation for a range of blocks within a file. */ -static int -__xfs_reflink_reserve_cow( +/* + * Trim the passed in imap to the next shared/unshared extent boundary, and + * if imap->br_startoff points to a shared extent reserve space for it in the + * COW fork. In this case *shared is set to true, else to false. + * + * Note that imap will always contain the block numbers for the existing blocks + * in the data fork, as the upper layers need them for read-modify-write + * operations. + */ +int +xfs_reflink_reserve_cow( struct xfs_inode *ip, - xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb, - bool *skipped) + struct xfs_bmbt_irec *imap, + bool *shared) { - struct xfs_bmbt_irec got, prev, imap; - xfs_fileoff_t orig_end_fsb; - int nimaps, eof = 0, error = 0; - bool shared = false, trimmed = false; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got; + int error = 0; + bool eof = false, trimmed; xfs_extnum_t idx; - xfs_extlen_t align; - /* Already reserved? Skip the refcount btree access. */ - xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, - &got, &prev); - if (!eof && got.br_startoff <= *offset_fsb) { - end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; - trace_xfs_reflink_cow_found(ip, &got); - goto done; + /* + * Search the COW fork extent list first. This serves two purposes: + * first this implement the speculative preallocation using cowextisze, + * so that we also unshared block adjacent to shared blocks instead + * of just the shared blocks themselves. Second the lookup in the + * extent list is generally faster than going out to the shared extent + * tree. + */ + + if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) + eof = true; + if (!eof && got.br_startoff <= imap->br_startoff) { + trace_xfs_reflink_cow_found(ip, imap); + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + + *shared = true; + return 0; } - /* Read extent from the source file. */ - nimaps = 1; - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, - &imap, &nimaps, 0); - if (error) - goto out_unlock; - ASSERT(nimaps == 1); - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); + error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); if (error) - goto out_unlock; - - end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; + return error; /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) { - *skipped = true; - goto done; - } + if (!*shared) + return 0; /* * Fork all the shared blocks from our write offset until the end of @@ -278,72 +283,17 @@ __xfs_reflink_reserve_cow( */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - goto out_unlock; + return error; - align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); - if (align) - end_fsb = roundup_64(end_fsb, align); - -retry: - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, - end_fsb - *offset_fsb, &got, - &prev, &idx, eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_reflink_cow_enospc(ip, &imap); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; - goto retry; - } - /*FALLTHRU*/ - default: - goto out_unlock; - } - - if (end_fsb != orig_end_fsb) - xfs_inode_set_cowblocks_tag(ip); + error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, + imap->br_blockcount, 0, &got, &idx, eof); + if (error == -ENOSPC || error == -EDQUOT) + trace_xfs_reflink_cow_enospc(ip, imap); + if (error) + return error; trace_xfs_reflink_cow_alloc(ip, &got); -done: - *offset_fsb = end_fsb; -out_unlock: - return error; -} - -/* Create a CoW reservation for part of a file. */ -int -xfs_reflink_reserve_cow_range( - struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t count) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - bool skipped = false; - int error; - - trace_xfs_reflink_reserve_cow_range(ip, offset, count); - - offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, offset + count); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - while (offset_fsb < end_fsb) { - error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, - &skipped); - if (error) { - trace_xfs_reflink_reserve_cow_range_error(ip, error, - _RET_IP_); - break; - } - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - return error; + return 0; } /* Allocate all CoW reservations covering a range of blocks in a file. */ @@ -358,9 +308,8 @@ __xfs_reflink_allocate_cow( struct xfs_defer_ops dfops; struct xfs_trans *tp; xfs_fsblock_t first_block; - xfs_fileoff_t next_fsb; int nimaps = 1, error; - bool skipped = false; + bool shared; xfs_defer_init(&dfops, &first_block); @@ -371,33 +320,38 @@ __xfs_reflink_allocate_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); - next_fsb = *offset_fsb; - error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); + /* Read extent from the source file. */ + nimaps = 1; + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, + &imap, &nimaps, 0); + if (error) + goto out_unlock; + ASSERT(nimaps == 1); + + error = xfs_reflink_reserve_cow(ip, &imap, &shared); if (error) goto out_trans_cancel; - if (skipped) { - *offset_fsb = next_fsb; + if (!shared) { + *offset_fsb = imap.br_startoff + imap.br_blockcount; goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, 0); - error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, + error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, XFS_BMAPI_COWFORK, &first_block, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), &imap, &nimaps, &dfops); if (error) goto out_trans_cancel; - /* We might not have been able to map the whole delalloc extent */ - *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); - error = xfs_defer_finish(&tp, &dfops, NULL); if (error) goto out_trans_cancel; error = xfs_trans_commit(tp); + *offset_fsb = imap.br_startoff + imap.br_blockcount; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -443,87 +397,65 @@ xfs_reflink_allocate_cow_range( } /* - * Find the CoW reservation (and whether or not it needs block allocation) - * for a given byte offset of a file. + * Find the CoW reservation for a given byte offset of a file. */ bool xfs_reflink_find_cow_mapping( struct xfs_inode *ip, xfs_off_t offset, - struct xfs_bmbt_irec *imap, - bool *need_alloc) + struct xfs_bmbt_irec *imap) { - struct xfs_bmbt_irec irec; - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; - xfs_fileoff_t bno; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + xfs_fileoff_t offset_fsb; + struct xfs_bmbt_irec got; xfs_extnum_t idx; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); ASSERT(xfs_is_reflink_inode(ip)); - /* Find the extent in the CoW fork. */ - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - bno = XFS_B_TO_FSBT(ip->i_mount, offset); - gotp = xfs_iext_bno_to_ext(ifp, bno, &idx); - if (!gotp) + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) return false; - - xfs_bmbt_get_all(gotp, &irec); - if (bno >= irec.br_startoff + irec.br_blockcount || - bno < irec.br_startoff) + if (got.br_startoff > offset_fsb) return false; trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, - &irec); - - /* If it's still delalloc, we must allocate later. */ - *imap = irec; - *need_alloc = !!(isnullstartblock(irec.br_startblock)); - + &got); + *imap = got; return true; } /* * Trim an extent to end at the next CoW reservation past offset_fsb. */ -int +void xfs_reflink_trim_irec_to_next_cow( struct xfs_inode *ip, xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap) { - struct xfs_bmbt_irec irec; - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got; xfs_extnum_t idx; if (!xfs_is_reflink_inode(ip)) - return 0; + return; /* Find the extent in the CoW fork. */ - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx); - if (!gotp) - return 0; - xfs_bmbt_get_all(gotp, &irec); + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) + return; /* This is the extent before; try sliding up one. */ - if (irec.br_startoff < offset_fsb) { - idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - return 0; - gotp = xfs_iext_get_ext(ifp, idx); - xfs_bmbt_get_all(gotp, &irec); + if (got.br_startoff < offset_fsb) { + if (!xfs_iext_get_extent(ifp, idx + 1, &got)) + return; } - if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount) - return 0; + if (got.br_startoff >= imap->br_startoff + imap->br_blockcount) + return; - imap->br_blockcount = irec.br_startoff - imap->br_startoff; + imap->br_blockcount = got.br_startoff - imap->br_startoff; trace_xfs_reflink_trim_irec(ip, imap); - - return 0; } /* @@ -536,58 +468,46 @@ xfs_reflink_cancel_cow_blocks( xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb) { - struct xfs_bmbt_irec irec; - xfs_filblks_t count_fsb; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, del; + xfs_extnum_t idx; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; int error = 0; - int nimaps; if (!xfs_is_reflink_inode(ip)) return 0; + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) + return 0; - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - break; - ASSERT(nimaps == 1); + while (got.br_startoff < end_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + trace_xfs_reflink_cancel_cow(ip, &del); - trace_xfs_reflink_cancel_cow(ip, &irec); - - if (irec.br_startblock == DELAYSTARTBLOCK) { - /* Free a delayed allocation. */ - xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, - false); - ip->i_delayed_blks -= irec.br_blockcount; - - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); + if (isnullstartblock(del.br_startblock)) { + error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, + &idx, &got, &del); if (error) break; - } else if (irec.br_startblock == HOLESTARTBLOCK) { - /* empty */ } else { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); /* Free the CoW orphan record. */ error = xfs_refcount_free_cow_extent(ip->i_mount, - &dfops, irec.br_startblock, - irec.br_blockcount); + &dfops, del.br_startblock, + del.br_blockcount); if (error) break; xfs_bmap_add_free(ip->i_mount, &dfops, - irec.br_startblock, irec.br_blockcount, + del.br_startblock, del.br_blockcount, NULL); /* Update quota accounting */ xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)irec.br_blockcount); + -(long)del.br_blockcount); /* Roll the transaction */ error = xfs_defer_finish(tpp, &dfops, ip); @@ -597,15 +517,17 @@ xfs_reflink_cancel_cow_blocks( } /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); - if (error) - break; + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); } - /* Roll on... */ - offset_fsb = irec.br_startoff + irec.br_blockcount; + if (!xfs_iext_get_extent(ifp, ++idx, &got)) + break; } + /* clear tag if cow fork is emptied */ + if (!ifp->if_bytes) + xfs_inode_clear_cowblocks_tag(ip); + return error; } @@ -668,25 +590,26 @@ xfs_reflink_end_cow( xfs_off_t offset, xfs_off_t count) { - struct xfs_bmbt_irec irec; - struct xfs_bmbt_irec uirec; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, del; struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; - xfs_filblks_t count_fsb; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; int error; unsigned int resblks; - xfs_filblks_t ilen; xfs_filblks_t rlen; - int nimaps; + xfs_extnum_t idx; trace_xfs_reflink_end_cow(ip, offset, count); + /* No COW extents? That's easy! */ + if (ifp->if_bytes == 0) + return 0; + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); /* Start a rolling transaction to switch the mappings */ resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); @@ -698,72 +621,61 @@ xfs_reflink_end_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - /* Read extent from the source file */ - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - goto out_cancel; - ASSERT(nimaps == 1); + /* If there is a hole at end_fsb - 1 go to the previous extent */ + if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || + got.br_startoff > end_fsb) { + ASSERT(idx > 0); + xfs_iext_get_extent(ifp, --idx, &got); + } - ASSERT(irec.br_startblock != DELAYSTARTBLOCK); - trace_xfs_reflink_cow_remap(ip, &irec); + /* Walk backwards until we're out of the I/O range... */ + while (got.br_startoff + got.br_blockcount > offset_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); - /* - * We can have a hole in the CoW fork if part of a directio - * write is CoW but part of it isn't. - */ - rlen = ilen = irec.br_blockcount; - if (irec.br_startblock == HOLESTARTBLOCK) + /* Extent delete may have bumped idx forward */ + if (!del.br_blockcount) { + idx--; goto next_extent; - - /* Unmap the old blocks in the data fork. */ - while (rlen) { - xfs_defer_init(&dfops, &firstfsb); - error = __xfs_bunmapi(tp, ip, irec.br_startoff, - &rlen, 0, 1, &firstfsb, &dfops); - if (error) - goto out_defer; - - /* - * Trim the extent to whatever got unmapped. - * Remember, bunmapi works backwards. - */ - uirec.br_startblock = irec.br_startblock + rlen; - uirec.br_startoff = irec.br_startoff + rlen; - uirec.br_blockcount = irec.br_blockcount - rlen; - irec.br_blockcount = rlen; - trace_xfs_reflink_cow_remap_piece(ip, &uirec); - - /* Free the CoW orphan record. */ - error = xfs_refcount_free_cow_extent(tp->t_mountp, - &dfops, uirec.br_startblock, - uirec.br_blockcount); - if (error) - goto out_defer; - - /* Map the new blocks into the data fork. */ - error = xfs_bmap_map_extent(tp->t_mountp, &dfops, - ip, &uirec); - if (error) - goto out_defer; - - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &uirec); - if (error) - goto out_defer; - - error = xfs_defer_finish(&tp, &dfops, ip); - if (error) - goto out_defer; } + ASSERT(!isnullstartblock(got.br_startblock)); + + /* Unmap the old blocks in the data fork. */ + xfs_defer_init(&dfops, &firstfsb); + rlen = del.br_blockcount; + error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, + &firstfsb, &dfops); + if (error) + goto out_defer; + + /* Trim the extent to whatever got unmapped. */ + if (rlen) { + xfs_trim_extent(&del, del.br_startoff + rlen, + del.br_blockcount - rlen); + } + trace_xfs_reflink_cow_remap(ip, &del); + + /* Free the CoW orphan record. */ + error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, + del.br_startblock, del.br_blockcount); + if (error) + goto out_defer; + + /* Map the new blocks into the data fork. */ + error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); + if (error) + goto out_defer; + + /* Remove the mapping from the CoW fork. */ + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); + + error = xfs_defer_finish(&tp, &dfops, ip); + if (error) + goto out_defer; next_extent: - /* Roll on... */ - offset_fsb = irec.br_startoff + ilen; + if (!xfs_iext_get_extent(ifp, idx, &got)) + break; } error = xfs_trans_commit(tp); @@ -774,7 +686,6 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); -out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: @@ -1312,19 +1223,26 @@ out_error: */ int xfs_reflink_remap_range( - struct xfs_inode *src, - xfs_off_t srcoff, - struct xfs_inode *dest, - xfs_off_t destoff, - xfs_off_t len, - unsigned int flags) + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) { + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; + loff_t bs = inode_out->i_sb->s_blocksize; + bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; - int error; xfs_extlen_t cowextsize; - bool is_same; + loff_t isize; + ssize_t ret; + loff_t blen; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1332,17 +1250,8 @@ xfs_reflink_remap_range( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - /* Don't reflink realtime inodes */ - if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) - return -EINVAL; - - if (flags & ~XFS_REFLINK_ALL) - return -EINVAL; - - trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); - /* Lock both files against IO */ - if (src->i_ino == dest->i_ino) { + if (same_inode) { xfs_ilock(src, XFS_IOLOCK_EXCL); xfs_ilock(src, XFS_MMAPLOCK_EXCL); } else { @@ -1350,39 +1259,132 @@ xfs_reflink_remap_range( xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); } + /* Don't touch certain kinds of inodes */ + ret = -EPERM; + if (IS_IMMUTABLE(inode_out)) + goto out_unlock; + + ret = -ETXTBSY; + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + goto out_unlock; + + + /* Don't reflink dirs, pipes, sockets... */ + ret = -EISDIR; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + goto out_unlock; + ret = -EINVAL; + if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) + goto out_unlock; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + goto out_unlock; + + /* Don't reflink realtime inodes */ + if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) + goto out_unlock; + + /* Don't share DAX file data for now. */ + if (IS_DAX(inode_in) || IS_DAX(inode_out)) + goto out_unlock; + + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) { + ret = 0; + goto out_unlock; + } + + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (len == 0) { + if (is_dedupe) { + ret = 0; + goto out_unlock; + } + len = isize - pos_in; + } + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + len < pos_in || pos_out + len < pos_out || + pos_in + len > isize) + goto out_unlock; + + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; + + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + len > disize) + goto out_unlock; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + goto out_unlock; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode) { + if (pos_out + blen > pos_in && pos_out < pos_in + blen) + goto out_unlock; + } + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + len - 1); + if (ret) + goto out_unlock; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + len - 1); + if (ret) + goto out_unlock; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* * Check that the extents are the same. */ - if (flags & XFS_REFLINK_DEDUPE) { - is_same = false; - error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), - destoff, len, &is_same); - if (error) - goto out_error; + if (is_dedupe) { + bool is_same = false; + + ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, + len, &is_same); + if (ret) + goto out_unlock; if (!is_same) { - error = -EBADE; - goto out_error; + ret = -EBADE; + goto out_unlock; } } - error = xfs_reflink_set_inode_flag(src, dest); - if (error) - goto out_error; + ret = xfs_reflink_set_inode_flag(src, dest); + if (ret) + goto out_unlock; /* * Invalidate the page cache so that we can clear any CoW mappings * in the destination file. */ - truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, - PAGE_ALIGN(destoff + len) - 1); + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); - dfsbno = XFS_B_TO_FSBT(mp, destoff); - sfsbno = XFS_B_TO_FSBT(mp, srcoff); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); + sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); - error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, - destoff + len); - if (error) - goto out_error; + ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, + pos_out + len); + if (ret) + goto out_unlock; /* * Carry the cowextsize hint from src to dest if we're sharing the @@ -1390,26 +1392,24 @@ xfs_reflink_remap_range( * has a cowextsize hint, and the destination file does not. */ cowextsize = 0; - if (srcoff == 0 && len == i_size_read(VFS_I(src)) && + if (pos_in == 0 && len == i_size_read(inode_in) && (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && - destoff == 0 && len >= i_size_read(VFS_I(dest)) && + pos_out == 0 && len >= i_size_read(inode_out) && !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; - error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); - if (error) - goto out_error; + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); -out_error: +out_unlock: xfs_iunlock(src, XFS_MMAPLOCK_EXCL); xfs_iunlock(src, XFS_IOLOCK_EXCL); if (src->i_ino != dest->i_ino) { xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); xfs_iunlock(dest, XFS_IOLOCK_EXCL); } - if (error) - trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); - return error; + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return ret; } /* @@ -1652,37 +1652,3 @@ out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } - -/* - * Does this inode have any real CoW reservations? - */ -bool -xfs_reflink_has_real_cow_blocks( - struct xfs_inode *ip) -{ - struct xfs_bmbt_irec irec; - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; - xfs_extnum_t idx; - - if (!xfs_is_reflink_inode(ip)) - return false; - - /* Go find the old extent in the CoW fork. */ - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); - while (gotp) { - xfs_bmbt_get_all(gotp, &irec); - - if (!isnullstartblock(irec.br_startblock)) - return true; - - /* Roll on... */ - idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - break; - gotp = xfs_iext_get_ext(ifp, idx); - } - - return false; -} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 5dc3c8ac12aa..aa6a4d64bd35 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -26,13 +26,13 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); -extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, - xfs_off_t offset, xfs_off_t count); +extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, bool *shared); extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, - struct xfs_bmbt_irec *imap, bool *need_alloc); -extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap); +extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, @@ -43,16 +43,11 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); -#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ -#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) -extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, - struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, - unsigned int flags); +extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); -extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); - #endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 5f8d55d29a11..276d3023d60f 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = { }; -struct kobj_type xfs_error_cfg_ktype = { +static struct kobj_type xfs_error_cfg_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_error_attrs, }; -struct kobj_type xfs_error_ktype = { +static struct kobj_type xfs_error_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, }; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad188d3a83f3..0907752be62d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); -DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); +DEFINE_RW_EVENT(xfs_reflink_reserve_cow); DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); @@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); -DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b9e7b8ec8c1d..f185156de74d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -19,11 +19,15 @@ struct vm_fault; #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ /* - * Flags for iomap mappings: + * Flags for all iomap mappings: */ -#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ -#define IOMAP_F_SHARED 0x02 /* block shared with another file */ -#define IOMAP_F_NEW 0x04 /* blocks have been newly allocated */ +#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ + +/* + * Flags that only need to be reported for IOMAP_REPORT requests: + */ +#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ +#define IOMAP_F_SHARED 0x20 /* block shared with another file */ /* * Magic value for blkno: @@ -42,8 +46,9 @@ struct iomap { /* * Flags for iomap_begin / iomap_end. No flag implies a read. */ -#define IOMAP_WRITE (1 << 0) -#define IOMAP_ZERO (1 << 1) +#define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */ +#define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ +#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ struct iomap_ops {