b3c496343b
Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
604 lines
15 KiB
C
604 lines
15 KiB
C
/*
|
|
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
|
* Copyright (c) 2010 David Chinner.
|
|
* Copyright (c) 2011 Christoph Hellwig.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_types.h"
|
|
#include "xfs_log.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_sb.h"
|
|
#include "xfs_ag.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_bmap_btree.h"
|
|
#include "xfs_alloc.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_extent_busy.h"
|
|
#include "xfs_trace.h"
|
|
|
|
void
|
|
xfs_extent_busy_insert(
|
|
struct xfs_trans *tp,
|
|
xfs_agnumber_t agno,
|
|
xfs_agblock_t bno,
|
|
xfs_extlen_t len,
|
|
unsigned int flags)
|
|
{
|
|
struct xfs_extent_busy *new;
|
|
struct xfs_extent_busy *busyp;
|
|
struct xfs_perag *pag;
|
|
struct rb_node **rbp;
|
|
struct rb_node *parent = NULL;
|
|
|
|
new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
|
|
if (!new) {
|
|
/*
|
|
* No Memory! Since it is now not possible to track the free
|
|
* block, make this a synchronous transaction to insure that
|
|
* the block is not reused before this transaction commits.
|
|
*/
|
|
trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
|
|
xfs_trans_set_sync(tp);
|
|
return;
|
|
}
|
|
|
|
new->agno = agno;
|
|
new->bno = bno;
|
|
new->length = len;
|
|
INIT_LIST_HEAD(&new->list);
|
|
new->flags = flags;
|
|
|
|
/* trace before insert to be able to see failed inserts */
|
|
trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
|
|
|
|
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
|
spin_lock(&pag->pagb_lock);
|
|
rbp = &pag->pagb_tree.rb_node;
|
|
while (*rbp) {
|
|
parent = *rbp;
|
|
busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
|
|
|
|
if (new->bno < busyp->bno) {
|
|
rbp = &(*rbp)->rb_left;
|
|
ASSERT(new->bno + new->length <= busyp->bno);
|
|
} else if (new->bno > busyp->bno) {
|
|
rbp = &(*rbp)->rb_right;
|
|
ASSERT(bno >= busyp->bno + busyp->length);
|
|
} else {
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
rb_link_node(&new->rb_node, parent, rbp);
|
|
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
|
|
|
list_add(&new->list, &tp->t_busy);
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_perag_put(pag);
|
|
}
|
|
|
|
/*
|
|
* Search for a busy extent within the range of the extent we are about to
|
|
* allocate. You need to be holding the busy extent tree lock when calling
|
|
* xfs_extent_busy_search(). This function returns 0 for no overlapping busy
|
|
* extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
|
|
* match. This is done so that a non-zero return indicates an overlap that
|
|
* will require a synchronous transaction, but it can still be
|
|
* used to distinguish between a partial or exact match.
|
|
*/
|
|
int
|
|
xfs_extent_busy_search(
|
|
struct xfs_mount *mp,
|
|
xfs_agnumber_t agno,
|
|
xfs_agblock_t bno,
|
|
xfs_extlen_t len)
|
|
{
|
|
struct xfs_perag *pag;
|
|
struct rb_node *rbp;
|
|
struct xfs_extent_busy *busyp;
|
|
int match = 0;
|
|
|
|
pag = xfs_perag_get(mp, agno);
|
|
spin_lock(&pag->pagb_lock);
|
|
|
|
rbp = pag->pagb_tree.rb_node;
|
|
|
|
/* find closest start bno overlap */
|
|
while (rbp) {
|
|
busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
|
if (bno < busyp->bno) {
|
|
/* may overlap, but exact start block is lower */
|
|
if (bno + len > busyp->bno)
|
|
match = -1;
|
|
rbp = rbp->rb_left;
|
|
} else if (bno > busyp->bno) {
|
|
/* may overlap, but exact start block is higher */
|
|
if (bno < busyp->bno + busyp->length)
|
|
match = -1;
|
|
rbp = rbp->rb_right;
|
|
} else {
|
|
/* bno matches busyp, length determines exact match */
|
|
match = (busyp->length == len) ? 1 : -1;
|
|
break;
|
|
}
|
|
}
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_perag_put(pag);
|
|
return match;
|
|
}
|
|
|
|
/*
|
|
* The found free extent [fbno, fend] overlaps part or all of the given busy
|
|
* extent. If the overlap covers the beginning, the end, or all of the busy
|
|
* extent, the overlapping portion can be made unbusy and used for the
|
|
* allocation. We can't split a busy extent because we can't modify a
|
|
* transaction/CIL context busy list, but we can update an entry's block
|
|
* number or length.
|
|
*
|
|
* Returns true if the extent can safely be reused, or false if the search
|
|
* needs to be restarted.
|
|
*/
|
|
STATIC bool
|
|
xfs_extent_busy_update_extent(
|
|
struct xfs_mount *mp,
|
|
struct xfs_perag *pag,
|
|
struct xfs_extent_busy *busyp,
|
|
xfs_agblock_t fbno,
|
|
xfs_extlen_t flen,
|
|
bool userdata)
|
|
{
|
|
xfs_agblock_t fend = fbno + flen;
|
|
xfs_agblock_t bbno = busyp->bno;
|
|
xfs_agblock_t bend = bbno + busyp->length;
|
|
|
|
/*
|
|
* This extent is currently being discarded. Give the thread
|
|
* performing the discard a chance to mark the extent unbusy
|
|
* and retry.
|
|
*/
|
|
if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
|
|
spin_unlock(&pag->pagb_lock);
|
|
delay(1);
|
|
spin_lock(&pag->pagb_lock);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* If there is a busy extent overlapping a user allocation, we have
|
|
* no choice but to force the log and retry the search.
|
|
*
|
|
* Fortunately this does not happen during normal operation, but
|
|
* only if the filesystem is very low on space and has to dip into
|
|
* the AGFL for normal allocations.
|
|
*/
|
|
if (userdata)
|
|
goto out_force_log;
|
|
|
|
if (bbno < fbno && bend > fend) {
|
|
/*
|
|
* Case 1:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +---------+
|
|
* fbno fend
|
|
*/
|
|
|
|
/*
|
|
* We would have to split the busy extent to be able to track
|
|
* it correct, which we cannot do because we would have to
|
|
* modify the list of busy extents attached to the transaction
|
|
* or CIL context, which is immutable.
|
|
*
|
|
* Force out the log to clear the busy extent and retry the
|
|
* search.
|
|
*/
|
|
goto out_force_log;
|
|
} else if (bbno >= fbno && bend <= fend) {
|
|
/*
|
|
* Case 2:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-----------------+
|
|
* fbno fend
|
|
*
|
|
* Case 3:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +--------------------------+
|
|
* fbno fend
|
|
*
|
|
* Case 4:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +--------------------------+
|
|
* fbno fend
|
|
*
|
|
* Case 5:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-----------------------------------+
|
|
* fbno fend
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* The busy extent is fully covered by the extent we are
|
|
* allocating, and can simply be removed from the rbtree.
|
|
* However we cannot remove it from the immutable list
|
|
* tracking busy extents in the transaction or CIL context,
|
|
* so set the length to zero to mark it invalid.
|
|
*
|
|
* We also need to restart the busy extent search from the
|
|
* tree root, because erasing the node can rearrange the
|
|
* tree topology.
|
|
*/
|
|
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
busyp->length = 0;
|
|
return false;
|
|
} else if (fend < bend) {
|
|
/*
|
|
* Case 6:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +---------+
|
|
* fbno fend
|
|
*
|
|
* Case 7:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +------------------+
|
|
* fbno fend
|
|
*
|
|
*/
|
|
busyp->bno = fend;
|
|
} else if (bbno < fbno) {
|
|
/*
|
|
* Case 8:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-------------+
|
|
* fbno fend
|
|
*
|
|
* Case 9:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +----------------------+
|
|
* fbno fend
|
|
*/
|
|
busyp->length = fbno - busyp->bno;
|
|
} else {
|
|
ASSERT(0);
|
|
}
|
|
|
|
trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
|
|
return true;
|
|
|
|
out_force_log:
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_log_force(mp, XFS_LOG_SYNC);
|
|
trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
|
|
spin_lock(&pag->pagb_lock);
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
* For a given extent [fbno, flen], make sure we can reuse it safely.
|
|
*/
|
|
void
|
|
xfs_extent_busy_reuse(
|
|
struct xfs_mount *mp,
|
|
xfs_agnumber_t agno,
|
|
xfs_agblock_t fbno,
|
|
xfs_extlen_t flen,
|
|
bool userdata)
|
|
{
|
|
struct xfs_perag *pag;
|
|
struct rb_node *rbp;
|
|
|
|
ASSERT(flen > 0);
|
|
|
|
pag = xfs_perag_get(mp, agno);
|
|
spin_lock(&pag->pagb_lock);
|
|
restart:
|
|
rbp = pag->pagb_tree.rb_node;
|
|
while (rbp) {
|
|
struct xfs_extent_busy *busyp =
|
|
rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
|
xfs_agblock_t bbno = busyp->bno;
|
|
xfs_agblock_t bend = bbno + busyp->length;
|
|
|
|
if (fbno + flen <= bbno) {
|
|
rbp = rbp->rb_left;
|
|
continue;
|
|
} else if (fbno >= bend) {
|
|
rbp = rbp->rb_right;
|
|
continue;
|
|
}
|
|
|
|
if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
|
|
userdata))
|
|
goto restart;
|
|
}
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_perag_put(pag);
|
|
}
|
|
|
|
/*
|
|
* For a given extent [fbno, flen], search the busy extent list to find a
|
|
* subset of the extent that is not busy. If *rlen is smaller than
|
|
* args->minlen no suitable extent could be found, and the higher level
|
|
* code needs to force out the log and retry the allocation.
|
|
*/
|
|
void
|
|
xfs_extent_busy_trim(
|
|
struct xfs_alloc_arg *args,
|
|
xfs_agblock_t bno,
|
|
xfs_extlen_t len,
|
|
xfs_agblock_t *rbno,
|
|
xfs_extlen_t *rlen)
|
|
{
|
|
xfs_agblock_t fbno;
|
|
xfs_extlen_t flen;
|
|
struct rb_node *rbp;
|
|
|
|
ASSERT(len > 0);
|
|
|
|
spin_lock(&args->pag->pagb_lock);
|
|
restart:
|
|
fbno = bno;
|
|
flen = len;
|
|
rbp = args->pag->pagb_tree.rb_node;
|
|
while (rbp && flen >= args->minlen) {
|
|
struct xfs_extent_busy *busyp =
|
|
rb_entry(rbp, struct xfs_extent_busy, rb_node);
|
|
xfs_agblock_t fend = fbno + flen;
|
|
xfs_agblock_t bbno = busyp->bno;
|
|
xfs_agblock_t bend = bbno + busyp->length;
|
|
|
|
if (fend <= bbno) {
|
|
rbp = rbp->rb_left;
|
|
continue;
|
|
} else if (fbno >= bend) {
|
|
rbp = rbp->rb_right;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* If this is a metadata allocation, try to reuse the busy
|
|
* extent instead of trimming the allocation.
|
|
*/
|
|
if (!args->userdata &&
|
|
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
|
|
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
|
|
busyp, fbno, flen,
|
|
false))
|
|
goto restart;
|
|
continue;
|
|
}
|
|
|
|
if (bbno <= fbno) {
|
|
/* start overlap */
|
|
|
|
/*
|
|
* Case 1:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +---------+
|
|
* fbno fend
|
|
*
|
|
* Case 2:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-------------+
|
|
* fbno fend
|
|
*
|
|
* Case 3:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-------------+
|
|
* fbno fend
|
|
*
|
|
* Case 4:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-----------------+
|
|
* fbno fend
|
|
*
|
|
* No unbusy region in extent, return failure.
|
|
*/
|
|
if (fend <= bend)
|
|
goto fail;
|
|
|
|
/*
|
|
* Case 5:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +----------------------+
|
|
* fbno fend
|
|
*
|
|
* Case 6:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +--------------------------+
|
|
* fbno fend
|
|
*
|
|
* Needs to be trimmed to:
|
|
* +-------+
|
|
* fbno fend
|
|
*/
|
|
fbno = bend;
|
|
} else if (bend >= fend) {
|
|
/* end overlap */
|
|
|
|
/*
|
|
* Case 7:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +------------------+
|
|
* fbno fend
|
|
*
|
|
* Case 8:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +--------------------------+
|
|
* fbno fend
|
|
*
|
|
* Needs to be trimmed to:
|
|
* +-------+
|
|
* fbno fend
|
|
*/
|
|
fend = bbno;
|
|
} else {
|
|
/* middle overlap */
|
|
|
|
/*
|
|
* Case 9:
|
|
* bbno bend
|
|
* +BBBBBBBBBBBBBBBBB+
|
|
* +-----------------------------------+
|
|
* fbno fend
|
|
*
|
|
* Can be trimmed to:
|
|
* +-------+ OR +-------+
|
|
* fbno fend fbno fend
|
|
*
|
|
* Backward allocation leads to significant
|
|
* fragmentation of directories, which degrades
|
|
* directory performance, therefore we always want to
|
|
* choose the option that produces forward allocation
|
|
* patterns.
|
|
* Preferring the lower bno extent will make the next
|
|
* request use "fend" as the start of the next
|
|
* allocation; if the segment is no longer busy at
|
|
* that point, we'll get a contiguous allocation, but
|
|
* even if it is still busy, we will get a forward
|
|
* allocation.
|
|
* We try to avoid choosing the segment at "bend",
|
|
* because that can lead to the next allocation
|
|
* taking the segment at "fbno", which would be a
|
|
* backward allocation. We only use the segment at
|
|
* "fbno" if it is much larger than the current
|
|
* requested size, because in that case there's a
|
|
* good chance subsequent allocations will be
|
|
* contiguous.
|
|
*/
|
|
if (bbno - fbno >= args->maxlen) {
|
|
/* left candidate fits perfect */
|
|
fend = bbno;
|
|
} else if (fend - bend >= args->maxlen * 4) {
|
|
/* right candidate has enough free space */
|
|
fbno = bend;
|
|
} else if (bbno - fbno >= args->minlen) {
|
|
/* left candidate fits minimum requirement */
|
|
fend = bbno;
|
|
} else {
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
flen = fend - fbno;
|
|
}
|
|
spin_unlock(&args->pag->pagb_lock);
|
|
|
|
if (fbno != bno || flen != len) {
|
|
trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
|
|
fbno, flen);
|
|
}
|
|
*rbno = fbno;
|
|
*rlen = flen;
|
|
return;
|
|
fail:
|
|
/*
|
|
* Return a zero extent length as failure indications. All callers
|
|
* re-check if the trimmed extent satisfies the minlen requirement.
|
|
*/
|
|
spin_unlock(&args->pag->pagb_lock);
|
|
trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
|
|
*rbno = fbno;
|
|
*rlen = 0;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_extent_busy_clear_one(
|
|
struct xfs_mount *mp,
|
|
struct xfs_perag *pag,
|
|
struct xfs_extent_busy *busyp)
|
|
{
|
|
if (busyp->length) {
|
|
trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
|
|
busyp->length);
|
|
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
}
|
|
|
|
list_del_init(&busyp->list);
|
|
kmem_free(busyp);
|
|
}
|
|
|
|
/*
|
|
* Remove all extents on the passed in list from the busy extents tree.
|
|
* If do_discard is set skip extents that need to be discarded, and mark
|
|
* these as undergoing a discard operation instead.
|
|
*/
|
|
void
|
|
xfs_extent_busy_clear(
|
|
struct xfs_mount *mp,
|
|
struct list_head *list,
|
|
bool do_discard)
|
|
{
|
|
struct xfs_extent_busy *busyp, *n;
|
|
struct xfs_perag *pag = NULL;
|
|
xfs_agnumber_t agno = NULLAGNUMBER;
|
|
|
|
list_for_each_entry_safe(busyp, n, list, list) {
|
|
if (busyp->agno != agno) {
|
|
if (pag) {
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_perag_put(pag);
|
|
}
|
|
pag = xfs_perag_get(mp, busyp->agno);
|
|
spin_lock(&pag->pagb_lock);
|
|
agno = busyp->agno;
|
|
}
|
|
|
|
if (do_discard && busyp->length &&
|
|
!(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
|
|
busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
|
|
else
|
|
xfs_extent_busy_clear_one(mp, pag, busyp);
|
|
}
|
|
|
|
if (pag) {
|
|
spin_unlock(&pag->pagb_lock);
|
|
xfs_perag_put(pag);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Callback for list_sort to sort busy extents by the AG they reside in.
|
|
*/
|
|
int
|
|
xfs_extent_busy_ag_cmp(
|
|
void *priv,
|
|
struct list_head *a,
|
|
struct list_head *b)
|
|
{
|
|
return container_of(a, struct xfs_extent_busy, list)->agno -
|
|
container_of(b, struct xfs_extent_busy, list)->agno;
|
|
}
|