Merge branch 'xfs-buf-iosubmit' into for-next

This commit is contained in:
Dave Chinner 2014-10-02 09:11:14 +10:00
commit 75e58ce4c8
12 changed files with 284 additions and 360 deletions

View File

@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes(
if (endoff > XFS_ISIZE(ip)) if (endoff > XFS_ISIZE(ip))
endoff = XFS_ISIZE(ip); endoff = XFS_ISIZE(ip);
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp,
BTOBB(mp->m_sb.sb_blocksize), 0);
if (!bp)
return -ENOMEM;
xfs_buf_unlock(bp);
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
uint lock_mode; uint lock_mode;
@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes(
ASSERT(imap.br_startblock != DELAYSTARTBLOCK); ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (imap.br_state == XFS_EXT_UNWRITTEN) if (imap.br_state == XFS_EXT_UNWRITTEN)
continue; continue;
XFS_BUF_UNDONE(bp);
XFS_BUF_UNWRITE(bp);
XFS_BUF_READ(bp);
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
if (XFS_FORCED_SHUTDOWN(mp)) { error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
error = -EIO; mp->m_rtdev_targp : mp->m_ddev_targp,
break; xfs_fsb_to_db(ip, imap.br_startblock),
} BTOBB(mp->m_sb.sb_blocksize),
xfs_buf_iorequest(bp); 0, &bp, NULL);
error = xfs_buf_iowait(bp); if (error)
if (error) { return error;
xfs_buf_ioerror_alert(bp,
"xfs_zero_remaining_bytes(read)");
break;
}
memset(bp->b_addr + memset(bp->b_addr +
(offset - XFS_FSB_TO_B(mp, imap.br_startoff)), (offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
0, lastoffset - offset + 1); 0, lastoffset - offset + 1);
XFS_BUF_UNDONE(bp);
XFS_BUF_UNREAD(bp);
XFS_BUF_WRITE(bp);
if (XFS_FORCED_SHUTDOWN(mp)) { error = xfs_bwrite(bp);
error = -EIO; xfs_buf_relse(bp);
break; if (error)
} return error;
xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
"xfs_zero_remaining_bytes(write)");
break;
}
} }
xfs_buf_free(bp);
return error; return error;
} }

View File

@ -623,10 +623,11 @@ _xfs_buf_read(
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
xfs_buf_iorequest(bp); if (flags & XBF_ASYNC) {
if (flags & XBF_ASYNC) xfs_buf_submit(bp);
return 0; return 0;
return xfs_buf_iowait(bp); }
return xfs_buf_submit_wait(bp);
} }
xfs_buf_t * xfs_buf_t *
@ -687,34 +688,39 @@ xfs_buf_readahead_map(
* Read an uncached buffer from disk. Allocates and returns a locked * Read an uncached buffer from disk. Allocates and returns a locked
* buffer containing the disk contents or nothing. * buffer containing the disk contents or nothing.
*/ */
struct xfs_buf * int
xfs_buf_read_uncached( xfs_buf_read_uncached(
struct xfs_buftarg *target, struct xfs_buftarg *target,
xfs_daddr_t daddr, xfs_daddr_t daddr,
size_t numblks, size_t numblks,
int flags, int flags,
struct xfs_buf **bpp,
const struct xfs_buf_ops *ops) const struct xfs_buf_ops *ops)
{ {
struct xfs_buf *bp; struct xfs_buf *bp;
*bpp = NULL;
bp = xfs_buf_get_uncached(target, numblks, flags); bp = xfs_buf_get_uncached(target, numblks, flags);
if (!bp) if (!bp)
return NULL; return -ENOMEM;
/* set up the buffer for a read IO */ /* set up the buffer for a read IO */
ASSERT(bp->b_map_count == 1); ASSERT(bp->b_map_count == 1);
bp->b_bn = daddr; bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */
bp->b_maps[0].bm_bn = daddr; bp->b_maps[0].bm_bn = daddr;
bp->b_flags |= XBF_READ; bp->b_flags |= XBF_READ;
bp->b_ops = ops; bp->b_ops = ops;
if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { xfs_buf_submit_wait(bp);
if (bp->b_error) {
int error = bp->b_error;
xfs_buf_relse(bp); xfs_buf_relse(bp);
return NULL; return error;
} }
xfs_buf_iorequest(bp);
xfs_buf_iowait(bp); *bpp = bp;
return bp; return 0;
} }
/* /*
@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
* Buffer Utility Routines * Buffer Utility Routines
*/ */
STATIC void void
xfs_buf_iodone_work( xfs_buf_ioend(
struct work_struct *work) struct xfs_buf *bp)
{ {
struct xfs_buf *bp = bool read = bp->b_flags & XBF_READ;
container_of(work, xfs_buf_t, b_iodone_work);
bool read = !!(bp->b_flags & XBF_READ); trace_xfs_buf_iodone(bp, _RET_IP_);
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
/* only validate buffers that were read without errors */ /*
if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE)) * Pull in IO completion errors now. We are guaranteed to be running
* single threaded, so we don't need the lock to read b_io_error.
*/
if (!bp->b_error && bp->b_io_error)
xfs_buf_ioerror(bp, bp->b_io_error);
/* Only validate buffers that were read without errors */
if (read && !bp->b_error && bp->b_ops) {
ASSERT(!bp->b_iodone);
bp->b_ops->verify_read(bp); bp->b_ops->verify_read(bp);
}
if (!bp->b_error)
bp->b_flags |= XBF_DONE;
if (bp->b_iodone) if (bp->b_iodone)
(*(bp->b_iodone))(bp); (*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC) else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp); xfs_buf_relse(bp);
else { else
ASSERT(read && bp->b_ops);
complete(&bp->b_iowait); complete(&bp->b_iowait);
} }
static void
xfs_buf_ioend_work(
struct work_struct *work)
{
struct xfs_buf *bp =
container_of(work, xfs_buf_t, b_iodone_work);
xfs_buf_ioend(bp);
} }
void void
xfs_buf_ioend( xfs_buf_ioend_async(
struct xfs_buf *bp, struct xfs_buf *bp)
int schedule)
{ {
bool read = !!(bp->b_flags & XBF_READ); INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
trace_xfs_buf_iodone(bp, _RET_IP_);
if (bp->b_error == 0)
bp->b_flags |= XBF_DONE;
if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
if (schedule) {
INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
} else {
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
complete(&bp->b_iowait);
}
} }
void void
@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
} }
/*
* Called when we want to stop a buffer from getting written or read.
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend
* so that the proper iodone callbacks get called.
*/
STATIC int
xfs_bioerror(
xfs_buf_t *bp)
{
#ifdef XFSERRORDEBUG
ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
#endif
/*
* No need to wait until the buffer is unpinned, we aren't flushing it.
*/
xfs_buf_ioerror(bp, -EIO);
/*
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
return -EIO;
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
* here ourselves, and avoiding the xfs_buf_ioend call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
int
xfs_bioerror_relse(
struct xfs_buf *bp)
{
int64_t fl = bp->b_flags;
/*
* No need to wait until the buffer is unpinned.
* We aren't flushing it.
*
* chunkhold expects B_DONE to be set, whether
* we actually finish the I/O or not. We don't want to
* change that interface.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_DONE(bp);
xfs_buf_stale(bp);
bp->b_iodone = NULL;
if (!(fl & XBF_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
* Lot's of chunkcache code assumes that.
* There's no reason to mark error for
* ASYNC buffers.
*/
xfs_buf_ioerror(bp, -EIO);
complete(&bp->b_iowait);
} else {
xfs_buf_relse(bp);
}
return -EIO;
}
STATIC int
xfs_bdstrat_cb(
struct xfs_buf *bp)
{
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
/*
* Metadata write that didn't get logged but
* written delayed anyway. These aren't associated
* with a transaction, and can be ignored.
*/
if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
return xfs_bioerror_relse(bp);
else
return xfs_bioerror(bp);
}
xfs_buf_iorequest(bp);
return 0;
}
int int
xfs_bwrite( xfs_bwrite(
struct xfs_buf *bp) struct xfs_buf *bp)
@ -1166,11 +1085,10 @@ xfs_bwrite(
ASSERT(xfs_buf_islocked(bp)); ASSERT(xfs_buf_islocked(bp));
bp->b_flags |= XBF_WRITE; bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
XBF_WRITE_FAIL | XBF_DONE);
xfs_bdstrat_cb(bp); error = xfs_buf_submit_wait(bp);
error = xfs_buf_iowait(bp);
if (error) { if (error) {
xfs_force_shutdown(bp->b_target->bt_mount, xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR); SHUTDOWN_META_IO_ERROR);
@ -1178,15 +1096,6 @@ xfs_bwrite(
return error; return error;
} }
STATIC void
_xfs_buf_ioend(
xfs_buf_t *bp,
int schedule)
{
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
xfs_buf_ioend(bp, schedule);
}
STATIC void STATIC void
xfs_buf_bio_end_io( xfs_buf_bio_end_io(
struct bio *bio, struct bio *bio,
@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on * don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete. * buffers that require multiple bios to complete.
*/ */
if (!bp->b_error) if (error) {
xfs_buf_ioerror(bp, error); spin_lock(&bp->b_lock);
if (!bp->b_io_error)
bp->b_io_error = error;
spin_unlock(&bp->b_lock);
}
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
_xfs_buf_ioend(bp, 1); if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
xfs_buf_ioend_async(bp);
bio_put(bio); bio_put(bio);
} }
@ -1283,7 +1197,7 @@ next_chunk:
} else { } else {
/* /*
* This is guaranteed not to be the last io reference count * This is guaranteed not to be the last io reference count
* because the caller (xfs_buf_iorequest) holds a count itself. * because the caller (xfs_buf_submit) holds a count itself.
*/ */
atomic_dec(&bp->b_io_remaining); atomic_dec(&bp->b_io_remaining);
xfs_buf_ioerror(bp, -EIO); xfs_buf_ioerror(bp, -EIO);
@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
blk_finish_plug(&plug); blk_finish_plug(&plug);
} }
/*
* Asynchronous IO submission path. This transfers the buffer lock ownership and
* the current reference to the IO. It is not safe to reference the buffer after
* a call to this function unless the caller holds an additional reference
* itself.
*/
void void
xfs_buf_iorequest( xfs_buf_submit(
xfs_buf_t *bp) struct xfs_buf *bp)
{ {
trace_xfs_buf_iorequest(bp, _RET_IP_); trace_xfs_buf_submit(bp, _RET_IP_);
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
ASSERT(bp->b_flags & XBF_ASYNC);
/* on shutdown we stale and complete the buffer immediately */
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
xfs_buf_ioerror(bp, -EIO);
bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
xfs_buf_ioend(bp);
return;
}
if (bp->b_flags & XBF_WRITE) if (bp->b_flags & XBF_WRITE)
xfs_buf_wait_unpin(bp); xfs_buf_wait_unpin(bp);
/* clear the internal error state to avoid spurious errors */
bp->b_io_error = 0;
/*
* The caller's reference is released during I/O completion.
* This occurs some time after the last b_io_remaining reference is
* released, so after we drop our Io reference we have to have some
* other reference to ensure the buffer doesn't go away from underneath
* us. Take a direct reference to ensure we have safe access to the
* buffer until we are finished with it.
*/
xfs_buf_hold(bp); xfs_buf_hold(bp);
/* /*
* Set the count to 1 initially, this will stop an I/O * Set the count to 1 initially, this will stop an I/O completion
* completion callout which happens before we have started * callout which happens before we have started all the I/O from calling
* all the I/O from calling xfs_buf_ioend too early. * xfs_buf_ioend too early.
*/ */
atomic_set(&bp->b_io_remaining, 1); atomic_set(&bp->b_io_remaining, 1);
_xfs_buf_ioapply(bp); _xfs_buf_ioapply(bp);
/* /*
* If _xfs_buf_ioapply failed, we'll get back here with * If _xfs_buf_ioapply failed, we can get back here with only the IO
* only the reference we took above. _xfs_buf_ioend will * reference we took above. If we drop it to zero, run completion so
* drop it to zero, so we'd better not queue it for later, * that we don't return to the caller with completion still pending.
* or we'll free it before it's done.
*/ */
_xfs_buf_ioend(bp, bp->b_error ? 0 : 1); if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
if (bp->b_error)
xfs_buf_ioend(bp);
else
xfs_buf_ioend_async(bp);
}
xfs_buf_rele(bp); xfs_buf_rele(bp);
/* Note: it is not safe to reference bp now we've dropped our ref */
} }
/* /*
* Waits for I/O to complete on the buffer supplied. It returns immediately if * Synchronous buffer IO submission path, read or write.
* no I/O is pending or there is already a pending error on the buffer, in which
* case nothing will ever complete. It returns the I/O error code, if any, or
* 0 if there was no error.
*/ */
int int
xfs_buf_iowait( xfs_buf_submit_wait(
xfs_buf_t *bp) struct xfs_buf *bp)
{ {
int error;
trace_xfs_buf_submit_wait(bp, _RET_IP_);
ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
xfs_buf_ioerror(bp, -EIO);
xfs_buf_stale(bp);
bp->b_flags &= ~XBF_DONE;
return -EIO;
}
if (bp->b_flags & XBF_WRITE)
xfs_buf_wait_unpin(bp);
/* clear the internal error state to avoid spurious errors */
bp->b_io_error = 0;
/*
* For synchronous IO, the IO does not inherit the submitters reference
* count, nor the buffer lock. Hence we cannot release the reference we
* are about to take until we've waited for all IO completion to occur,
* including any xfs_buf_ioend_async() work that may be pending.
*/
xfs_buf_hold(bp);
/*
* Set the count to 1 initially, this will stop an I/O completion
* callout which happens before we have started all the I/O from calling
* xfs_buf_ioend too early.
*/
atomic_set(&bp->b_io_remaining, 1);
_xfs_buf_ioapply(bp);
/*
* make sure we run completion synchronously if it raced with us and is
* already complete.
*/
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
xfs_buf_ioend(bp);
/* wait for completion before gathering the error from the buffer */
trace_xfs_buf_iowait(bp, _RET_IP_); trace_xfs_buf_iowait(bp, _RET_IP_);
wait_for_completion(&bp->b_iowait);
if (!bp->b_error)
wait_for_completion(&bp->b_iowait);
trace_xfs_buf_iowait_done(bp, _RET_IP_); trace_xfs_buf_iowait_done(bp, _RET_IP_);
return bp->b_error; error = bp->b_error;
/*
* all done now, we can release the hold that keeps the buffer
* referenced for the entire IO.
*/
xfs_buf_rele(bp);
return error;
} }
xfs_caddr_t xfs_caddr_t
@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
blk_start_plug(&plug); blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, io_list, b_list) { list_for_each_entry_safe(bp, n, io_list, b_list) {
bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
bp->b_flags |= XBF_WRITE; bp->b_flags |= XBF_WRITE | XBF_ASYNC;
if (!wait) { /*
bp->b_flags |= XBF_ASYNC; * we do all Io submission async. This means if we need to wait
* for IO completion we need to take an extra reference so the
* buffer is still valid on the other side.
*/
if (wait)
xfs_buf_hold(bp);
else
list_del_init(&bp->b_list); list_del_init(&bp->b_list);
}
xfs_bdstrat_cb(bp); xfs_buf_submit(bp);
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
bp = list_first_entry(&io_list, struct xfs_buf, b_list); bp = list_first_entry(&io_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list); list_del_init(&bp->b_list);
error2 = xfs_buf_iowait(bp);
/* locking the buffer will wait for async IO completion. */
xfs_buf_lock(bp);
error2 = bp->b_error;
xfs_buf_relse(bp); xfs_buf_relse(bp);
if (!error) if (!error)
error = error2; error = error2;

View File

@ -158,6 +158,7 @@ typedef struct xfs_buf {
struct list_head b_lru; /* lru list */ struct list_head b_lru; /* lru list */
spinlock_t b_lock; /* internal state lock */ spinlock_t b_lock; /* internal state lock */
unsigned int b_state; /* internal state flags */ unsigned int b_state; /* internal state flags */
int b_io_error; /* internal IO error state */
wait_queue_head_t b_waiters; /* unpin waiters */ wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list; struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */ struct xfs_perag *b_pag; /* contains rbtree root */
@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
int flags); int flags);
struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
xfs_daddr_t daddr, size_t numblks, int flags, size_t numblks, int flags, struct xfs_buf **bpp,
const struct xfs_buf_ops *ops); const struct xfs_buf_ops *ops);
void xfs_buf_hold(struct xfs_buf *bp); void xfs_buf_hold(struct xfs_buf *bp);
/* Releasing Buffers */ /* Releasing Buffers */
@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */ /* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp); extern int xfs_bwrite(struct xfs_buf *bp);
extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioend(struct xfs_buf *bp);
extern void xfs_buf_ioerror(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
extern void xfs_buf_iorequest(xfs_buf_t *); extern void xfs_buf_submit(struct xfs_buf *bp);
extern int xfs_buf_iowait(xfs_buf_t *); extern int xfs_buf_submit_wait(struct xfs_buf *bp);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_rw_t); xfs_buf_rw_t);
#define xfs_buf_zero(bp, off, len) \ #define xfs_buf_zero(bp, off, len) \
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
extern int xfs_bioerror_relse(struct xfs_buf *);
/* Buffer Utility Routines */ /* Buffer Utility Routines */
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);

View File

@ -491,7 +491,7 @@ xfs_buf_item_unpin(
xfs_buf_ioerror(bp, -EIO); xfs_buf_ioerror(bp, -EIO);
XFS_BUF_UNDONE(bp); XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp); xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0); xfs_buf_ioend(bp);
} }
} }
@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks(
* a way to shut the filesystem down if the writes keep failing. * a way to shut the filesystem down if the writes keep failing.
* *
* In practice we'll shut the filesystem down soon as non-transient * In practice we'll shut the filesystem down soon as non-transient
* erorrs tend to affect the whole device and a failing log write * errors tend to affect the whole device and a failing log write
* will make us give up. But we really ought to do better here. * will make us give up. But we really ought to do better here.
*/ */
if (XFS_BUF_ISASYNC(bp)) { if (XFS_BUF_ISASYNC(bp)) {
@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks(
if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
bp->b_flags |= XBF_WRITE | XBF_ASYNC | bp->b_flags |= XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL; XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_iorequest(bp); xfs_buf_submit(bp);
} else { } else {
xfs_buf_relse(bp); xfs_buf_relse(bp);
} }
@ -1115,7 +1115,7 @@ do_callbacks:
xfs_buf_do_callbacks(bp); xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL; bp->b_fspriv = NULL;
bp->b_iodone = NULL; bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0); xfs_buf_ioend(bp);
} }
/* /*

View File

@ -172,16 +172,11 @@ xfs_growfs_data_private(
if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
return error; return error;
dpct = pct - mp->m_sb.sb_imax_pct; dpct = pct - mp->m_sb.sb_imax_pct;
bp = xfs_buf_read_uncached(mp->m_ddev_targp, error = xfs_buf_read_uncached(mp->m_ddev_targp,
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL); XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
if (!bp) if (error)
return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
return error; return error;
}
xfs_buf_relse(bp); xfs_buf_relse(bp);
new = nb; /* use new as a temporary here */ new = nb; /* use new as a temporary here */

View File

@ -3062,7 +3062,7 @@ cluster_corrupt_out:
XFS_BUF_UNDONE(bp); XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp); xfs_buf_stale(bp);
xfs_buf_ioerror(bp, -EIO); xfs_buf_ioerror(bp, -EIO);
xfs_buf_ioend(bp, 0); xfs_buf_ioend(bp);
} else { } else {
xfs_buf_stale(bp); xfs_buf_stale(bp);
xfs_buf_relse(bp); xfs_buf_relse(bp);

View File

@ -1678,7 +1678,7 @@ xlog_bdstrat(
if (iclog->ic_state & XLOG_STATE_IOERROR) { if (iclog->ic_state & XLOG_STATE_IOERROR) {
xfs_buf_ioerror(bp, -EIO); xfs_buf_ioerror(bp, -EIO);
xfs_buf_stale(bp); xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0); xfs_buf_ioend(bp);
/* /*
* It would seem logical to return EIO here, but we rely on * It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of * the log state machine to propagate I/O errors instead of
@ -1688,7 +1688,7 @@ xlog_bdstrat(
return 0; return 0;
} }
xfs_buf_iorequest(bp); xfs_buf_submit(bp);
return 0; return 0;
} }
@ -3867,18 +3867,17 @@ xlog_state_ioerror(
* This is called from xfs_force_shutdown, when we're forcibly * This is called from xfs_force_shutdown, when we're forcibly
* shutting down the filesystem, typically because of an IO error. * shutting down the filesystem, typically because of an IO error.
* Our main objectives here are to make sure that: * Our main objectives here are to make sure that:
* a. the filesystem gets marked 'SHUTDOWN' for all interested * a. if !logerror, flush the logs to disk. Anything modified
* after this is ignored.
* b. the filesystem gets marked 'SHUTDOWN' for all interested
* parties to find out, 'atomically'. * parties to find out, 'atomically'.
* b. those who're sleeping on log reservations, pinned objects and * c. those who're sleeping on log reservations, pinned objects and
* other resources get woken up, and be told the bad news. * other resources get woken up, and be told the bad news.
* c. nothing new gets queued up after (a) and (b) are done. * d. nothing new gets queued up after (b) and (c) are done.
* d. if !logerror, flush the iclogs to disk, then seal them off
* for business.
* *
* Note: for delayed logging the !logerror case needs to flush the regions * Note: for the !logerror case we need to flush the regions held in memory out
* held in memory out to the iclogs before flushing them to disk. This needs * to disk first. This needs to be done before the log is marked as shutdown,
* to be done before the log is marked as shutdown, otherwise the flush to the * otherwise the iclog writes will fail.
* iclogs will fail.
*/ */
int int
xfs_log_force_umount( xfs_log_force_umount(
@ -3910,16 +3909,16 @@ xfs_log_force_umount(
ASSERT(XLOG_FORCED_SHUTDOWN(log)); ASSERT(XLOG_FORCED_SHUTDOWN(log));
return 1; return 1;
} }
retval = 0;
/* /*
* Flush the in memory commit item list before marking the log as * Flush all the completed transactions to disk before marking the log
* being shut down. We need to do it in this order to ensure all the * being shut down. We need to do it in this order to ensure that
* completed transactions are flushed to disk with the xfs_log_force() * completed operations are safely on disk before we shut down, and that
* call below. * we don't have to issue any buffer IO after the shutdown flags are set
* to guarantee this.
*/ */
if (!logerror) if (!logerror)
xlog_cil_force(log); _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
/* /*
* mark the filesystem and the as in a shutdown state and wake * mark the filesystem and the as in a shutdown state and wake
@ -3931,18 +3930,11 @@ xfs_log_force_umount(
XFS_BUF_DONE(mp->m_sb_bp); XFS_BUF_DONE(mp->m_sb_bp);
/* /*
* This flag is sort of redundant because of the mount flag, but * Mark the log and the iclogs with IO error flags to prevent any
* it's good to maintain the separation between the log and the rest * further log IO from being issued or completed.
* of XFS.
*/ */
log->l_flags |= XLOG_IO_ERROR; log->l_flags |= XLOG_IO_ERROR;
retval = xlog_state_ioerror(log);
/*
* If we hit a log error, we want to mark all the iclogs IOERROR
* while we're still holding the loglock.
*/
if (logerror)
retval = xlog_state_ioerror(log);
spin_unlock(&log->l_icloglock); spin_unlock(&log->l_icloglock);
/* /*
@ -3955,19 +3947,6 @@ xfs_log_force_umount(
xlog_grant_head_wake_all(&log->l_reserve_head); xlog_grant_head_wake_all(&log->l_reserve_head);
xlog_grant_head_wake_all(&log->l_write_head); xlog_grant_head_wake_all(&log->l_write_head);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
/*
* Force the incore logs to disk before shutting the
* log down completely.
*/
_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
spin_lock(&log->l_icloglock);
retval = xlog_state_ioerror(log);
spin_unlock(&log->l_icloglock);
}
/* /*
* Wake up everybody waiting on xfs_log_force. Wake the CIL push first * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
* as if the log writes were completed. The abort handling in the log * as if the log writes were completed. The abort handling in the log

View File

@ -193,12 +193,8 @@ xlog_bread_noalign(
bp->b_io_length = nbblks; bp->b_io_length = nbblks;
bp->b_error = 0; bp->b_error = 0;
if (XFS_FORCED_SHUTDOWN(log->l_mp)) error = xfs_buf_submit_wait(bp);
return -EIO; if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error)
xfs_buf_ioerror_alert(bp, __func__); xfs_buf_ioerror_alert(bp, __func__);
return error; return error;
} }
@ -378,12 +374,14 @@ xlog_recover_iodone(
* We're not going to bother about retrying * We're not going to bother about retrying
* this during recovery. One strike! * this during recovery. One strike!
*/ */
xfs_buf_ioerror_alert(bp, __func__); if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
xfs_force_shutdown(bp->b_target->bt_mount, xfs_buf_ioerror_alert(bp, __func__);
SHUTDOWN_META_IO_ERROR); xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR);
}
} }
bp->b_iodone = NULL; bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0); xfs_buf_ioend(bp);
} }
/* /*
@ -4452,16 +4450,12 @@ xlog_do_recover(
XFS_BUF_UNASYNC(bp); XFS_BUF_UNASYNC(bp);
bp->b_ops = &xfs_sb_buf_ops; bp->b_ops = &xfs_sb_buf_ops;
if (XFS_FORCED_SHUTDOWN(log->l_mp)) { error = xfs_buf_submit_wait(bp);
xfs_buf_relse(bp);
return -EIO;
}
xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) { if (error) {
xfs_buf_ioerror_alert(bp, __func__); if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
ASSERT(0); xfs_buf_ioerror_alert(bp, __func__);
ASSERT(0);
}
xfs_buf_relse(bp); xfs_buf_relse(bp);
return error; return error;
} }

View File

@ -300,21 +300,15 @@ xfs_readsb(
* access to the superblock. * access to the superblock.
*/ */
reread: reread:
bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
BTOBB(sector_size), 0, buf_ops); BTOBB(sector_size), 0, &bp, buf_ops);
if (!bp) { if (error) {
if (loud)
xfs_warn(mp, "SB buffer read failed");
return -EIO;
}
if (bp->b_error) {
error = bp->b_error;
if (loud) if (loud)
xfs_warn(mp, "SB validate failed with error %d.", error); xfs_warn(mp, "SB validate failed with error %d.", error);
/* bad CRC means corrupted metadata */ /* bad CRC means corrupted metadata */
if (error == -EFSBADCRC) if (error == -EFSBADCRC)
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
goto release_buf; return error;
} }
/* /*
@ -544,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp)
* Check that the data (and log if separate) is an ok size. * Check that the data (and log if separate) is an ok size.
*/ */
STATIC int STATIC int
xfs_check_sizes(xfs_mount_t *mp) xfs_check_sizes(
struct xfs_mount *mp)
{ {
xfs_buf_t *bp; struct xfs_buf *bp;
xfs_daddr_t d; xfs_daddr_t d;
int error;
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
xfs_warn(mp, "filesystem size mismatch detected"); xfs_warn(mp, "filesystem size mismatch detected");
return -EFBIG; return -EFBIG;
} }
bp = xfs_buf_read_uncached(mp->m_ddev_targp, error = xfs_buf_read_uncached(mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1), d - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL); XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
if (!bp) { if (error) {
xfs_warn(mp, "last sector read failed"); xfs_warn(mp, "last sector read failed");
return -EIO; return error;
} }
xfs_buf_relse(bp); xfs_buf_relse(bp);
if (mp->m_logdev_targp != mp->m_ddev_targp) { if (mp->m_logdev_targp == mp->m_ddev_targp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); return 0;
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
xfs_warn(mp, "log size mismatch detected"); d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
return -EFBIG; if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
} xfs_warn(mp, "log size mismatch detected");
bp = xfs_buf_read_uncached(mp->m_logdev_targp, return -EFBIG;
d - XFS_FSB_TO_BB(mp, 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL);
if (!bp) {
xfs_warn(mp, "log device read failed");
return -EIO;
}
xfs_buf_relse(bp);
} }
error = xfs_buf_read_uncached(mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
if (error) {
xfs_warn(mp, "log device read failed");
return error;
}
xfs_buf_relse(bp);
return 0; return 0;
} }

View File

@ -921,16 +921,11 @@ xfs_growfs_rt(
/* /*
* Read in the last block of the device, make sure it exists. * Read in the last block of the device, make sure it exists.
*/ */
bp = xfs_buf_read_uncached(mp->m_rtdev_targp, error = xfs_buf_read_uncached(mp->m_rtdev_targp,
XFS_FSB_TO_BB(mp, nrblocks - 1), XFS_FSB_TO_BB(mp, nrblocks - 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL); XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
if (!bp) if (error)
return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
return error; return error;
}
xfs_buf_relse(bp); xfs_buf_relse(bp);
/* /*
@ -1184,11 +1179,12 @@ xfs_rtallocate_extent(
*/ */
int /* error */ int /* error */
xfs_rtmount_init( xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */ struct xfs_mount *mp) /* file system mount structure */
{ {
xfs_buf_t *bp; /* buffer for last block of subvolume */ struct xfs_buf *bp; /* buffer for last block of subvolume */
xfs_daddr_t d; /* address of last block of subvolume */ struct xfs_sb *sbp; /* filesystem superblock copy in mount */
xfs_sb_t *sbp; /* filesystem superblock copy in mount */ xfs_daddr_t d; /* address of last block of subvolume */
int error;
sbp = &mp->m_sb; sbp = &mp->m_sb;
if (sbp->sb_rblocks == 0) if (sbp->sb_rblocks == 0)
@ -1214,14 +1210,12 @@ xfs_rtmount_init(
(unsigned long long) mp->m_sb.sb_rblocks); (unsigned long long) mp->m_sb.sb_rblocks);
return -EFBIG; return -EFBIG;
} }
bp = xfs_buf_read_uncached(mp->m_rtdev_targp, error = xfs_buf_read_uncached(mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1), d - XFS_FSB_TO_BB(mp, 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL); XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
if (!bp || bp->b_error) { if (error) {
xfs_warn(mp, "realtime device size check failed"); xfs_warn(mp, "realtime device size check failed");
if (bp) return error;
xfs_buf_relse(bp);
return -EIO;
} }
xfs_buf_relse(bp); xfs_buf_relse(bp);
return 0; return 0;

View File

@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele); DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_iodone); DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_iorequest); DEFINE_BUF_EVENT(xfs_buf_submit);
DEFINE_BUF_EVENT(xfs_buf_submit_wait);
DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_bawrite);
DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done); DEFINE_BUF_EVENT(xfs_buf_lock_done);

View File

@ -318,20 +318,10 @@ xfs_trans_read_buf_map(
XFS_BUF_READ(bp); XFS_BUF_READ(bp);
bp->b_ops = ops; bp->b_ops = ops;
/* error = xfs_buf_submit_wait(bp);
* XXX(hch): clean up the error handling here to be less
* of a mess..
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
xfs_bioerror_relse(bp);
} else {
xfs_buf_iorequest(bp);
}
error = xfs_buf_iowait(bp);
if (error) { if (error) {
xfs_buf_ioerror_alert(bp, __func__); if (!XFS_FORCED_SHUTDOWN(mp))
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp); xfs_buf_relse(bp);
/* /*
* We can gracefully recover from most read * We can gracefully recover from most read