diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index d8b77b5bf4d9..c2aaa58e59ee 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes( if (endoff > XFS_ISIZE(ip)) endoff = XFS_ISIZE(ip); - bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp, - BTOBB(mp->m_sb.sb_blocksize), 0); - if (!bp) - return -ENOMEM; - - xfs_buf_unlock(bp); - for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { uint lock_mode; @@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes( ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (imap.br_state == XFS_EXT_UNWRITTEN) continue; - XFS_BUF_UNDONE(bp); - XFS_BUF_UNWRITE(bp); - XFS_BUF_READ(bp); - XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); - if (XFS_FORCED_SHUTDOWN(mp)) { - error = -EIO; - break; - } - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); - if (error) { - xfs_buf_ioerror_alert(bp, - "xfs_zero_remaining_bytes(read)"); - break; - } + error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp, + xfs_fsb_to_db(ip, imap.br_startblock), + BTOBB(mp->m_sb.sb_blocksize), + 0, &bp, NULL); + if (error) + return error; + memset(bp->b_addr + - (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), - 0, lastoffset - offset + 1); - XFS_BUF_UNDONE(bp); - XFS_BUF_UNREAD(bp); - XFS_BUF_WRITE(bp); + (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), + 0, lastoffset - offset + 1); - if (XFS_FORCED_SHUTDOWN(mp)) { - error = -EIO; - break; - } - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); - if (error) { - xfs_buf_ioerror_alert(bp, - "xfs_zero_remaining_bytes(write)"); - break; - } + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + return error; } - xfs_buf_free(bp); return error; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ec6505056b2c..017b6afe340b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -623,10 +623,11 @@ _xfs_buf_read( bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - xfs_buf_iorequest(bp); - if (flags & XBF_ASYNC) + if (flags & XBF_ASYNC) { + xfs_buf_submit(bp); return 0; - return xfs_buf_iowait(bp); + } + return xfs_buf_submit_wait(bp); } xfs_buf_t * @@ -687,34 +688,39 @@ xfs_buf_readahead_map( * Read an uncached buffer from disk. Allocates and returns a locked * buffer containing the disk contents or nothing. */ -struct xfs_buf * +int xfs_buf_read_uncached( struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, int flags, + struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; + *bpp = NULL; + bp = xfs_buf_get_uncached(target, numblks, flags); if (!bp) - return NULL; + return -ENOMEM; /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); - bp->b_bn = daddr; + bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ bp->b_maps[0].bm_bn = daddr; bp->b_flags |= XBF_READ; bp->b_ops = ops; - if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { + xfs_buf_submit_wait(bp); + if (bp->b_error) { + int error = bp->b_error; xfs_buf_relse(bp); - return NULL; + return error; } - xfs_buf_iorequest(bp); - xfs_buf_iowait(bp); - return bp; + + *bpp = bp; + return 0; } /* @@ -998,53 +1004,56 @@ xfs_buf_wait_unpin( * Buffer Utility Routines */ -STATIC void -xfs_buf_iodone_work( - struct work_struct *work) +void +xfs_buf_ioend( + struct xfs_buf *bp) { - struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_iodone_work); - bool read = !!(bp->b_flags & XBF_READ); + bool read = bp->b_flags & XBF_READ; + + trace_xfs_buf_iodone(bp, _RET_IP_); bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - /* only validate buffers that were read without errors */ - if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE)) + /* + * Pull in IO completion errors now. We are guaranteed to be running + * single threaded, so we don't need the lock to read b_io_error. + */ + if (!bp->b_error && bp->b_io_error) + xfs_buf_ioerror(bp, bp->b_io_error); + + /* Only validate buffers that were read without errors */ + if (read && !bp->b_error && bp->b_ops) { + ASSERT(!bp->b_iodone); bp->b_ops->verify_read(bp); + } + + if (!bp->b_error) + bp->b_flags |= XBF_DONE; if (bp->b_iodone) (*(bp->b_iodone))(bp); else if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); - else { - ASSERT(read && bp->b_ops); + else complete(&bp->b_iowait); - } +} + +static void +xfs_buf_ioend_work( + struct work_struct *work) +{ + struct xfs_buf *bp = + container_of(work, xfs_buf_t, b_iodone_work); + + xfs_buf_ioend(bp); } void -xfs_buf_ioend( - struct xfs_buf *bp, - int schedule) +xfs_buf_ioend_async( + struct xfs_buf *bp) { - bool read = !!(bp->b_flags & XBF_READ); - - trace_xfs_buf_iodone(bp, _RET_IP_); - - if (bp->b_error == 0) - bp->b_flags |= XBF_DONE; - - if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) { - if (schedule) { - INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); - queue_work(xfslogd_workqueue, &bp->b_iodone_work); - } else { - xfs_buf_iodone_work(&bp->b_iodone_work); - } - } else { - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - complete(&bp->b_iowait); - } + INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); + queue_work(xfslogd_workqueue, &bp->b_iodone_work); } void @@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert( (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); } -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call xfs_buf_ioend - * so that the proper iodone callbacks get called. - */ -STATIC int -xfs_bioerror( - xfs_buf_t *bp) -{ -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned, we aren't flushing it. - */ - xfs_buf_ioerror(bp, -EIO); - - /* - * We're calling xfs_buf_ioend, so delete XBF_DONE flag. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDONE(bp); - xfs_buf_stale(bp); - - xfs_buf_ioend(bp, 0); - - return -EIO; -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the xfs_buf_ioend call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -int -xfs_bioerror_relse( - struct xfs_buf *bp) -{ - int64_t fl = bp->b_flags; - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_DONE(bp); - xfs_buf_stale(bp); - bp->b_iodone = NULL; - if (!(fl & XBF_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - xfs_buf_ioerror(bp, -EIO); - complete(&bp->b_iowait); - } else { - xfs_buf_relse(bp); - } - - return -EIO; -} - -STATIC int -xfs_bdstrat_cb( - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) - return xfs_bioerror_relse(bp); - else - return xfs_bioerror(bp); - } - - xfs_buf_iorequest(bp); - return 0; -} - int xfs_bwrite( struct xfs_buf *bp) @@ -1166,11 +1085,10 @@ xfs_bwrite( ASSERT(xfs_buf_islocked(bp)); bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | + XBF_WRITE_FAIL | XBF_DONE); - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); @@ -1178,15 +1096,6 @@ xfs_bwrite( return error; } -STATIC void -_xfs_buf_ioend( - xfs_buf_t *bp, - int schedule) -{ - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) - xfs_buf_ioend(bp, schedule); -} - STATIC void xfs_buf_bio_end_io( struct bio *bio, @@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (!bp->b_error) - xfs_buf_ioerror(bp, error); + if (error) { + spin_lock(&bp->b_lock); + if (!bp->b_io_error) + bp->b_io_error = error; + spin_unlock(&bp->b_lock); + } if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); - _xfs_buf_ioend(bp, 1); + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend_async(bp); bio_put(bio); } @@ -1283,7 +1197,7 @@ next_chunk: } else { /* * This is guaranteed not to be the last io reference count - * because the caller (xfs_buf_iorequest) holds a count itself. + * because the caller (xfs_buf_submit) holds a count itself. */ atomic_dec(&bp->b_io_remaining); xfs_buf_ioerror(bp, -EIO); @@ -1373,53 +1287,131 @@ _xfs_buf_ioapply( blk_finish_plug(&plug); } +/* + * Asynchronous IO submission path. This transfers the buffer lock ownership and + * the current reference to the IO. It is not safe to reference the buffer after + * a call to this function unless the caller holds an additional reference + * itself. + */ void -xfs_buf_iorequest( - xfs_buf_t *bp) +xfs_buf_submit( + struct xfs_buf *bp) { - trace_xfs_buf_iorequest(bp, _RET_IP_); + trace_xfs_buf_submit(bp, _RET_IP_); ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + ASSERT(bp->b_flags & XBF_ASYNC); + + /* on shutdown we stale and complete the buffer immediately */ + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror(bp, -EIO); + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioend(bp); + return; + } if (bp->b_flags & XBF_WRITE) xfs_buf_wait_unpin(bp); + + /* clear the internal error state to avoid spurious errors */ + bp->b_io_error = 0; + + /* + * The caller's reference is released during I/O completion. + * This occurs some time after the last b_io_remaining reference is + * released, so after we drop our Io reference we have to have some + * other reference to ensure the buffer doesn't go away from underneath + * us. Take a direct reference to ensure we have safe access to the + * buffer until we are finished with it. + */ xfs_buf_hold(bp); /* - * Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling xfs_buf_ioend too early. + * Set the count to 1 initially, this will stop an I/O completion + * callout which happens before we have started all the I/O from calling + * xfs_buf_ioend too early. */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); + /* - * If _xfs_buf_ioapply failed, we'll get back here with - * only the reference we took above. _xfs_buf_ioend will - * drop it to zero, so we'd better not queue it for later, - * or we'll free it before it's done. + * If _xfs_buf_ioapply failed, we can get back here with only the IO + * reference we took above. If we drop it to zero, run completion so + * that we don't return to the caller with completion still pending. */ - _xfs_buf_ioend(bp, bp->b_error ? 0 : 1); + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + if (bp->b_error) + xfs_buf_ioend(bp); + else + xfs_buf_ioend_async(bp); + } xfs_buf_rele(bp); + /* Note: it is not safe to reference bp now we've dropped our ref */ } /* - * Waits for I/O to complete on the buffer supplied. It returns immediately if - * no I/O is pending or there is already a pending error on the buffer, in which - * case nothing will ever complete. It returns the I/O error code, if any, or - * 0 if there was no error. + * Synchronous buffer IO submission path, read or write. */ int -xfs_buf_iowait( - xfs_buf_t *bp) +xfs_buf_submit_wait( + struct xfs_buf *bp) { + int error; + + trace_xfs_buf_submit_wait(bp, _RET_IP_); + + ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC))); + + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror(bp, -EIO); + xfs_buf_stale(bp); + bp->b_flags &= ~XBF_DONE; + return -EIO; + } + + if (bp->b_flags & XBF_WRITE) + xfs_buf_wait_unpin(bp); + + /* clear the internal error state to avoid spurious errors */ + bp->b_io_error = 0; + + /* + * For synchronous IO, the IO does not inherit the submitters reference + * count, nor the buffer lock. Hence we cannot release the reference we + * are about to take until we've waited for all IO completion to occur, + * including any xfs_buf_ioend_async() work that may be pending. + */ + xfs_buf_hold(bp); + + /* + * Set the count to 1 initially, this will stop an I/O completion + * callout which happens before we have started all the I/O from calling + * xfs_buf_ioend too early. + */ + atomic_set(&bp->b_io_remaining, 1); + _xfs_buf_ioapply(bp); + + /* + * make sure we run completion synchronously if it raced with us and is + * already complete. + */ + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp); + + /* wait for completion before gathering the error from the buffer */ trace_xfs_buf_iowait(bp, _RET_IP_); - - if (!bp->b_error) - wait_for_completion(&bp->b_iowait); - + wait_for_completion(&bp->b_iowait); trace_xfs_buf_iowait_done(bp, _RET_IP_); - return bp->b_error; + error = bp->b_error; + + /* + * all done now, we can release the hold that keeps the buffer + * referenced for the entire IO. + */ + xfs_buf_rele(bp); + return error; } xfs_caddr_t @@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit( blk_start_plug(&plug); list_for_each_entry_safe(bp, n, io_list, b_list) { bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); - bp->b_flags |= XBF_WRITE; + bp->b_flags |= XBF_WRITE | XBF_ASYNC; - if (!wait) { - bp->b_flags |= XBF_ASYNC; + /* + * we do all Io submission async. This means if we need to wait + * for IO completion we need to take an extra reference so the + * buffer is still valid on the other side. + */ + if (wait) + xfs_buf_hold(bp); + else list_del_init(&bp->b_list); - } - xfs_bdstrat_cb(bp); + + xfs_buf_submit(bp); } blk_finish_plug(&plug); @@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit( bp = list_first_entry(&io_list, struct xfs_buf, b_list); list_del_init(&bp->b_list); - error2 = xfs_buf_iowait(bp); + + /* locking the buffer will wait for async IO completion. */ + xfs_buf_lock(bp); + error2 = bp->b_error; xfs_buf_relse(bp); if (!error) error = error2; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c753183900b3..82002c00af90 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -158,6 +158,7 @@ typedef struct xfs_buf { struct list_head b_lru; /* lru list */ spinlock_t b_lock; /* internal state lock */ unsigned int b_state; /* internal state flags */ + int b_io_error; /* internal IO error state */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ @@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags); -struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, - xfs_daddr_t daddr, size_t numblks, int flags, - const struct xfs_buf_ops *ops); +int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, + size_t numblks, int flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ @@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); -extern void xfs_buf_ioend(xfs_buf_t *, int); +extern void xfs_buf_ioend(struct xfs_buf *bp); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); -extern void xfs_buf_iorequest(xfs_buf_t *); -extern int xfs_buf_iowait(xfs_buf_t *); +extern void xfs_buf_submit(struct xfs_buf *bp); +extern int xfs_buf_submit_wait(struct xfs_buf *bp); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); #define xfs_buf_zero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) -extern int xfs_bioerror_relse(struct xfs_buf *); - /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 30fa5db9aea8..f15969543326 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -491,7 +491,7 @@ xfs_buf_item_unpin( xfs_buf_ioerror(bp, -EIO); XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); - xfs_buf_ioend(bp, 0); + xfs_buf_ioend(bp); } } @@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks( * a way to shut the filesystem down if the writes keep failing. * * In practice we'll shut the filesystem down soon as non-transient - * erorrs tend to affect the whole device and a failing log write + * errors tend to affect the whole device and a failing log write * will make us give up. But we really ought to do better here. */ if (XFS_BUF_ISASYNC(bp)) { @@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks( if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE | XBF_WRITE_FAIL; - xfs_buf_iorequest(bp); + xfs_buf_submit(bp); } else { xfs_buf_relse(bp); } @@ -1115,7 +1115,7 @@ do_callbacks: xfs_buf_do_callbacks(bp); bp->b_fspriv = NULL; bp->b_iodone = NULL; - xfs_buf_ioend(bp, 0); + xfs_buf_ioend(bp); } /* diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f91de1ef05e1..c05ac8b70fa9 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -172,16 +172,11 @@ xfs_growfs_data_private( if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) return error; dpct = pct - mp->m_sb.sb_imax_pct; - bp = xfs_buf_read_uncached(mp->m_ddev_targp, + error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, NULL); - if (!bp) - return -EIO; - if (bp->b_error) { - error = bp->b_error; - xfs_buf_relse(bp); + XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); + if (error) return error; - } xfs_buf_relse(bp); new = nb; /* use new as a temporary here */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c92cb48617d1..e5bbc1f30f16 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3062,7 +3062,7 @@ cluster_corrupt_out: XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend(bp, 0); + xfs_buf_ioend(bp); } else { xfs_buf_stale(bp); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ca4fd5bd8522..fe88ef67f93a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1678,7 +1678,7 @@ xlog_bdstrat( if (iclog->ic_state & XLOG_STATE_IOERROR) { xfs_buf_ioerror(bp, -EIO); xfs_buf_stale(bp); - xfs_buf_ioend(bp, 0); + xfs_buf_ioend(bp); /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of @@ -1688,7 +1688,7 @@ xlog_bdstrat( return 0; } - xfs_buf_iorequest(bp); + xfs_buf_submit(bp); return 0; } @@ -3867,18 +3867,17 @@ xlog_state_ioerror( * This is called from xfs_force_shutdown, when we're forcibly * shutting down the filesystem, typically because of an IO error. * Our main objectives here are to make sure that: - * a. the filesystem gets marked 'SHUTDOWN' for all interested + * a. if !logerror, flush the logs to disk. Anything modified + * after this is ignored. + * b. the filesystem gets marked 'SHUTDOWN' for all interested * parties to find out, 'atomically'. - * b. those who're sleeping on log reservations, pinned objects and + * c. those who're sleeping on log reservations, pinned objects and * other resources get woken up, and be told the bad news. - * c. nothing new gets queued up after (a) and (b) are done. - * d. if !logerror, flush the iclogs to disk, then seal them off - * for business. + * d. nothing new gets queued up after (b) and (c) are done. * - * Note: for delayed logging the !logerror case needs to flush the regions - * held in memory out to the iclogs before flushing them to disk. This needs - * to be done before the log is marked as shutdown, otherwise the flush to the - * iclogs will fail. + * Note: for the !logerror case we need to flush the regions held in memory out + * to disk first. This needs to be done before the log is marked as shutdown, + * otherwise the iclog writes will fail. */ int xfs_log_force_umount( @@ -3910,16 +3909,16 @@ xfs_log_force_umount( ASSERT(XLOG_FORCED_SHUTDOWN(log)); return 1; } - retval = 0; /* - * Flush the in memory commit item list before marking the log as - * being shut down. We need to do it in this order to ensure all the - * completed transactions are flushed to disk with the xfs_log_force() - * call below. + * Flush all the completed transactions to disk before marking the log + * being shut down. We need to do it in this order to ensure that + * completed operations are safely on disk before we shut down, and that + * we don't have to issue any buffer IO after the shutdown flags are set + * to guarantee this. */ if (!logerror) - xlog_cil_force(log); + _xfs_log_force(mp, XFS_LOG_SYNC, NULL); /* * mark the filesystem and the as in a shutdown state and wake @@ -3931,18 +3930,11 @@ xfs_log_force_umount( XFS_BUF_DONE(mp->m_sb_bp); /* - * This flag is sort of redundant because of the mount flag, but - * it's good to maintain the separation between the log and the rest - * of XFS. + * Mark the log and the iclogs with IO error flags to prevent any + * further log IO from being issued or completed. */ log->l_flags |= XLOG_IO_ERROR; - - /* - * If we hit a log error, we want to mark all the iclogs IOERROR - * while we're still holding the loglock. - */ - if (logerror) - retval = xlog_state_ioerror(log); + retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); /* @@ -3955,19 +3947,6 @@ xfs_log_force_umount( xlog_grant_head_wake_all(&log->l_reserve_head); xlog_grant_head_wake_all(&log->l_write_head); - if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { - ASSERT(!logerror); - /* - * Force the incore logs to disk before shutting the - * log down completely. - */ - _xfs_log_force(mp, XFS_LOG_SYNC, NULL); - - spin_lock(&log->l_icloglock); - retval = xlog_state_ioerror(log); - spin_unlock(&log->l_icloglock); - } - /* * Wake up everybody waiting on xfs_log_force. Wake the CIL push first * as if the log writes were completed. The abort handling in the log diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 79cfe7e6ec7a..00cd7f3a8f59 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -193,12 +193,8 @@ xlog_bread_noalign( bp->b_io_length = nbblks; bp->b_error = 0; - if (XFS_FORCED_SHUTDOWN(log->l_mp)) - return -EIO; - - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); - if (error) + error = xfs_buf_submit_wait(bp); + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) xfs_buf_ioerror_alert(bp, __func__); return error; } @@ -378,12 +374,14 @@ xlog_recover_iodone( * We're not going to bother about retrying * this during recovery. One strike! */ - xfs_buf_ioerror_alert(bp, __func__); - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); + if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror_alert(bp, __func__); + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } } bp->b_iodone = NULL; - xfs_buf_ioend(bp, 0); + xfs_buf_ioend(bp); } /* @@ -4452,16 +4450,12 @@ xlog_do_recover( XFS_BUF_UNASYNC(bp); bp->b_ops = &xfs_sb_buf_ops; - if (XFS_FORCED_SHUTDOWN(log->l_mp)) { - xfs_buf_relse(bp); - return -EIO; - } - - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { - xfs_buf_ioerror_alert(bp, __func__); - ASSERT(0); + if (!XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_buf_ioerror_alert(bp, __func__); + ASSERT(0); + } xfs_buf_relse(bp); return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d36bdbc9eeb2..51435dbce9c4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -300,21 +300,15 @@ xfs_readsb( * access to the superblock. */ reread: - bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), 0, buf_ops); - if (!bp) { - if (loud) - xfs_warn(mp, "SB buffer read failed"); - return -EIO; - } - if (bp->b_error) { - error = bp->b_error; + error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, + BTOBB(sector_size), 0, &bp, buf_ops); + if (error) { if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); /* bad CRC means corrupted metadata */ if (error == -EFSBADCRC) error = -EFSCORRUPTED; - goto release_buf; + return error; } /* @@ -544,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp) * Check that the data (and log if separate) is an ok size. */ STATIC int -xfs_check_sizes(xfs_mount_t *mp) +xfs_check_sizes( + struct xfs_mount *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_daddr_t d; + int error; d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { xfs_warn(mp, "filesystem size mismatch detected"); return -EFBIG; } - bp = xfs_buf_read_uncached(mp->m_ddev_targp, + error = xfs_buf_read_uncached(mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, NULL); - if (!bp) { + XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); + if (error) { xfs_warn(mp, "last sector read failed"); - return -EIO; + return error; } xfs_buf_relse(bp); - if (mp->m_logdev_targp != mp->m_ddev_targp) { - d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); - if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { - xfs_warn(mp, "log size mismatch detected"); - return -EFBIG; - } - bp = xfs_buf_read_uncached(mp->m_logdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, NULL); - if (!bp) { - xfs_warn(mp, "log device read failed"); - return -EIO; - } - xfs_buf_relse(bp); + if (mp->m_logdev_targp == mp->m_ddev_targp) + return 0; + + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); + if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { + xfs_warn(mp, "log size mismatch detected"); + return -EFBIG; } + error = xfs_buf_read_uncached(mp->m_logdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + if (error) { + xfs_warn(mp, "log device read failed"); + return error; + } + xfs_buf_relse(bp); return 0; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d45aebe04dde..e1175ea9b551 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -921,16 +921,11 @@ xfs_growfs_rt( /* * Read in the last block of the device, make sure it exists. */ - bp = xfs_buf_read_uncached(mp->m_rtdev_targp, + error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, nrblocks - 1), - XFS_FSB_TO_BB(mp, 1), 0, NULL); - if (!bp) - return -EIO; - if (bp->b_error) { - error = bp->b_error; - xfs_buf_relse(bp); + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + if (error) return error; - } xfs_buf_relse(bp); /* @@ -1184,11 +1179,12 @@ xfs_rtallocate_extent( */ int /* error */ xfs_rtmount_init( - xfs_mount_t *mp) /* file system mount structure */ + struct xfs_mount *mp) /* file system mount structure */ { - xfs_buf_t *bp; /* buffer for last block of subvolume */ - xfs_daddr_t d; /* address of last block of subvolume */ - xfs_sb_t *sbp; /* filesystem superblock copy in mount */ + struct xfs_buf *bp; /* buffer for last block of subvolume */ + struct xfs_sb *sbp; /* filesystem superblock copy in mount */ + xfs_daddr_t d; /* address of last block of subvolume */ + int error; sbp = &mp->m_sb; if (sbp->sb_rblocks == 0) @@ -1214,14 +1210,12 @@ xfs_rtmount_init( (unsigned long long) mp->m_sb.sb_rblocks); return -EFBIG; } - bp = xfs_buf_read_uncached(mp->m_rtdev_targp, + error = xfs_buf_read_uncached(mp->m_rtdev_targp, d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, NULL); - if (!bp || bp->b_error) { + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + if (error) { xfs_warn(mp, "realtime device size check failed"); - if (bp) - xfs_buf_relse(bp); - return -EIO; + return error; } xfs_buf_relse(bp); return 0; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 152f82782630..51372e34d988 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); DEFINE_BUF_EVENT(xfs_buf_iodone); -DEFINE_BUF_EVENT(xfs_buf_iorequest); +DEFINE_BUF_EVENT(xfs_buf_submit); +DEFINE_BUF_EVENT(xfs_buf_submit_wait); DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 96c898e7ac9a..e2b2216b1635 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -318,20 +318,10 @@ xfs_trans_read_buf_map( XFS_BUF_READ(bp); bp->b_ops = ops; - /* - * XXX(hch): clean up the error handling here to be less - * of a mess.. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); - } else { - xfs_buf_iorequest(bp); - } - - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { - xfs_buf_ioerror_alert(bp, __func__); + if (!XFS_FORCED_SHUTDOWN(mp)) + xfs_buf_ioerror_alert(bp, __func__); xfs_buf_relse(bp); /* * We can gracefully recover from most read