xfs: use bios directly to read and write the log recovery buffers

The xfs_buf structure is basically used as a glorified container for
a memory allocation in the log recovery code.  Replace it with a
call to kmem_alloc_large and a simple abstraction to read into or
write from it synchronously using chained bios.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:
Christoph Hellwig 2019-06-28 19:27:26 -07:00 committed by Darrick J. Wong
parent 18ffb8c3f0
commit 6ad5b3255b
4 changed files with 151 additions and 159 deletions

View File

@ -62,6 +62,7 @@ xfs-y += xfs_aops.o \
xfs_attr_inactive.o \
xfs_attr_list.o \
xfs_bmap_util.o \
xfs_bio_io.o \
xfs_buf.o \
xfs_dir2_readdir.o \
xfs_discard.o \

61
fs/xfs/xfs_bio_io.c Normal file
View File

@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2019 Christoph Hellwig.
*/
#include "xfs.h"
static inline unsigned int bio_max_vecs(unsigned int count)
{
return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
}
/*
 * Synchronously read into or write from a kernel memory buffer using one or
 * more chained bios.
 *
 * @bdev:	block device to issue the I/O against
 * @sector:	device sector at which the I/O starts
 * @count:	length of the buffer in bytes
 * @data:	start of the buffer; may be a vmalloc address, in which case
 *		the vmap alias is flushed before a write and invalidated
 *		after a read
 * @op:		REQ_OP_READ or REQ_OP_WRITE
 *
 * Returns the result of submit_bio_wait() on the final bio of the chain.
 */
int
xfs_rw_bdev(
	struct block_device	*bdev,
	sector_t		sector,
	unsigned int		count,
	char			*data,
	unsigned int		op)
{
	unsigned int		is_vmalloc = is_vmalloc_addr(data);
	unsigned int		left = count;
	char			*p = data;	/* cursor; data stays at buffer start */
	int			error;
	struct bio		*bio;

	if (is_vmalloc && op == REQ_OP_WRITE)
		flush_kernel_vmap_range(data, count);

	bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = op | REQ_META | REQ_SYNC;

	do {
		struct page	*page = kmem_to_page(p);
		unsigned int	off = offset_in_page(p);
		unsigned int	len = min_t(unsigned, left, PAGE_SIZE - off);

		/* Current bio is full: start a new one and send the old one off. */
		while (bio_add_page(bio, page, len, off) != len) {
			struct bio	*prev = bio;

			bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
			bio_copy_dev(bio, prev);
			bio->bi_iter.bi_sector = bio_end_sector(prev);
			bio->bi_opf = prev->bi_opf;
			/*
			 * The new bio must be the parent of the previous one:
			 * we only wait on the last bio below, so its completion
			 * has to be held back until every earlier bio in the
			 * chain has completed as well.
			 */
			bio_chain(prev, bio);

			submit_bio(prev);
		}

		p += len;
		left -= len;
	} while (left > 0);

	error = submit_bio_wait(bio);
	bio_put(bio);

	/*
	 * Invalidate the vmap alias of the buffer that was just read into.
	 * This must use the start of the buffer, not the advanced cursor.
	 */
	if (is_vmalloc && op == REQ_OP_READ)
		invalidate_kernel_vmap_range(data, count);
	return error;
}

View File

@ -219,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
return x;
}
/*
 * Synchronously read (REQ_OP_READ) or write (REQ_OP_WRITE) count bytes
 * between the memory at data and bdev, starting at sector.  Implemented in
 * xfs_bio_io.c.
 */
int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
char *data, unsigned int op);
#define ASSERT_ALWAYS(expr) \
(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))

View File

@ -92,17 +92,14 @@ xlog_verify_bp(
}
/*
* Allocate a buffer to hold log data. The buffer needs to be able
* to map to a range of nbblks basic blocks at any valid (basic
* block) offset within the log.
* Allocate a buffer to hold log data. The buffer needs to be able to map to
* a range of nbblks basic blocks at any valid offset within the log.
*/
STATIC xfs_buf_t *
static char *
xlog_get_bp(
struct xlog *log,
int nbblks)
{
struct xfs_buf *bp;
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
@ -115,36 +112,23 @@ xlog_get_bp(
}
/*
* We do log I/O in units of log sectors (a power-of-2
* multiple of the basic block size), so we round up the
* requested size to accommodate the basic blocks required
* for complete log sectors.
* We do log I/O in units of log sectors (a power-of-2 multiple of the
* basic block size), so we round up the requested size to accommodate
* the basic blocks required for complete log sectors.
*
* In addition, the buffer may be used for a non-sector-
* aligned block offset, in which case an I/O of the
* requested size could extend beyond the end of the
* buffer. If the requested size is only 1 basic block it
* will never straddle a sector boundary, so this won't be
* an issue. Nor will this be a problem if the log I/O is
* done in basic blocks (sector size 1). But otherwise we
* extend the buffer by one extra log sector to ensure
* there's space to accommodate this possibility.
* In addition, the buffer may be used for a non-sector-aligned block
* offset, in which case an I/O of the requested size could extend
* beyond the end of the buffer. If the requested size is only 1 basic
* block it will never straddle a sector boundary, so this won't be an
* issue. Nor will this be a problem if the log I/O is done in basic
* blocks (sector size 1). But otherwise we extend the buffer by one
* extra log sector to ensure there's space to accommodate this
* possibility.
*/
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
bp = xfs_buf_get_uncached(log->l_targ, nbblks, 0);
if (bp)
xfs_buf_unlock(bp);
return bp;
}
STATIC void
xlog_put_bp(
xfs_buf_t *bp)
{
xfs_buf_free(bp);
return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
}
/*
@ -159,15 +143,13 @@ xlog_align(
return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
}
/*
* nbblks should be uint, but oh well. Just want to catch that 32-bit length.
*/
STATIC int
xlog_bread_noalign(
static int
xlog_do_io(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
struct xfs_buf *bp)
unsigned int nbblks,
char *data,
unsigned int op)
{
int error;
@ -181,107 +163,53 @@ xlog_bread_noalign(
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
ASSERT(nbblks > 0);
ASSERT(nbblks <= bp->b_length);
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
bp->b_flags |= XBF_READ;
bp->b_io_length = nbblks;
bp->b_error = 0;
error = xfs_buf_submit(bp);
if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
xfs_buf_ioerror_alert(bp, __func__);
error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
BBTOB(nbblks), data, op);
if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
xfs_alert(log->l_mp,
"log recovery %s I/O error at daddr 0x%llx len %d error %d",
op == REQ_OP_WRITE ? "write" : "read",
blk_no, nbblks, error);
}
return error;
}
/*
 * Read nbblks basic blocks of the log starting at blk_no into data by
 * issuing a REQ_OP_READ through xlog_do_io().  No pointer to the requested
 * block inside the buffer is handed back to the caller.
 */
STATIC int
xlog_bread_noalign(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	char		*data)
{
	int		error;

	error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
	return error;
}
STATIC int
xlog_bread(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
struct xfs_buf *bp,
char *data,
char **offset)
{
int error;
error = xlog_bread_noalign(log, blk_no, nbblks, bp);
if (error)
error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
if (!error)
*offset = data + xlog_align(log, blk_no);
return error;
*offset = bp->b_addr + xlog_align(log, blk_no);
return 0;
}
/*
* Read at an offset into the buffer. Returns with the buffer in its original
* state regardless of the result of the read.
*
* The buffer is temporarily re-pointed at offset for BBTOB(nbblks) bytes,
* the read is issued through xlog_bread_noalign(), and the buffer is then
* re-associated with its original address and length.
*
* Returns the error from the initial association or from the read if either
* failed; otherwise returns the result of restoring the original association.
*/
STATIC int
xlog_bread_offset(
struct xlog *log,
xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */
struct xfs_buf *bp,
char *offset)
{
/* remember the buffer's current memory so it can be restored below */
char *orig_offset = bp->b_addr;
int orig_len = BBTOB(bp->b_length);
int error, error2;
error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
if (error)
return error;
error = xlog_bread_noalign(log, blk_no, nbblks, bp);
/* must reset buffer pointer even on error */
error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
/* a read failure takes precedence over a failure to restore */
if (error)
return error;
return error2;
}
/*
* Write out the buffer at the given block for the given number of blocks.
* The buffer is kept locked across the write and is returned locked.
* This can only be used for synchronous log writes.
*/
STATIC int
xlog_bwrite(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
struct xfs_buf *bp)
char *data)
{
int error;
if (!xlog_verify_bp(log, blk_no, nbblks)) {
xfs_warn(log->l_mp,
"Invalid log block/length (0x%llx, 0x%x) for buffer",
blk_no, nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
ASSERT(nbblks > 0);
ASSERT(nbblks <= bp->b_length);
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
xfs_buf_hold(bp);
xfs_buf_lock(bp);
bp->b_io_length = nbblks;
bp->b_error = 0;
error = xfs_bwrite(bp);
if (error)
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
return error;
return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
}
#ifdef DEBUG
@ -399,7 +327,7 @@ xlog_recover_iodone(
STATIC int
xlog_find_cycle_start(
struct xlog *log,
struct xfs_buf *bp,
char *bp,
xfs_daddr_t first_blk,
xfs_daddr_t *last_blk,
uint cycle)
@ -449,7 +377,7 @@ xlog_find_verify_cycle(
{
xfs_daddr_t i, j;
uint cycle;
xfs_buf_t *bp;
char *bp;
xfs_daddr_t bufblks;
char *buf = NULL;
int error = 0;
@ -492,7 +420,7 @@ xlog_find_verify_cycle(
*new_blk = -1;
out:
xlog_put_bp(bp);
kmem_free(bp);
return error;
}
@ -516,7 +444,7 @@ xlog_find_verify_log_record(
int extra_bblks)
{
xfs_daddr_t i;
xfs_buf_t *bp;
char *bp;
char *offset = NULL;
xlog_rec_header_t *head = NULL;
int error = 0;
@ -601,7 +529,7 @@ xlog_find_verify_log_record(
*last_blk = i;
out:
xlog_put_bp(bp);
kmem_free(bp);
return error;
}
@ -623,7 +551,7 @@ xlog_find_head(
struct xlog *log,
xfs_daddr_t *return_head_blk)
{
xfs_buf_t *bp;
char *bp;
char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks;
@ -854,7 +782,7 @@ validate_head:
goto bp_err;
}
xlog_put_bp(bp);
kmem_free(bp);
if (head_blk == log_bbnum)
*return_head_blk = 0;
else
@ -868,7 +796,7 @@ validate_head:
return 0;
bp_err:
xlog_put_bp(bp);
kmem_free(bp);
if (error)
xfs_warn(log->l_mp, "failed to find log head");
@ -889,7 +817,7 @@ xlog_rseek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
struct xfs_buf *bp,
char *bp,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@ -963,7 +891,7 @@ xlog_seek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
struct xfs_buf *bp,
char *bp,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@ -1063,7 +991,7 @@ xlog_verify_tail(
int hsize)
{
struct xlog_rec_header *thead;
struct xfs_buf *bp;
char *bp;
xfs_daddr_t first_bad;
int error = 0;
bool wrapped;
@ -1123,7 +1051,7 @@ xlog_verify_tail(
"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
orig_tail, *tail_blk);
out:
xlog_put_bp(bp);
kmem_free(bp);
return error;
}
@ -1145,13 +1073,13 @@ xlog_verify_head(
struct xlog *log,
xfs_daddr_t *head_blk, /* in/out: unverified head */
xfs_daddr_t *tail_blk, /* out: tail block */
struct xfs_buf *bp,
char *bp,
xfs_daddr_t *rhead_blk, /* start blk of last record */
struct xlog_rec_header **rhead, /* ptr to last record */
bool *wrapped) /* last rec. wraps phys. log */
{
struct xlog_rec_header *tmp_rhead;
struct xfs_buf *tmp_bp;
char *tmp_bp;
xfs_daddr_t first_bad;
xfs_daddr_t tmp_rhead_blk;
int found;
@ -1170,7 +1098,7 @@ xlog_verify_head(
error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
&tmp_rhead, &tmp_wrapped);
xlog_put_bp(tmp_bp);
kmem_free(tmp_bp);
if (error < 0)
return error;
@ -1260,7 +1188,7 @@ xlog_check_unmount_rec(
xfs_daddr_t *tail_blk,
struct xlog_rec_header *rhead,
xfs_daddr_t rhead_blk,
struct xfs_buf *bp,
char *bp,
bool *clean)
{
struct xlog_op_header *op_head;
@ -1382,7 +1310,7 @@ xlog_find_tail(
{
xlog_rec_header_t *rhead;
char *offset = NULL;
xfs_buf_t *bp;
char *bp;
int error;
xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn;
@ -1503,7 +1431,7 @@ xlog_find_tail(
error = xlog_clear_stale_blocks(log, tail_lsn);
done:
xlog_put_bp(bp);
kmem_free(bp);
if (error)
xfs_warn(log->l_mp, "failed to locate log tail");
@ -1531,7 +1459,7 @@ xlog_find_zeroed(
struct xlog *log,
xfs_daddr_t *blk_no)
{
xfs_buf_t *bp;
char *bp;
char *offset;
uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk;
@ -1551,7 +1479,7 @@ xlog_find_zeroed(
first_cycle = xlog_get_cycle(offset);
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
xlog_put_bp(bp);
kmem_free(bp);
return 1;
}
@ -1562,7 +1490,7 @@ xlog_find_zeroed(
last_cycle = xlog_get_cycle(offset);
if (last_cycle != 0) { /* log completely written to */
xlog_put_bp(bp);
kmem_free(bp);
return 0;
}
@ -1608,7 +1536,7 @@ xlog_find_zeroed(
*blk_no = last_blk;
bp_err:
xlog_put_bp(bp);
kmem_free(bp);
if (error)
return error;
return 1;
@ -1651,7 +1579,7 @@ xlog_write_log_records(
int tail_block)
{
char *offset;
xfs_buf_t *bp;
char *bp;
int balign, ealign;
int sectbb = log->l_sectBBsize;
int end_block = start_block + blocks;
@ -1699,15 +1627,14 @@ xlog_write_log_records(
*/
ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
offset = bp->b_addr + BBTOB(ealign - start_block);
error = xlog_bread_offset(log, ealign, sectbb,
bp, offset);
error = xlog_bread_noalign(log, ealign, sectbb,
bp + BBTOB(ealign - start_block));
if (error)
break;
}
offset = bp->b_addr + xlog_align(log, start_block);
offset = bp + xlog_align(log, start_block);
for (; j < endcount; j++) {
xlog_add_record(log, offset, cycle, i+j,
tail_cycle, tail_block);
@ -1721,7 +1648,7 @@ xlog_write_log_records(
}
out_put_bp:
xlog_put_bp(bp);
kmem_free(bp);
return error;
}
@ -5301,7 +5228,7 @@ xlog_do_recovery_pass(
xfs_daddr_t blk_no, rblk_no;
xfs_daddr_t rhead_blk;
char *offset;
xfs_buf_t *hbp, *dbp;
char *hbp, *dbp;
int error = 0, h_size, h_len;
int error2 = 0;
int bblks, split_bblks;
@ -5368,7 +5295,7 @@ xlog_do_recovery_pass(
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
if (h_size % XLOG_HEADER_CYCLE_SIZE)
hblks++;
xlog_put_bp(hbp);
kmem_free(hbp);
hbp = xlog_get_bp(log, hblks);
} else {
hblks = 1;
@ -5384,7 +5311,7 @@ xlog_do_recovery_pass(
return -ENOMEM;
dbp = xlog_get_bp(log, BTOBB(h_size));
if (!dbp) {
xlog_put_bp(hbp);
kmem_free(hbp);
return -ENOMEM;
}
@ -5399,7 +5326,7 @@ xlog_do_recovery_pass(
/*
* Check for header wrapping around physical end-of-log
*/
offset = hbp->b_addr;
offset = hbp;
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
@ -5435,8 +5362,8 @@ xlog_do_recovery_pass(
* - order is important.
*/
wrapped_hblks = hblks - split_hblks;
error = xlog_bread_offset(log, 0,
wrapped_hblks, hbp,
error = xlog_bread_noalign(log, 0,
wrapped_hblks,
offset + BBTOB(split_hblks));
if (error)
goto bread_err2;
@ -5467,7 +5394,7 @@ xlog_do_recovery_pass(
} else {
/* This log record is split across the
* physical end of log */
offset = dbp->b_addr;
offset = dbp;
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
@ -5496,8 +5423,8 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end)
* - order is important.
*/
error = xlog_bread_offset(log, 0,
bblks - split_bblks, dbp,
error = xlog_bread_noalign(log, 0,
bblks - split_bblks,
offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
@ -5545,9 +5472,9 @@ xlog_do_recovery_pass(
}
bread_err2:
xlog_put_bp(dbp);
kmem_free(dbp);
bread_err1:
xlog_put_bp(hbp);
kmem_free(hbp);
/*
* Submit buffers that have been added from the last record processed,