xen-blkfront: Introduce BLKIF_OP_FLUSH_DISKCACHE support.

If the backend supports the 'feature-flush-cache' mode, use that
instead of the 'feature-barrier' support.

Currently there are three backends that support the 'feature-flush-cache'
mode: NetBSD, Solaris, and the Linux kernel. The 'flush' operation is a
much lighter-weight mechanism than 'barrier' support, so let's try to use
it, as there are no filesystems in the kernel that use full barriers
anymore.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Konrad Rzeszutek Wilk 2011-05-03 12:01:11 -04:00
parent 6dcfb751c9
commit edf6ef59ec
1 changed file with 36 additions and 13 deletions

@@ -97,6 +97,7 @@ struct blkfront_info
 	struct blk_shadow shadow[BLK_RING_SIZE];
 	unsigned long shadow_free;
 	unsigned int feature_flush;
+	unsigned int flush_op;
 	int is_ready;
 };
@@ -250,8 +251,7 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
 
 /*
  * Generate a Xen blkfront IO request from a blk layer request.  Reads
- * and writes are handled as expected.  Since we lack a loose flush
- * request, we map flushes into a full ordered barrier.
+ * and writes are handled as expected.
  *
  * @req: a request struct
  */
@@ -293,14 +293,13 @@ static int blkif_queue_request(struct request *req)
 
 	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
 		/*
-		 * Ideally we could just do an unordered
-		 * flush-to-disk, but all we have is a full write
-		 * barrier at the moment.  However, a barrier write is
-		 * a superset of FUA, so we can implement it the same
-		 * way.  (It's also a FLUSH+FUA, since it is
-		 * guaranteed ordered WRT previous writes.)
+		 * Ideally we can do an unordered flush-to-disk.  If the
+		 * backend only supports barriers, use that instead.  A
+		 * barrier request is a superset of FUA, so we can
+		 * implement it the same way.  (It's also a FLUSH+FUA,
+		 * since it is guaranteed ordered WRT previous writes.)
 		 */
-		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+		ring_req->operation = info->flush_op;
 	}
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -433,8 +432,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: barriers %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s\n",
 	       info->gd->disk_name,
+	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+	       "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
+	       "flush diskcache" : "barrier or flush"),
 	       info->feature_flush ? "enabled" : "disabled");
 }
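
For reference, blk_queue_flush() is the block-layer hook this hunk relies
on: the driver declares which cache-control flags may appear on requests
for its queue. A hedged example of the possible settings, not taken from
this patch:

	blk_queue_flush(q, REQ_FLUSH);            /* cache flush only */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);  /* flush plus native FUA */
	blk_queue_flush(q, 0);                    /* no cache control at all */
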
@@ -720,15 +722,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
+		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
-				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
-				       info->gd->disk_name);
+				printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
+				       info->gd->disk_name,
+				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+				       "barrier" : "flush disk cache");
 				error = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     info->shadow[id].req.nr_segments == 0)) {
-				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
-				       info->gd->disk_name);
+				printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
+				       info->gd->disk_name,
+				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+				       "barrier" : "flush disk cache");
 				error = -EOPNOTSUPP;
 			}
@@ -736,6 +743,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			if (error == -EOPNOTSUPP)
 				error = 0;
 			info->feature_flush = 0;
+			info->flush_op = 0;
 			xlvbd_flush(info);
 		}
 		/* fall through */
@@ -1100,7 +1108,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier;
+	int barrier, flush;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1140,6 +1148,9 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	info->feature_flush = 0;
+	info->flush_op = 0;
+
 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 			    "feature-barrier", "%d", &barrier,
 			    NULL);
@@ -1151,11 +1162,23 @@ static void blkfront_connect(struct blkfront_info *info)
 	 *
 	 * If there are barriers, then we use flush.
 	 */
-	info->feature_flush = 0;
-	if (!err && barrier)
+	if (!err && barrier) {
 		info->feature_flush = REQ_FLUSH | REQ_FUA;
+		info->flush_op = BLKIF_OP_WRITE_BARRIER;
+	}
+	/*
+	 * If the backend also advertises "feature-flush-cache", prefer
+	 * it over barriers.
+	 */
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-flush-cache", "%d", &flush,
+			    NULL);
+	if (!err && flush) {
+		info->feature_flush = REQ_FLUSH;
+		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
+	}
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
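
The other half of the handshake lives in the backend, which advertises
the key from its connect path. A minimal sketch modeled on xen-blkback;
the helper name here is illustrative, not verbatim from any backend:

	#include <xen/xenbus.h>

	/* Tell the frontend that BLKIF_OP_FLUSH_DISKCACHE is safe to use
	 * by writing "feature-flush-cache" = 1 into this device's
	 * xenstore directory. */
	static void advertise_flush_cache(struct xenbus_device *dev,
					  struct xenbus_transaction xbt)
	{
		int err;

		err = xenbus_printf(xbt, dev->nodename,
				    "feature-flush-cache", "%d", 1);
		if (err)
			xenbus_dev_fatal(dev, err,
					 "writing feature-flush-cache");
	}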