From 97e36834f5a106459ab1b290e663a4eb6264639e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 12 Oct 2011 12:12:36 -0400 Subject: [PATCH 1/5] xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together In a union type structure to deal with the overlapping attributes in a easier manner. Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++--- drivers/block/xen-blkback/common.h | 74 ++++++++++++++++------------- drivers/block/xen-blkfront.c | 44 +++++++++-------- include/xen/interface/io/blkif.h | 24 +++++++--- 4 files changed, 88 insertions(+), 67 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 15ec4db194d1..d7104abc8b72 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -362,7 +362,7 @@ static int xen_blkbk_map(struct blkif_request *req, { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; - int nseg = req->nr_segments; + int nseg = req->u.rw.nr_segments; int ret = 0; /* @@ -449,7 +449,7 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) } else if (err) status = BLKIF_RSP_ERROR; - make_response(blkif, req->id, req->operation, status); + make_response(blkif, req->u.discard.id, req->operation, status); } static void xen_blk_drain_io(struct xen_blkif *blkif) @@ -644,7 +644,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* Check that the number of segments is sane. */ - nseg = req->nr_segments; + nseg = req->u.rw.nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { @@ -654,12 +655,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->handle; + preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; - pending_req->id = req->id; + pending_req->id = req->u.rw.id; pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; @@ -784,7 +785,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, xen_blkbk_unmap(pending_req); fail_response: /* Haven't submitted any bio's yet. */ - make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ return -EIO; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dfb1b3a43a5d..dbfe7b3b0737 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -60,58 +60,66 @@ struct blkif_common_response { char dummy; }; -/* i386 protocol version */ -#pragma pack(push, 4) - struct blkif_x86_32_request_rw { - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; - -struct blkif_x86_32_request_discard { - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - uint64_t nr_sectors; -}; - -struct blkif_x86_32_request { - uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +} __attribute__((__packed__)); + +struct blkif_x86_32_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* was "handle" for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + uint64_t nr_sectors; +} __attribute__((__packed__)); + +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; } u; -}; +} __attribute__((__packed__)); + +/* i386 protocol version */ +#pragma pack(push, 4) struct blkif_x86_32_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; #pragma pack(pop) - /* x86_64 protocol version */ struct blkif_x86_64_request_rw { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */ + uint64_t id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; +} __attribute__((__packed__)); struct blkif_x86_64_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* was "handle" for read/write requests */ + uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ + uint64_t id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - uint64_t nr_sectors; -}; + uint64_t nr_sectors; +} __attribute__((__packed__)); struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t __attribute__((__aligned__(8))) id; union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; } u; -}; +} __attribute__((__packed__)); + struct blkif_x86_64_response { uint64_t __attribute__((__aligned__(8))) id; uint8_t operation; /* copied from request */ @@ -237,18 +245,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - dst->id = src->id; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = src->u.rw.nr_segments; + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; + if (n > dst->u.rw.nr_segments) + n = dst->u.rw.nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->u.rw.seg[i]; break; @@ -266,18 +274,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - dst->id = src->id; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = src->u.rw.nr_segments; + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; + if (n > dst->u.rw.nr_segments) + n = dst->u.rw.nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->u.rw.seg[i]; break; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7b2ec5908413..2c2c4be7d9d6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -135,15 +135,15 @@ static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; BUG_ON(free >= BLK_RING_SIZE); - info->shadow_free = info->shadow[free].req.id; - info->shadow[free].req.id = 0x0fffffee; /* debug */ + info->shadow_free = info->shadow[free].req.u.rw.id; + info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; } static void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { - info->shadow[id].req.id = info->shadow_free; + info->shadow[id].req.u.rw.id = info->shadow_free; info->shadow[id].request = NULL; info->shadow_free = id; } @@ -287,9 +287,9 @@ static int blkif_queue_request(struct request *req) id = get_id_from_freelist(info); info->shadow[id].request = req; - ring_req->id = id; + ring_req->u.rw.id = id; ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->handle = info->handle; + ring_req->u.rw.handle = info->handle; ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; @@ -308,13 +308,15 @@ static int blkif_queue_request(struct request *req) if (unlikely(req->cmd_flags & REQ_DISCARD)) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; - ring_req->nr_segments = 0; + ring_req->u.discard.nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); } else { - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, + info->sg); + BUG_ON(ring_req->u.rw.nr_segments > + BLKIF_MAX_SEGMENTS_PER_REQUEST); - for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; @@ -705,7 +707,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s) { int i; - for (i = 0; i < s->req.nr_segments; i++) + for (i = 0; i < s->req.u.rw.nr_segments; i++) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } @@ -763,7 +765,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && - info->shadow[id].req.nr_segments == 0)) { + info->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : "flush disk cache", @@ -984,8 +986,8 @@ static int blkfront_probe(struct xenbus_device *dev, INIT_WORK(&info->work, blkif_restart_queue); for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); @@ -1019,9 +1021,9 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; + info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ for (i = 0; i < BLK_RING_SIZE; i++) { @@ -1034,17 +1036,17 @@ static int blkif_recover(struct blkfront_info *info) *req = copy[i].req; /* We get a new request id, and must reset the shadow state. */ - req->id = get_id_from_freelist(info); - memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); + req->u.rw.id = get_id_from_freelist(info); + memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->nr_segments; j++) + for (j = 0; j < req->u.rw.nr_segments; j++) gnttab_grant_foreign_access_ref( req->u.rw.seg[j].gref, info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->id].frame[j]), - rq_data_dir(info->shadow[req->id].request)); - info->shadow[req->id].req = *req; + pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + rq_data_dir(info->shadow[req->u.rw.id].request)); + info->shadow[req->u.rw.id].req = *req; info->ring.req_prod_pvt++; } diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 9324488f23f0..f88e28b6a27c 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -95,6 +95,12 @@ typedef uint64_t blkif_sector_t; #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 struct blkif_request_rw { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ +#endif + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ @@ -102,23 +108,27 @@ struct blkif_request_rw { /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; +} __attribute__((__packed__)); struct blkif_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ +#endif + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number; - uint64_t nr_sectors; -}; + uint64_t nr_sectors; + uint8_t _pad3; +} __attribute__((__packed__)); struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; } u; -}; +} __attribute__((__packed__)); struct blkif_response { uint64_t id; /* copied from request */ From 5ea42986694a96542644f9cae8b122d3a00c508f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 12 Oct 2011 16:23:30 -0400 Subject: [PATCH 2/5] xen/blk[front|back]: Enhance discard support with secure erasing support. Part of the blkdev_issue_discard(xx) operation is that it can also issue a secure discard operation that will permanantly remove the sectors in question. We advertise that we can support that via the 'discard-secure' attribute and on the request, if the 'secure' bit is set, we will attempt to pass in REQ_DISCARD | REQ_SECURE. CC: Li Dongyang [v1: Used 'flag' instead of 'secure:1' bit] [v2: Use 'reserved' uint8_t instead of adding a new value] [v3: Check for nseg when mapping instead of operation] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 18 ++++++++----- drivers/block/xen-blkback/common.h | 7 +++-- drivers/block/xen-blkback/xenbus.c | 12 +++++++++ drivers/block/xen-blkfront.c | 41 ++++++++++++++++++++++------- include/xen/interface/io/blkif.h | 18 ++++++++++++- 5 files changed, 77 insertions(+), 19 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d7104abc8b72..9d2261b02f24 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -422,13 +422,16 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { + unsigned long secure = (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? + BLKDEV_DISCARD_SECURE : 0; /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, - GFP_KERNEL, 0); - else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { + GFP_KERNEL, secure); + } else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { /* punch a hole in the backing file */ struct loop_device *lo = bdev->bd_disk->private_data; struct file *file = lo->lo_backing_file; @@ -643,8 +646,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, break; } - /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + if (unlikely(operation == REQ_DISCARD)) + nseg = 0; + else + /* Check that the number of segments is sane. */ + nseg = req->u.rw.nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || @@ -708,7 +714,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) + if (nseg && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dbfe7b3b0737..d0ee7edc9be8 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -69,7 +69,7 @@ struct blkif_x86_32_request_rw { } __attribute__((__packed__)); struct blkif_x86_32_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ blkif_vdev_t _pad1; /* was "handle" for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ @@ -104,7 +104,7 @@ struct blkif_x86_64_request_rw { } __attribute__((__packed__)); struct blkif_x86_64_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ blkif_vdev_t _pad1; /* was "handle" for read/write requests */ uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ uint64_t id; @@ -164,6 +164,7 @@ struct xen_vbd { /* Cached size parameter. */ sector_t size; bool flush_support; + bool discard_secure; }; struct backend_info; @@ -261,6 +262,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; @@ -290,6 +292,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f759ad4584c3..187fd2c1a15d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (q && q->flush_flags) vbd->flush_support = true; + if (q && blk_queue_secdiscard(q)) + vbd->discard_secure = true; + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) state = 1; blkif->blk_backend_type = BLKIF_BACKEND_PHY; } + /* Optional. */ + err = xenbus_printf(xbt, dev->nodename, + "discard-secure", "%d", + blkif->vbd.discard_secure); + if (err) { + xenbus_dev_fatal(dev, err, + "writting discard-secure"); + goto kfree; + } } } else { err = PTR_ERR(type); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c2c4be7d9d6..351ddeffd430 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,8 @@ struct blkfront_info unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; - unsigned int feature_discard; + unsigned int feature_discard:1; + unsigned int feature_secdiscard:1; unsigned int discard_granularity; unsigned int discard_alignment; int is_ready; @@ -305,11 +306,14 @@ static int blkif_queue_request(struct request *req) ring_req->operation = info->flush_op; } - if (unlikely(req->cmd_flags & REQ_DISCARD)) { + if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; - ring_req->u.discard.nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) + ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; + else + ring_req->u.discard.flag = 0; } else { ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -426,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; + if (info->feature_secdiscard) + queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -707,6 +713,8 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s) { int i; + /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place + * flag. */ for (i = 0; i < s->req.u.rw.nr_segments; i++) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } @@ -738,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) id = bret->id; req = info->shadow[id].request; - blkif_completion(&info->shadow[id]); + if (bret->operation != BLKIF_OP_DISCARD) + blkif_completion(&info->shadow[id]); add_id_to_freelist(info, id); @@ -751,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->gd->disk_name); error = -EOPNOTSUPP; info->feature_discard = 0; + info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); } __blk_end_request_all(req, error); break; @@ -1039,13 +1050,15 @@ static int blkif_recover(struct blkfront_info *info) req->u.rw.id = get_id_from_freelist(info); memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); + if (req->operation != BLKIF_OP_DISCARD) { /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->u.rw.nr_segments; j++) - gnttab_grant_foreign_access_ref( - req->u.rw.seg[j].gref, - info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), - rq_data_dir(info->shadow[req->u.rw.id].request)); + for (j = 0; j < req->u.rw.nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->u.rw.seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + rq_data_dir(info->shadow[req->u.rw.id].request)); + } info->shadow[req->u.rw.id].req = *req; info->ring.req_prod_pvt++; @@ -1137,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info) char *type; unsigned int discard_granularity; unsigned int discard_alignment; + unsigned int discard_secure; type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); if (IS_ERR(type)) return; + info->feature_secdiscard = 0; if (strncmp(type, "phy", 3) == 0) { err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "discard-granularity", "%u", &discard_granularity, @@ -1152,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%d", &discard_secure, + NULL); + if (!err) + info->feature_secdiscard = discard_secure; + } else if (strncmp(type, "file", 4) == 0) info->feature_discard = 1; diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index f88e28b6a27c..ee338bfde18b 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -84,6 +84,21 @@ typedef uint64_t blkif_sector_t; * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf + * The backend can optionally provide three extra XenBus attributes to + * further optimize the discard functionality: + * 'discard-aligment' - Devices that support discard functionality may + * internally allocate space in units that are bigger than the exported + * logical block size. The discard-alignment parameter indicates how many bytes + * the beginning of the partition is offset from the internal allocation unit's + * natural alignment. + * 'discard-granularity' - Devices that support discard functionality may + * internally allocate space using units that are bigger than the logical block + * size. The discard-granularity parameter indicates the size of the internal + * allocation unit in bytes if reported by the device. Otherwise the + * discard-granularity will be set to match the device's physical block size. + * 'discard-secure' - All copies of the discarded sectors (potentially created + * by garbage collection) must also be erased. To use this feature, the flag + * BLKIF_DISCARD_SECURE must be set in the blkif_request_trim. */ #define BLKIF_OP_DISCARD 5 @@ -111,7 +126,8 @@ struct blkif_request_rw { } __attribute__((__packed__)); struct blkif_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ blkif_vdev_t _pad1; /* only for read/write requests */ #ifdef CONFIG_X86_64 uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ From 421463526fd1d8b5cb575baca12667c1005a110b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 12 Oct 2011 17:26:47 -0400 Subject: [PATCH 3/5] xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io .. and move it to its own function that will deal with the discard operation. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 54 +++++++++++++---------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9d2261b02f24..b058de7825f5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -416,12 +416,16 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } -static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) +static int dispatch_discard_io(struct xen_blkif *blkif, + struct blkif_request *req) { int err = 0; int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; + blkif->st_ds_req++; + + xen_blkif_get(blkif); if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { unsigned long secure = (blkif->vbd.discard_secure && (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? @@ -453,6 +457,8 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) status = BLKIF_RSP_ERROR; make_response(blkif, req->u.discard.id, req->operation, status); + xen_blkif_put(blkif); + return err; } static void xen_blk_drain_io(struct xen_blkif *blkif) @@ -576,8 +582,11 @@ __do_block_io_op(struct xen_blkif *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - - if (dispatch_rw_block_io(blkif, &req, pending_req)) + if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + free_req(pending_req); + if (dispatch_discard_io(blkif, &req)) + break; + } else if (dispatch_rw_block_io(blkif, &req, pending_req)) break; /* Yield point for this unbounded loop. */ @@ -636,24 +645,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; break; - case BLKIF_OP_DISCARD: - blkif->st_ds_req++; - operation = REQ_DISCARD; - break; default: operation = 0; /* make gcc happy */ goto fail_response; break; } - if (unlikely(operation == REQ_DISCARD)) - nseg = 0; - else - /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + /* Check that the number of segments is sane. */ + nseg = req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_FLUSH && - operation != REQ_DISCARD) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); @@ -714,7 +715,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (nseg && xen_blkbk_map(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* @@ -746,23 +747,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); + BUG_ON(operation != WRITE_FLUSH); - if (operation == WRITE_FLUSH) { - bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; - biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; - } else if (operation == REQ_DISCARD) { - xen_blk_discard(blkif, req); - xen_blkif_put(blkif); - free_req(pending_req); - return 0; - } + biolist[nbio++] = bio; + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; } /* From ae18be11b5ccc3be9e268592616488c5f9d987f5 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 10 Nov 2011 15:52:06 +0800 Subject: [PATCH 4/5] xen-blkback: convert hole punching to discard request on loop devices As of dfaa2ef68e80c378e610e3c8c536f1c239e8d3ef, loop devices support discard request now. We could just issue a discard request, and the loop driver will punch the hole for us, so we don't need to touch the internals of loop device and punch the hole ourselves, Thanks. V0->V1: rebased on devel/for-jens-3.3 Signed-off-by: Li Dongyang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b058de7825f5..0088bf60f368 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,9 +39,6 @@ #include #include #include -#include -#include -#include #include #include @@ -426,27 +423,15 @@ static int dispatch_discard_io(struct xen_blkif *blkif, blkif->st_ds_req++; xen_blkif_get(blkif); - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || + blkif->blk_backend_type == BLKIF_BACKEND_FILE) { unsigned long secure = (blkif->vbd.discard_secure && (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? BLKDEV_DISCARD_SECURE : 0; - /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, secure); - } else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { - /* punch a hole in the backing file */ - struct loop_device *lo = bdev->bd_disk->private_data; - struct file *file = lo->lo_backing_file; - - if (file->f_op->fallocate) - err = file->f_op->fallocate(file, - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - req->u.discard.sector_number << 9, - req->u.discard.nr_sectors << 9); - else - err = -EOPNOTSUPP; } else err = -EOPNOTSUPP; From f094148a1751d6ece9374851eb2926bc3cfd16ef Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Tue, 29 Nov 2011 22:08:00 +0100 Subject: [PATCH 5/5] xen-blkfront: Use kcalloc instead of kzalloc to allocate array The advantage of kcalloc is, that will prevent integer overflows which could result from the multiplication of number of elements and size and it is also a bit nicer to read. The semantic patch that makes this change is available in https://lkml.org/lkml/2011/11/25/107 Signed-off-by: Thomas Meyer [v1: Seperated the drivers/block/cciss_scsi.c out of this patch] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 351ddeffd430..8cb0c27f2654 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -157,7 +157,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) if (end > nr_minors) { unsigned long *bitmap, *old; - bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), + bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap), GFP_KERNEL); if (bitmap == NULL) return -ENOMEM;