From 4d05a28db56225bbab5e1321d818f318e92a4657 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 18:25:47 -0400 Subject: [PATCH 01/72] xen: add blkback support Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/Makefile --- drivers/xen/Kconfig | 8 + drivers/xen/Makefile | 1 + drivers/xen/blkback/Makefile | 3 + drivers/xen/blkback/blkback.c | 656 ++++++++++++++++++++++++++++++++ drivers/xen/blkback/common.h | 139 +++++++ drivers/xen/blkback/interface.c | 181 +++++++++ drivers/xen/blkback/vbd.c | 118 ++++++ drivers/xen/blkback/xenbus.c | 541 ++++++++++++++++++++++++++ include/xen/blkif.h | 123 ++++++ 9 files changed, 1770 insertions(+) create mode 100644 drivers/xen/blkback/Makefile create mode 100644 drivers/xen/blkback/blkback.c create mode 100644 drivers/xen/blkback/common.h create mode 100644 drivers/xen/blkback/interface.c create mode 100644 drivers/xen/blkback/vbd.c create mode 100644 drivers/xen/blkback/xenbus.c create mode 100644 include/xen/blkif.h diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..fb1af628cbfc 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,6 +37,14 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND && BLOCK + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. 
+ config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f420f1ff7f13..29c0a416f082 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile new file mode 100644 index 000000000000..8bab63da3b3e --- /dev/null +++ b/drivers/xen/blkback/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o + +blkbk-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c new file mode 100644 index 000000000000..5b8d50e344b4 --- /dev/null +++ b/drivers/xen/blkback/blkback.c @@ -0,0 +1,656 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/main.c + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. 
A + * reference front-end implementation can be found in: + * arch/xen/drivers/blkif/frontend + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. 
+ * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* + * Run-time switchable: /sys/module/blkbk/parameters/ (the Makefile links + * this driver as blkbk.o). Both variables are unsigned, so they are + * registered with the matching 'uint' parameter type. + */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, uint, 0644); +module_param(debug_lvl, uint, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each bio that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +#define BLKBACK_INVALID_HANDLE (~0) + +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +/* + * Index of segment 'seg' of request 'req' in the flat pending_pages[] and + * pending_grant_handles[] arrays: every request owns + * BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive slots. + */ +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +/* Kernel virtual address of the page reserved for segment 'seg' of 'req'. */ +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +/* Grant-handle slot (an lvalue) for segment _seg of request _req. */ +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st); + 
+/****************************************************************** + * misc small helpers + * + * alloc_req() takes one pending_req_t off the pending_free list under + * pending_free_lock and returns it, or NULL when the list is empty. + */ +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} +/* Put req back on the free list; if the list had been empty, wake pending_free_wq. */ +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} +/* Call the remembered queue's unplug_fn (if set), drop its reference and forget it; no-op when blkif->plug is NULL. */ +static void unplug_queue(blkif_t *blkif) +{ + if (blkif->plug == NULL) + return; + if (blkif->plug->unplug_fn) + blkif->plug->unplug_fn(blkif->plug); + blk_put_queue(blkif->plug); + blkif->plug = NULL; +} +/* Remember bdev's request queue in blkif->plug (taking a reference); a different remembered queue is unplugged first. */ +static void plug_queue(blkif_t *blkif, struct block_device *bdev) +{ + request_queue_t *q = bdev_get_queue(bdev); + + if (q == blkif->plug) + return; + unplug_queue(blkif); + blk_get_queue(q); + blkif->plug = q; +} +/* Unmap every still-valid grant handle of req in a single GNTTABOP_unmap_grant_ref batch and mark those handles invalid. */ +static void fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + current->comm, 
blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} +/* + * Thread function for one blkif (arg is the blkif_t): holds a reference on + * the interface for its lifetime and loops until kthread_should_stop(), + * draining the request ring with do_block_io_op(). Always returns 0. + */ +int blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + /* Sleep until this interface has work, then until a pending_req is free (or we are told to stop). */ + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + /* A non-zero return means requests are still outstanding: re-arm the flag so the next wait does not sleep. */ + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + unplug_queue(blkif); + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + +static void __end_block_io_op(pending_req_t *pending_req, int error) +{ + /* An error fails the entire request. 
*/ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + /* Last outstanding bio of the request: unmap the grants, queue the response, drop the blkif reference and recycle the request. */ + if (atomic_dec_and_test(&pending_req->pendcnt)) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} +/* bi_end_io callback: returns 1 while bi_size is non-zero; otherwise completes the pending_req in bi_private, drops the bio and returns error. */ +static int end_block_io_op(struct bio *bio, unsigned int done, int error) +{ + if (bio->bi_size != 0) + return 1; + __end_block_io_op(bio->bi_private, error); + bio_put(bio); + return error; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ +/* Flag that requests are waiting and wake the thread sleeping on blkif->wq. */ +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} +/* Interrupt handler (bound in blkif_map() with the blkif as dev_id): just kicks the worker. */ +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + blkif_request_t req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. 
*/ + cond_resched(); + } + + return more_to_do; +} +/* + * Handle one read/write/barrier request: validate the segment list, map the + * guest's grant references, check the range with vbd_translate() and submit + * the data as one or more bios. On any failure an error response is queued + * for the guest and pending_req is released. + */ +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct phys_req preq; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + int ret, i; + int operation; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that number of segments is sane. */ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + /* Validate each segment's sector range and build the grant-map batch. */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->seg[i].last_sect < req->seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + /* Every operation other than READ gets its guest page mapped read-only. */ + flags = GNTMAP_host_map; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = 
BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + /* After the first mapping failure only the handles are recorded (for fail_flush). */ + if (ret) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (ret) + goto fail_flush; + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_flush; + } + /* pendcnt starts at 1 for the final submit_bio() below; each earlier bio submitted in the loop adds one. */ + plug_queue(blkif, preq.bdev); + atomic_set(&pending_req->pendcnt, 1); + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_put_bio; + } + + while ((bio == NULL) || + (bio_add_page(bio, + virt_to_page(vaddr(pending_req, i)), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + if (bio) { + atomic_inc(&pending_req->pendcnt); + submit_bio(operation, bio); + } + + bio = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + /* No bio was built, i.e. there were no segments (only a barrier may get here): submit an empty bio. */ + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + submit_bio(operation, bio); + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, 
req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + __end_block_io_op(pending_req, -EINVAL); + if (bio) + bio_put(bio); + unplug_queue(blkif); + msleep(1); /* back off a bit */ + return; +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + +/* + * Queue a response (id, op, st) on the interface's response ring under + * blk_ring_lock, then notify the frontend if the ring macros ask for it and + * re-kick our own worker when more requests are already waiting. + */ +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st) +{ + blkif_response_t resp; + unsigned long flags; + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + /* + * The whole structure is memcpy()'d into the ring shared with the + * guest, so clear it first: any padding between or after the fields + * would otherwise carry uninitialised kernel stack bytes. + */ + memset(&resp, 0, sizeof(resp)); + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +/* + * Module init: allocate the pending_req pool plus the page and grant-handle + * arrays (BLKIF_MAX_SEGMENTS_PER_REQUEST slots per request), build the free + * list and bring up the interface and xenbus layers. Returns 0, -ENODEV when + * not running on Xen, or -ENOMEM when an allocation fails. + */ +static int __init blkif_init(void) +{ + int i, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + + blkif_interface_init(); + + /* + * pending_reqs is a pointer, so sizeof(pending_reqs) would clear only + * pointer-size bytes; zero the whole kmalloc()'d array instead. + */ + memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&pending_reqs[i].free_list, &pending_free); + + blkif_xenbus_init(); + + return 0; + + out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h new file mode 100644 index 000000000000..422e935528be --- /dev/null +++ b/drivers/xen/blkback/common.h @@ -0,0 +1,139 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of 
charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* Debug print that prefixes the message with the source file and line. */ +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +/* Ties a guest-visible vbd handle to the backing block device it maps to. */ +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; + +struct backend_info; +/* Per-interface state: one blkif_t is allocated by blkif_alloc() for a given domid. */ +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. 
*/ + enum blkif_protocol blk_protocol; + blkif_back_rings_t blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + /* Worker thread state: wq/waiting_reqs are used to wake xenblkd; plug is the request queue currently remembered by plug_queue(). */ + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + request_queue_t *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_rd_sect; + int st_wr_sect; + /* Woken by blkif_put() when refcnt drops to zero. */ + wait_queue_head_t waiting_to_free; + /* Grant handle and grant reference of the mapped shared ring page. */ + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +} blkif_t; + +blkif_t *blkif_alloc(domid_t domid); +void blkif_disconnect(blkif_t *blkif); +void blkif_free(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +/* Reference counting on blkif_t: the blkif_put() that drops refcnt to zero wakes waiting_to_free. */ +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); +/* Accessors for the size, VDISK_* info flags and sector size of a vbd. */ +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); +/* Request range handed to vbd_translate(), which checks it and fills in dev and bdev. */ +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); + +void blkif_interface_init(void); + +void blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c new file mode 100644 index 000000000000..81821bdc7ef1 --- /dev/null +++ b/drivers/xen/blkback/interface.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. 
+ * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "common.h" +#include +#include + +static kmem_cache_t *blkif_cachep; +/* Allocate and zero a blkif_t for domid with refcnt 1; returns ERR_PTR(-ENOMEM) on allocation failure. */ +blkif_t *blkif_alloc(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} +/* Map the frontend's grant reference shared_page at blk_ring_area->addr; returns 0 or the grant-op status, recording ref and handle on success. */ +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} +/* Undo map_frontend_page() using the saved shmem_handle. */ +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} +/* Map the shared ring page, initialise the back ring for blk_protocol and bind evtchn to blkif_be_int; returns 0 or a negative error with the mapping undone. */ +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + + /* Already connected through? 
*/ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + /* A non-negative return value from the bind call is stored as the irq. */ + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) + { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} +/* Stop the worker thread, wait for all other references to go away, then unbind the irq and unmap the ring. */ +void blkif_disconnect(blkif_t *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + /* Drop our own reference while waiting for the count to reach zero, then take it back. */ + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} +/* Release the final reference and free the blkif_t; BUG()s if other references remain. */ +void blkif_free(blkif_t *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, 
NULL); +} diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c new file mode 100644 index 000000000000..1fb31d0236b4 --- /dev/null +++ b/drivers/xen/blkback/vbd.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * blkback/vbd.c + * + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + +/* Size of the vbd in sectors (partition size if bd_part is set, else whole-disk capacity). */ +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +/* VDISK_* type flags of the vbd, with VDISK_READONLY added for read-only devices. */ +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly?VDISK_READONLY:0); +} + +/* Hardware sector size of the backing block device. */ +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_hardsect_size(vbd->bdev); +} + +/* + * Fill in blkif->vbd for device major:minor and open the backing block + * device. Returns 0 on success or -ENOENT when the device cannot be opened + * or has no gendisk. + */ +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = open_by_devnum(vbd->pdevice, + vbd->readonly ? FMODE_READ : FMODE_WRITE); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +/* Release the backing block device, if one is open. */ +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev); + vbd->bdev = NULL; +} + +/* + * Check that 'operation' on the sector range in req is permitted on this + * interface's vbd and, if so, fill in req->dev and req->bdev. + * Returns 0 on success, -EACCES for a write to a read-only vbd or a range + * that does not lie inside the device. + */ +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + blkif_sector_t end; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + /* + * sector_number comes from the guest: reject a range whose end wraps + * around, otherwise the wrapped value would pass the size check below. + */ + end = req->sector_number + req->nr_sects; + if (unlikely(end < req->sector_number)) + goto out; + if (unlikely(end > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c new file mode 100644 index 000000000000..80d9aa6e6ba3 --- /dev/null +++ b/drivers/xen/blkback/xenbus.c @@ -0,0 +1,541 @@ +/* Xenbus code for blkif backend + Copyright (C) 2005 Rusty 
Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + +struct backend_info +{ + struct xenbus_device *dev; + blkif_t *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(blkif_t *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? 
*/ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/**************************************************************** + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) \ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev->dev.driver_data; \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + 
+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev->dev.driver_data; + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev->dev.driver_data = NULL; + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. 
+ */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev->dev.driver_data = be; + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_path2(dev, dev->nodename, "physical-device", + &be->backend_watch, backend_changed); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. 
*/ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle = simple_strtoul(p, NULL, 0); + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + } +} + + +/** + * Callback received when the frontend's state changes. 
+ */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev->dev.driver_data; + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. 
+ */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if 
(err) + strcpy(protocol, "unspecified, assuming native"); + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +void blkif_xenbus_init(void) +{ + xenbus_register_backend(&blkback); +} diff --git a/include/xen/blkif.h b/include/xen/blkif.h new file mode 100644 index 000000000000..3d56b75de909 --- /dev/null +++ b/include/xen/blkif.h @@ -0,0 +1,123 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include +#include +#include + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? 
*/ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +union blkif_back_rings { + blkif_back_ring_t native; + blkif_common_back_ring_t common; + blkif_x86_32_back_ring_t x86_32; + blkif_x86_64_back_ring_t x86_64; +}; +typedef union blkif_back_rings blkif_back_rings_t; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + +static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = 
dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +#endif /* __XEN_BLKIF_H__ */ From 8812293323a79134e06c3bf82eba1e217d23382e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:51 -0800 Subject: [PATCH 02/72] xen-blkback-porting --- drivers/xen/blkback/blkback.c | 30 ++++++++++++++++-------------- drivers/xen/blkback/common.h | 9 ++++----- drivers/xen/blkback/interface.c | 19 ++++++++++--------- drivers/xen/blkback/vbd.c | 4 ++-- drivers/xen/blkback/xenbus.c | 7 ++++--- include/xen/blkif.h | 13 ++++++------- 6 files changed, 42 insertions(+), 40 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 5b8d50e344b4..43fd07091d4d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -39,8 +39,12 @@ #include #include #include +#include #include -#include +#include +#include +#include +#include #include "common.h" /* @@ -106,7 +110,7 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(blkif_t *blkif); static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req); static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); @@ -153,7 +157,7 @@ static void unplug_queue(blkif_t *blkif) static void plug_queue(blkif_t *blkif, struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); if (q == blkif->plug) return; @@ -268,13 +272,10 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } -static int end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } @@ -288,7 +289,7 @@ static void blkif_notify_work(blkif_t *blkif) wake_up(&blkif->wq); } -irqreturn_t blkif_be_int(int irq, 
void *dev_id, struct pt_regs *regs) +irqreturn_t blkif_be_int(int irq, void *dev_id) { blkif_notify_work(dev_id); return IRQ_HANDLED; @@ -302,8 +303,8 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) static int do_block_io_op(blkif_t *blkif) { - blkif_back_rings_t *blk_rings = &blkif->blk_rings; - blkif_request_t req; + union blkif_back_rings *blk_rings = &blkif->blk_rings; + struct blkif_request req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -379,7 +380,7 @@ static int do_block_io_op(blkif_t *blkif) } static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); @@ -560,9 +561,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { - blkif_response_t resp; + struct blkif_response resp; unsigned long flags; - blkif_back_rings_t *blk_rings = &blkif->blk_rings; + union blkif_back_rings *blk_rings = &blkif->blk_rings; int more_to_do = 0; int notify; @@ -614,7 +615,8 @@ static int __init blkif_init(void) { int i, mmap_pages; - if (!is_running_on_xen()) + printk(KERN_CRIT "***blkif_init\n"); + if (!xen_pv_domain()) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 422e935528be..1c422b00974e 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -40,8 +40,7 @@ #include #include #include -#include -#include +#include #include #define DPRINTK(_f, _a...) \ @@ -66,7 +65,7 @@ typedef struct blkif_st { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; - blkif_back_rings_t blk_rings; + union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. 
*/ struct vbd vbd; @@ -79,7 +78,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; unsigned int waiting_reqs; - request_queue_t *plug; + struct request_queue *plug; /* statistics */ unsigned long st_print; @@ -130,7 +129,7 @@ void blkif_interface_init(void); void blkif_xenbus_init(void); -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 81821bdc7ef1..c6c3e14776b9 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -31,10 +31,11 @@ */ #include "common.h" -#include +#include +#include #include -static kmem_cache_t *blkif_cachep; +static struct kmem_cache *blkif_cachep; blkif_t *blkif_alloc(domid_t domid) { @@ -107,22 +108,22 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) switch (blkif->blk_protocol) { case BLKIF_PROTOCOL_NATIVE: { - blkif_sring_t *sring; - sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { - blkif_x86_32_sring_t *sring_x86_32; - sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { - blkif_x86_64_sring_t *sring_x86_64; - sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); break; } @@ -177,5 +178,5 @@ void 
blkif_free(blkif_t *blkif) void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); } diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 1fb31d0236b4..7e9a1cd35ade 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -33,7 +33,7 @@ #include "common.h" #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -94,7 +94,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, void vbd_free(struct vbd *vbd) { if (vbd->bdev) - blkdev_put(vbd->bdev); + blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); vbd->bdev = NULL; } diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 80d9aa6e6ba3..650f4b3e9b3c 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -238,8 +238,8 @@ static int blkback_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_path2(dev, dev->nodename, "physical-device", - &be->backend_watch, backend_changed); + err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; @@ -537,5 +537,6 @@ static struct xenbus_driver blkback = { void blkif_xenbus_init(void) { - xenbus_register_backend(&blkback); + /* XXX must_check */ + (void)xenbus_register_backend(&blkback); } diff --git a/include/xen/blkif.h b/include/xen/blkif.h index 3d56b75de909..d27428046918 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -77,12 +77,11 @@ DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); union blkif_back_rings { - blkif_back_ring_t native; - 
blkif_common_back_ring_t common; - blkif_x86_32_back_ring_t x86_32; - blkif_x86_64_back_ring_t x86_64; + struct blkif_back_ring native; + struct blkif_common_back_ring common; + struct blkif_x86_32_back_ring x86_32; + struct blkif_x86_64_back_ring x86_64; }; -typedef union blkif_back_rings blkif_back_rings_t; enum blkif_protocol { BLKIF_PROTOCOL_NATIVE = 1, @@ -90,7 +89,7 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; @@ -105,7 +104,7 @@ static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_reque dst->seg[i] = src->seg[i]; } -static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; From dd3672424caa7b302433635831afbb6787476b96 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 16:39:58 -0800 Subject: [PATCH 03/72] xen/blkback: don't include xen/evtchn.h It's a user-mode header for users of /dev/evtchn Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 1c422b00974e..57b78250cfb7 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include From 8270b45bc8a45eef4a224bd256bd0997d4fd857e Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Fri, 6 Mar 2009 08:29:15 +0000 Subject: [PATCH 04/72] blkback: Fix potential resource leak. 
--- drivers/xen/blkback/blkback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 43fd07091d4d..8d988f4513aa 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -318,14 +318,14 @@ static int do_block_io_op(blkif_t *blkif) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; - pending_req = alloc_req(); - if (NULL == pending_req) { - blkif->st_oo_req++; + if (kthread_should_stop()) { more_to_do = 1; break; } - if (kthread_should_stop()) { + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; more_to_do = 1; break; } From 690f1b63b2db88330834d8482f3b125990c8e609 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Mar 2009 23:34:19 -0700 Subject: [PATCH 05/72] block: export blk_get/put_queue for blkback Impact: build fix I'm not sure if blkback should be using these functions, but in the meantime export them to allow blkback to be a module. 
Signed-off-by: Jeremy Fitzhardinge --- block/blk-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 90f22cc30799..9b60e69a5400 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,6 +351,7 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } +EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -572,6 +573,7 @@ int blk_get_queue(struct request_queue *q) return 1; } +EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { From 05d43865ddc00bdb33d12c8e9d9f176ed5d3797b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 29 Jun 2009 14:58:45 -0700 Subject: [PATCH 06/72] xen/blkback: deal with hardsect_size to logical_block_size rename Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 2 +- drivers/xen/blkback/vbd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8d988f4513aa..ac5af91c393f 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -484,7 +484,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { DPRINTK("Misaligned I/O request from domain %d", blkif->domid); goto fail_put_bio; diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 7e9a1cd35ade..410c2eac5ad7 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd) unsigned long vbd_secsize(struct vbd *vbd) { - return bdev_hardsect_size(vbd->bdev); + return bdev_logical_block_size(vbd->bdev); } int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, From 0660c7dbf228a06345392a64ebb43734875a3b91 
Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 9 Sep 2009 15:15:16 -0700 Subject: [PATCH 07/72] xen/blkback: remove spurious debug output noise Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index ac5af91c393f..31458bd07252 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -615,7 +615,6 @@ static int __init blkif_init(void) { int i, mmap_pages; - printk(KERN_CRIT "***blkif_init\n"); if (!xen_pv_domain()) return -ENODEV; From afd91d07ff72919071e37086c0664384b3875688 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Sep 2009 14:12:37 -0700 Subject: [PATCH 08/72] xen/blkback: little cleanups Remove unused local prototype; group headers. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 31458bd07252..e9e3de119a73 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -40,6 +40,7 @@ #include #include #include + #include #include #include @@ -383,7 +384,6 @@ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) { - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; struct { From 8770b2683f9f98d4c1d6caf2e28f625592bba4f3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 8 Oct 2009 13:23:09 -0400 Subject: [PATCH 09/72] Fix compile warnings: ignoring return value of 'xenbus_register_backend' .. We neglect to check the return value of xenbus_register_backend and take actions when that fails. This patch fixes that and adds code to deal with those type of failures. 
Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 18 +++++++++++++----- drivers/xen/blkback/common.h | 4 ++-- drivers/xen/blkback/interface.c | 6 +++++- drivers/xen/blkback/xenbus.c | 5 ++--- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index e9e3de119a73..a2ac7189cc0a 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -614,6 +614,7 @@ static void make_response(blkif_t *blkif, u64 id, static int __init blkif_init(void) { int i, mmap_pages; + int rc = 0; if (!xen_pv_domain()) return -ENODEV; @@ -626,13 +627,17 @@ static int __init blkif_init(void) mmap_pages, GFP_KERNEL); pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) + if (!pending_reqs || !pending_grant_handles || !pending_pages) { + rc = -ENOMEM; goto out_of_memory; + } for (i = 0; i < mmap_pages; i++) pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkif_interface_init(); + rc = blkif_interface_init(); + if (rc) + goto failed_init; memset(pending_reqs, 0, sizeof(pending_reqs)); INIT_LIST_HEAD(&pending_free); @@ -640,16 +645,19 @@ static int __init blkif_init(void) for (i = 0; i < blkif_reqs; i++) list_add_tail(&pending_reqs[i].free_list, &pending_free); - blkif_xenbus_init(); + rc = blkif_xenbus_init(); + if (rc) + goto failed_init; return 0; out_of_memory: + printk(KERN_ERR "%s: out of memory\n", __func__); + failed_init: kfree(pending_reqs); kfree(pending_grant_handles); free_empty_pages_and_pagevec(pending_pages, mmap_pages); - printk("%s: out of memory\n", __FUNCTION__); - return -ENOMEM; + return rc; } module_init(blkif_init); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 57b78250cfb7..aaf36485bc01 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -124,9 +124,9 @@ struct phys_req { int 
vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); -void blkif_interface_init(void); +int blkif_interface_init(void); -void blkif_xenbus_init(void); +int blkif_xenbus_init(void); irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index c6c3e14776b9..e397a4134f1b 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -175,8 +175,12 @@ void blkif_free(blkif_t *blkif) kmem_cache_free(blkif_cachep, blkif); } -void __init blkif_interface_init(void) +int __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; } diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 650f4b3e9b3c..04c0a12aff36 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -535,8 +535,7 @@ static struct xenbus_driver blkback = { }; -void blkif_xenbus_init(void) +int blkif_xenbus_init(void) { - /* XXX must_check */ - (void)xenbus_register_backend(&blkback); + return xenbus_register_backend(&blkback); } From e7579a99b598f8e4a2b4df4854fbda2cc961bb02 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Dec 2009 21:56:18 +0000 Subject: [PATCH 10/72] xen: rename blkbk module xen-blkback. blkbk is rather generic for a modular distro style kernel. 
Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile index 8bab63da3b3e..f1ae1ff07a4d 100644 --- a/drivers/xen/blkback/Makefile +++ b/drivers/xen/blkback/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -blkbk-y := blkback.o xenbus.o interface.o vbd.o +xen-blkback-y := blkback.o xenbus.o interface.o vbd.o From 5cf6e4f6f6d5549904db6ecb3ffd5b8f71f41250 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Feb 2010 16:07:31 -0800 Subject: [PATCH 11/72] xen/blkback: use drv_get/set_drvdata rather than directly accessing driver_data. Direct driver_data access is obsolete and will disappear. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 04c0a12aff36..34f8e4046578 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -105,7 +105,7 @@ static void update_blkif_status(blkif_t *blkif) char *buf) \ { \ struct xenbus_device *dev = to_xenbus_device(_dev); \ - struct backend_info *be = dev->dev.driver_data; \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ \ return sprintf(buf, format, ##args); \ } \ @@ -169,7 +169,7 @@ void xenvbd_sysfs_delif(struct xenbus_device *dev) static int blkback_remove(struct xenbus_device *dev) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); DPRINTK(""); @@ -190,7 +190,7 @@ static int blkback_remove(struct xenbus_device *dev) } kfree(be); - dev->dev.driver_data = NULL; + dev_set_drvdata(&dev->dev, NULL); return 0; } @@ -225,7 +225,7 @@ static int blkback_probe(struct xenbus_device *dev, return -ENOMEM; } be->dev = dev; - 
dev->dev.driver_data = be; + dev_set_drvdata(&dev->dev, be); be->blkif = blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { @@ -348,7 +348,7 @@ static void backend_changed(struct xenbus_watch *watch, static void frontend_changed(struct xenbus_device *dev, enum xenbus_state frontend_state) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); int err; DPRINTK("%s", xenbus_strstate(frontend_state)); From 2ccbfe26c106a1a93a402567b7853c1484c4a0b0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 11 Mar 2010 13:39:50 -0800 Subject: [PATCH 12/72] xen/blkback: Propagate changed size of VBDs Support dynamic resizing of virtual block devices. This patch supports both file backed block devices as well as physical devices that can be dynamically resized on the host side. Signed-off-by: K. Y. Srinivasan Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 3 +++ drivers/xen/blkback/common.h | 2 ++ drivers/xen/blkback/vbd.c | 43 +++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a2ac7189cc0a..6d897664802d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -207,6 +207,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { blkif_t *blkif = arg; + struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -216,6 +217,8 @@ int blkif_schedule(void *arg) while (!kthread_should_stop()) { if (try_to_freeze()) continue; + if (unlikely(vbd->size != vbd_size(vbd))) + vbd_resize(blkif); wait_event_interruptible( blkif->wq, diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index aaf36485bc01..cebcc2b7e9f6 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -52,6 +52,7 @@ struct vbd { unsigned char type; /* VDISK_xxx */ u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; + sector_t 
size; /* Cached size parameter */ }; struct backend_info; @@ -98,6 +99,7 @@ blkif_t *blkif_alloc(domid_t domid); void blkif_disconnect(blkif_t *blkif); void blkif_free(blkif_t *blkif); int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +void vbd_resize(blkif_t *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 410c2eac5ad7..0635c54079f8 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -73,6 +73,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, } vbd->bdev = bdev; + vbd->size = vbd_size(vbd); if (vbd->bdev->bd_disk == NULL) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", @@ -116,3 +117,45 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) out: return rc; } + +void vbd_resize(blkif_t *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkif->be->dev; + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. 
+ */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} From 98e036a356747cfaa225478b1e4875e190257b09 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Mar 2010 15:35:05 -0700 Subject: [PATCH 13/72] xen/blkback: add accessor for xenbus backend device Since backend_info is hidden away now. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/common.h | 2 ++ drivers/xen/blkback/vbd.c | 2 +- drivers/xen/blkback/xenbus.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index cebcc2b7e9f6..0f91830f18c8 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -136,4 +136,6 @@ int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); +struct xenbus_device *blkback_xenbus(struct backend_info *be); + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 0635c54079f8..943ec2313522 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -123,7 +123,7 @@ void vbd_resize(blkif_t *blkif) struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; - struct xenbus_device *dev = blkif->be->dev; + struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 34f8e4046578..c31e5c40b45c 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -42,6 +42,11 @@ static int connect_ring(struct backend_info *); static void backend_changed(struct 
xenbus_watch *, const char **, unsigned int); +struct xenbus_device *blkback_xenbus(struct backend_info *be) +{ + return be->dev; +} + static int blkback_name(blkif_t *blkif, char *buf) { char *devpath, *devname; From cbf462908c8080f47c2a3300072877589dd1275f Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 21 Jul 2010 12:41:45 -0700 Subject: [PATCH 14/72] xen/blkback: Flush blkback data when connecting. First cut at flushing blkback data when first connecting blkback. This should avoid the pygrub issues we are experiencing in (RedHat bugzilla) 466681. [ 2.6.18-xen.hg commit 63b4d7f56688 ] Signed-off-by: Chris Lalancette Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index c31e5c40b45c..a0534fc6a428 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -91,6 +91,13 @@ static void update_blkif_status(blkif_t *blkif) return; } + err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "block flush"); + return; + } + invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); From a81135d90bf176e6139c352c7b96c03d00131836 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 16 Aug 2010 13:43:06 -0700 Subject: [PATCH 15/72] xen/blkback: Print additional information when a vbd is resized. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/vbd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 943ec2313522..dc2572338567 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -126,6 +126,8 @@ void vbd_resize(blkif_t *blkif) struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); vbd->size = new_size; again: From 313d7b003ceceb797e8c0d18ab085ed0638b4aff Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Wed, 24 Nov 2010 22:08:20 -0800 Subject: [PATCH 16/72] blkback: Fix CVE-2010-3699 A guest can cause the backend driver to leak a kernel thread. Such leaked threads hold references to the device, which makes the device impossible to tear down. If shut down, the guest remains a zombie domain, the xenwatch process hangs, and most xm commands will stop working. This patch tries to do the following for blkback: - identify/extract idempotent teardown operations, - add/move the invocation of said teardown operation right before we're about to allocate new resources in the Connected states. [ linux-2.6.18-xen.hg 59f097ef181b ] Signed-off-by: Laszlo Ersek Signed-off-by: Keir Fraser Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index a0534fc6a428..031bc3d7eec3 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -382,6 +382,11 @@ static void frontend_changed(struct xenbus_device *dev, if (dev->state == XenbusStateConnected) break; + /* Enforce precondition before potential leak point. + * blkif_disconnect() is idempotent. 
+ */ + blkif_disconnect(be->blkif); + err = connect_ring(be); if (err) break; @@ -399,6 +404,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: + /* implies blkif_disconnect() via blkback_remove() */ device_unregister(&dev->dev); break; From 248e9f7539f8351cd857d12a74bd52133a3a900f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 24 Feb 2011 17:22:41 -0500 Subject: [PATCH 17/72] xen/blkback: Replace WRITE_BARRIER with (REQ_FLUSH | REQ_FUA) TODO: Double check xen-blkfront.c --- drivers/xen/blkback/blkback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 6d897664802d..cb844f734d91 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -405,7 +405,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; + operation = REQ_FLUSH | REQ_FUA; break; default: operation = 0; /* make gcc happy */ @@ -414,7 +414,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -517,7 +517,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != WRITE_BARRIER); + BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -532,7 +532,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) + else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) blkif->st_wr_sect += preq.nr_sects; return; From bc0c081b0e7a4afc4d2c7bc0666f5cd169e96814 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:02:39 -0500 Subject: [PATCH 18/72] xen/blkback: Update to use blkdev_get_by_dev instead of open_by_devnum. The API for opening a block device has changed since 2.6.32. The correct function to open a device is blkdev_get_by_dev. --- drivers/xen/blkback/vbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index dc2572338567..8c91a2fb0019 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -63,8 +63,8 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, vbd->pdevice = MKDEV(major, minor); - bdev = open_by_devnum(vbd->pdevice, - vbd->readonly ? FMODE_READ : FMODE_WRITE); + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? 
+ FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { DPRINTK("vbd_creat: device %08x could not be opened.\n", From efe08a3eecf15ab022afba48c691d02c7de2fbbb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 5 Feb 2010 14:19:33 -0500 Subject: [PATCH 19/72] xen/blkback: simplify address translations Cherry-pick and modified from 69d64727c42eecd47fdf82c15a54474d21a4012a ("blkback/blktap2: simplify address translations"): "There are quite a number of places where e.g. page->va->page translations happen. Besides yielding smaller code (source and binary), a second goal is to make it easier to determine where virtual addresses of pages allocated through alloc_empty_pages_and_pagevec() are really used (in turn in order to determine whether using highmem pages would be possible there)." The second goal is not the purpose of this patch - it is just to make it easier to read the code. linux-2.6-pvops: * Stripped drivers/xen/gntdev/* * Stripped drivers/xen/netback/* [v2: Stripped blktap off] Signed-off-by: Jan Beulich Signed-off-by: Daniel Stodden Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index cb844f734d91..7c9421cc5991 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -99,9 +99,11 @@ static inline int vaddr_pagenr(pending_req_t *req, int seg) return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + unsigned long pfn = page_to_pfn(pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } @@ -463,8 +465,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; - set_phys_to_machine(__pa(vaddr( - 
pending_req, i)) >> PAGE_SHIFT, + set_phys_to_machine( + page_to_pfn(pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -495,7 +497,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - virt_to_page(vaddr(pending_req, i)), + pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { From e8e28871edf0d0adb0bd7e597c044cbaf7a7f137 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:51:29 -0500 Subject: [PATCH 20/72] xen/blkback: Move global/static variables into struct xen_blkbk. Bundle the lot of discrete variables into a single structure. This is based on what was done in the xen-netback driver: xen: netback: Move global/static variables into struct xen_netbk. (094944631cc5a9d6e623302c987f78117c0bf7ac) Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 82 ++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 7c9421cc5991..c08875b0ad64 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -84,31 +84,34 @@ typedef struct { struct list_head free_list; } pending_req_t; -static pending_req_t *pending_reqs; -static struct list_head pending_free; -static DEFINE_SPINLOCK(pending_free_lock); -static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); - #define BLKBACK_INVALID_HANDLE (~0) -static struct page **pending_pages; -static grant_handle_t *pending_grant_handles; +struct xen_blkbk { + pending_req_t *pending_reqs; + struct list_head pending_free; + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + struct page **pending_pages; + grant_handle_t *pending_grant_handles; +}; + +static struct xen_blkbk *blkbk; static inline int vaddr_pagenr(pending_req_t *req, int seg) { - return (req - pending_reqs) * 
BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_page(req, seg)); + unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } #define pending_handle(_req, _seg) \ - (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) static int do_block_io_op(blkif_t *blkif); @@ -126,12 +129,12 @@ static pending_req_t* alloc_req(void) pending_req_t *req = NULL; unsigned long flags; - spin_lock_irqsave(&pending_free_lock, flags); - if (!list_empty(&pending_free)) { - req = list_entry(pending_free.next, pending_req_t, free_list); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + if (!list_empty(&blkbk->pending_free)) { + req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); list_del(&req->free_list); } - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); return req; } @@ -140,12 +143,12 @@ static void free_req(pending_req_t *req) unsigned long flags; int was_empty; - spin_lock_irqsave(&pending_free_lock, flags); - was_empty = list_empty(&pending_free); - list_add(&req->free_list, &pending_free); - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + was_empty = list_empty(&blkbk->pending_free); + list_add(&req->free_list, &blkbk->pending_free); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); if (was_empty) - wake_up(&pending_free_wq); + wake_up(&blkbk->pending_free_wq); } static void unplug_queue(blkif_t *blkif) @@ -226,8 +229,8 @@ int blkif_schedule(void *arg) blkif->wq, blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( - pending_free_wq, - !list_empty(&pending_free) || 
kthread_should_stop()); + blkbk->pending_free_wq, + !list_empty(&blkbk->pending_free) || kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -466,7 +469,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, continue; set_phys_to_machine( - page_to_pfn(pending_page(pending_req, i)), + page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -497,7 +500,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - pending_page(pending_req, i), + blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { @@ -624,31 +627,40 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; + blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + if (!blkbk) { + printk(KERN_ALERT "%s: out of memory!\n", __func__); + return -ENOMEM; + } + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); - pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } for (i = 0; i < mmap_pages; i++) - pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; rc = blkif_interface_init(); if (rc) goto failed_init; - memset(pending_reqs, 0, sizeof(pending_reqs)); - INIT_LIST_HEAD(&pending_free); + 
memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); + + INIT_LIST_HEAD(&blkbk->pending_free); + spin_lock_init(&blkbk->pending_free_lock); + init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&pending_reqs[i].free_list, &pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) @@ -659,9 +671,11 @@ static int __init blkif_init(void) out_of_memory: printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: - kfree(pending_reqs); - kfree(pending_grant_handles); - free_empty_pages_and_pagevec(pending_pages, mmap_pages); + kfree(blkbk->pending_reqs); + kfree(blkbk->pending_grant_handles); + free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk); + blkbk = NULL; return rc; } From c35950bfa9abaaf16548a287a8d5d782a361414f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:22:28 -0500 Subject: [PATCH 21/72] xen/blkback: Union the blkif_request request specific fields Following in the steps of patch: "xen: Union the blkif_request request specific fields" this patch changes the blkback. Per the original patch: "Prepare for extending the block device ring to allow request specific fields, by moving the request specific fields for reads, writes and barrier requests to a union member." 
Cc: Owen Smith Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- include/xen/blkif.h | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index c08875b0ad64..eda50646775d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -426,7 +426,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } preq.dev = req->handle; - preq.sector_number = req->sector_number; + preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; @@ -438,11 +438,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { uint32_t flags; - seg[i].nsec = req->seg[i].last_sect - - req->seg[i].first_sect + 1; + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; - if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->seg[i].last_sect < req->seg[i].first_sect)) + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; @@ -450,7 +450,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation != READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->seg[i].gref, blkif->domid); + req->u.rw.seg[i].gref, blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); @@ -472,7 +472,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | - (req->seg[i].first_sect << 9); + (req->u.rw.seg[i].first_sect << 9); } if (ret) diff --git a/include/xen/blkif.h b/include/xen/blkif.h index d27428046918..ab794269fc53 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -96,12 +96,12 @@ static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct 
blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) @@ -111,12 +111,12 @@ static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } #endif /* __XEN_BLKIF_H__ */ From 464fb419e17083a18b636c9f4714fc49ef6857d2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:26:10 -0500 Subject: [PATCH 22/72] xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages. Previously we would allocate the array for page using 'kmalloc' which we can as easily do with 'vzalloc'. The pre-allocation of pages was done a bit differently in the past - it used to be that the balloon driver would export "alloc_empty_pages_and_pagevec" which would have in one function created an array, allocated the pages, ballooned the pages out (so the memory behind those pages would be non-present), and provide us those pages. This was OK as those pages were shared between other guest and the only thing we needed was to "swizzle" the MFN of those pages to point to the other guest MFN. We can still "swizzle" the MFNs using the M2P (and P2M) override API calls, but for the sake of simplicity we are dropping the balloon API calls. We can return to those later on. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index eda50646775d..d32198d1be04 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -637,18 +637,23 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages); + blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } - for (i = 0; i < mmap_pages; i++) + for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (blkbk->pending_pages[i] == NULL) { + rc = -ENOMEM; + goto out_of_memory; + } + } rc = blkif_interface_init(); if (rc) goto failed_init; @@ -672,8 +677,12 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); - free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk->pending_grant_handles); + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + vfree(blkbk->pending_pages); vfree(blkbk); blkbk = NULL; return rc; From 5dc03639cc903f887931831d69895facb5260f4b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:46:45 -0500 Subject: [PATCH 23/72] xen/blkback: Utilize the M2P override mechanism for 
GNTMAP_host_map Instead of doing copy grants let's do mapping grants using the M2P (and P2M) override mechanism. Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/blkback/blkback.c --- drivers/xen/blkback/blkback.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d32198d1be04..15790ae96f33 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -41,7 +41,6 @@ #include #include -#include #include #include #include @@ -192,6 +191,17 @@ static void fast_flush_area(pending_req_t *req) ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } } /****************************************************************** @@ -467,10 +477,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + continue; + } - set_phys_to_machine( - page_to_pfn(blkbk->pending_page(pending_req, i)), - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->u.rw.seg[i].first_sect << 9); } From a742b02c75e6e76bd0833f9b6e702f1be7d7e008 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 14 Mar 2011 12:41:26 -0400 Subject: [PATCH 24/72] xen/blkback: Use kzalloc's, and GFP_KERNEL for data structures. The patch titled:"xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages." 
allocates the structures and its member variables using the 'vzalloc'. Daniel Stodden pointed out that vzalloc is good when we use big number of pages - while these are at the max two pages. We can do this using kzalloc. Also the GFP_HIGHMEM does not work properly with Xen, so take that out. We will have to revisit this when a "get_empty_pages_and_pagevec" type API shows up to leverage that. BugLink: http://mid.gmane.org/1299898639.11681.227.camel@agari.van.xensource.com CC: Daniel Stodden Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 15790ae96f33..a6f8f1338118 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -642,7 +642,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -652,9 +652,10 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages); - blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); + blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * + mmap_pages, GFP_KERNEL); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; @@ -663,7 +664,7 @@ static int __init blkif_init(void) for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + 
blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); if (blkbk->pending_pages[i] == NULL) { rc = -ENOMEM; goto out_of_memory; @@ -692,13 +693,13 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - vfree(blkbk->pending_grant_handles); + kfree(blkbk->pending_grant_handles); for (i = 0; i < mmap_pages; i++) { if (blkbk->pending_pages[i]) __free_page(blkbk->pending_pages[i]); } - vfree(blkbk->pending_pages); - vfree(blkbk); + kfree(blkbk->pending_pages); + kfree(blkbk); blkbk = NULL; return rc; } From 314146e515710f8a7d7eaf7a58b7ed590c9c14c3 Mon Sep 17 00:00:00 2001 From: Tom Goetz Date: Thu, 17 Mar 2011 12:14:29 -0400 Subject: [PATCH 25/72] xen/blkback: Fix the WRITE_BARRIER The WRITE_BARRIER was missing the REQ_WRITE option. This was causing the blktap to die. Signed-off-by: Tom Goetz Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a6f8f1338118..4cd5b49de0c1 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -47,6 +47,8 @@ #include #include "common.h" +#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) + /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -420,7 +422,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = REQ_FLUSH | REQ_FUA; + operation = WRITE_BARRIER; break; default: operation = 0; /* make gcc happy */ @@ -429,7 +431,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -537,7 +539,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); + BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -552,7 +554,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) + else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; return; From a1397fa3090c25c6c51c04b4101f2786d16b615f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:05:23 -0400 Subject: [PATCH 26/72] xen/blkback: Add some comments. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 88 ++++++++++++++++++++++++++------- drivers/xen/blkback/common.h | 1 + drivers/xen/blkback/interface.c | 2 - drivers/xen/blkback/vbd.c | 2 - drivers/xen/blkback/xenbus.c | 2 +- 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 4cd5b49de0c1..8a4b1e8eeb62 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -1,11 +1,10 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/main.c * * Back-end of the driver for virtual block devices. This portion of the * driver exports a 'unified' block-device interface that can be accessed * by any operating system that implements a compatible front end. 
A * reference front-end implementation can be found in: - * arch/xen/drivers/blkif/frontend + * drivers/block/xen-blkfront.c * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Copyright (c) 2005, Christopher Clark @@ -88,16 +87,25 @@ typedef struct { #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + pending_req_t *pending_reqs; + /* List of all 'pending_req' available */ struct list_head pending_free; + /* And its spinlock. */ spinlock_t pending_free_lock; wait_queue_head_t pending_free_wq; + /* The list of all pages that are available. */ struct page **pending_pages; + /* And the grant handles that are available. */ grant_handle_t *pending_grant_handles; }; static struct xen_blkbk *blkbk; +/* + * Little helpful macro to figure out the index and virtual address of the + * pending_pages[..]. For each 'pending_req' we have have up to + * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through + * 10 and would index in the pending_pages[..]. */ static inline int vaddr_pagenr(pending_req_t *req, int seg) { return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; @@ -122,8 +130,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); -/****************************************************************** - * misc small helpers +/* + * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ static pending_req_t* alloc_req(void) { @@ -139,6 +147,10 @@ static pending_req_t* alloc_req(void) return req; } +/* + * Return the 'pending_req' structure back to the freepool. We also + * wake up the thread if it was waiting for a free page. + */ static void free_req(pending_req_t *req) { unsigned long flags; @@ -152,6 +164,11 @@ static void free_req(pending_req_t *req) wake_up(&blkbk->pending_free_wq); } +/* + * Give back a reference count on the underlaying storage. 
+ * It is OK to make multiple calls in this function as it + * resets the plug to NULL when it is done on the first call. + */ static void unplug_queue(blkif_t *blkif) { if (blkif->plug == NULL) @@ -162,6 +179,12 @@ static void unplug_queue(blkif_t *blkif) blkif->plug = NULL; } +/* + * Take a reference count on the underlaying storage. + * It is OK to call this multiple times as we check to make sure + * not to double reference. We also give back a reference count + * if it corresponds to another queue. + */ static void plug_queue(blkif_t *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -173,6 +196,10 @@ static void plug_queue(blkif_t *blkif, struct block_device *bdev) blkif->plug = q; } +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ static void fast_flush_area(pending_req_t *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -266,8 +293,8 @@ int blkif_schedule(void *arg) return 0; } -/****************************************************************** - * COMPLETION CALLBACK -- Called as bh->b_end_io() +/* + * Completion callback on the bio's. Called as bh->b_end_io() */ static void __end_block_io_op(pending_req_t *pending_req, int error) @@ -284,6 +311,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) pending_req->status = BLKIF_RSP_ERROR; } + /* If all of the bio's have completed it is time to unmap + * the grant references associated with 'request' and provide + * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -293,6 +323,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } +/* + * bio callback. 
+ */ static void end_block_io_op(struct bio *bio, int error) { __end_block_io_op(bio->bi_private, error); @@ -300,8 +333,8 @@ static void end_block_io_op(struct bio *bio, int error) } -/****************************************************************************** - * NOTIFICATION FROM GUEST OS. +/* + * Notification from the guest OS. */ static void blkif_notify_work(blkif_t *blkif) @@ -318,10 +351,11 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) -/****************************************************************** - * DOWNWARD CALLS -- These interface with the block-device layer proper. +/* + * Function to copy the from the ring buffer the 'struct blkif_request' + * (which has the sectors we want, number of them, grant references, etc), + * and transmute it to the block API to hand it over to the proper block disk. */ - static int do_block_io_op(blkif_t *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; @@ -400,6 +434,10 @@ static int do_block_io_op(blkif_t *blkif) return more_to_do; } +/* + * Transumation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlaying storage. + */ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) @@ -429,7 +467,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, BUG(); } - /* Check that number of segments is sane. */ + /* Check that the number of segments is sane. */ nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { @@ -447,12 +485,14 @@ static void dispatch_rw_block_io(blkif_t *blkif, pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] 
with the PFN of the page in our domain with the + * corresponding grant reference for each page.*/ for (i = 0; i < nseg; i++) { uint32_t flags; seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; @@ -468,6 +508,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); BUG_ON(ret); + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of the + * page from the other domain. */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -485,6 +528,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ continue; } @@ -492,6 +536,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, (req->u.rw.seg[i].first_sect << 9); } + /* If we have failed at this point, we need to undo the M2P override, set + * gnttab_set_unmap_op on all of the grant references and perform the + * hypercall to unmap the grants - that is all done in fast_flush_area. */ if (ret) goto fail_flush; @@ -503,7 +550,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, goto fail_flush; } + /* Get a reference count for the disk queue and start sending I/O */ plug_queue(blkif, preq.bdev); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. 
*/ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -524,7 +575,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -538,6 +589,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, preq.sector_number += seg[i].nsec; } + /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); @@ -578,11 +630,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, -/****************************************************************** - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING +/* + * Put a response on the ring on how the operation fared. */ - - static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 0f91830f18c8..4c140c8e75bd 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -76,6 +76,7 @@ typedef struct blkif_st { atomic_t refcnt; wait_queue_head_t wq; + /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; struct request_queue *plug; diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index e397a4134f1b..a4a15350737f 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -1,6 +1,4 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/interface.c - * * Block-device interface management. * * Copyright (c) 2004, Keir Fraser diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 8c91a2fb0019..95156c95ab2f 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -1,6 +1,4 @@ /****************************************************************************** - * blkback/vbd.c - * * Routines for managing virtual block devices (VBDs). 
* * Copyright (c) 2003-2005, Keir Fraser & Steve Hand diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 031bc3d7eec3..e9c4f80ef1c8 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -107,7 +107,7 @@ static void update_blkif_status(blkif_t *blkif) } -/**************************************************************** +/* * sysfs interface for VBD I/O requests */ From 5489377ce40d52fb722dcd811617114cebad7bba Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:21:50 -0400 Subject: [PATCH 27/72] xen/blkback: blkif->struct blkif_st checkpatch.pl suggested that we don't use the typedef in common.h and this triggered this avalanche of patches. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 24 ++++++++++++------------ drivers/xen/blkback/common.h | 23 ++++++++++++----------- drivers/xen/blkback/interface.c | 16 ++++++++-------- drivers/xen/blkback/vbd.c | 6 +++--- drivers/xen/blkback/xenbus.c | 6 +++--- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8a4b1e8eeb62..d07ad5318a85 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -75,7 +75,7 @@ module_param(debug_lvl, int, 0644); * response queued for it, with the saved 'id' passed back. 
*/ typedef struct { - blkif_t *blkif; + struct blkif_st *blkif; u64 id; int nr_pages; atomic_t pendcnt; @@ -123,11 +123,11 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) -static int do_block_io_op(blkif_t *blkif); -static void dispatch_rw_block_io(blkif_t *blkif, +static int do_block_io_op(struct blkif_st *blkif); +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req); -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* @@ -169,7 +169,7 @@ static void free_req(pending_req_t *req) * It is OK to make multiple calls in this function as it * resets the plug to NULL when it is done on the first call. */ -static void unplug_queue(blkif_t *blkif) +static void unplug_queue(struct blkif_st *blkif) { if (blkif->plug == NULL) return; @@ -185,7 +185,7 @@ static void unplug_queue(blkif_t *blkif) * not to double reference. We also give back a reference count * if it corresponds to another queue. */ -static void plug_queue(blkif_t *blkif, struct block_device *bdev) +static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -237,7 +237,7 @@ static void fast_flush_area(pending_req_t *req) * SCHEDULER FUNCTIONS */ -static void print_stats(blkif_t *blkif) +static void print_stats(struct blkif_st *blkif) { printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", current->comm, blkif->st_oo_req, @@ -250,7 +250,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { - blkif_t *blkif = arg; + struct blkif_st *blkif = arg; struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -337,7 +337,7 @@ static void end_block_io_op(struct bio *bio, int error) * Notification from the guest OS. 
*/ -static void blkif_notify_work(blkif_t *blkif) +static void blkif_notify_work(struct blkif_st *blkif) { blkif->waiting_reqs = 1; wake_up(&blkif->wq); @@ -356,7 +356,7 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) * (which has the sectors we want, number of them, grant references, etc), * and transmute it to the block API to hand it over to the proper block disk. */ -static int do_block_io_op(blkif_t *blkif) +static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; @@ -438,7 +438,7 @@ static int do_block_io_op(blkif_t *blkif) * Transumation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlaying storage. */ -static void dispatch_rw_block_io(blkif_t *blkif, +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req) { @@ -633,7 +633,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* * Put a response on the ring on how the operation fared. */ -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st) { struct blkif_response resp; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 4c140c8e75bd..be3fc93d8a31 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -44,7 +44,7 @@ #define DPRINTK(_f, _a...) \ pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) + __FILE__ , __LINE__ , ## _a) struct vbd { blkif_vdev_t handle; /* what the domain refers to this vbd as */ @@ -57,7 +57,7 @@ struct vbd { struct backend_info; -typedef struct blkif_st { +struct blkif_st { /* Unique identifier for this interface. 
*/ domid_t domid; unsigned int handle; @@ -94,13 +94,14 @@ typedef struct blkif_st { grant_handle_t shmem_handle; grant_ref_t shmem_ref; -} blkif_t; +}; -blkif_t *blkif_alloc(domid_t domid); -void blkif_disconnect(blkif_t *blkif); -void blkif_free(blkif_t *blkif); -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); -void vbd_resize(blkif_t *blkif); +struct blkif_st *blkif_alloc(domid_t domid); +void blkif_disconnect(struct blkif_st *blkif); +void blkif_free(struct blkif_st *blkif); +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn); +void vbd_resize(struct blkif_st *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ @@ -110,7 +111,7 @@ void vbd_resize(blkif_t *blkif); } while (0) /* Create a vbd. */ -int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, unsigned minor, int readonly, int cdrom); void vbd_free(struct vbd *vbd); @@ -125,7 +126,7 @@ struct phys_req { blkif_sector_t sector_number; }; -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); int blkif_interface_init(void); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index a4a15350737f..7d59f13115cf 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -35,9 +35,9 @@ static struct kmem_cache *blkif_cachep; -blkif_t *blkif_alloc(domid_t domid) +struct blkif_st *blkif_alloc(domid_t domid) { - blkif_t *blkif; + struct blkif_st *blkif; blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); if (!blkif) @@ -54,7 +54,7 @@ blkif_t *blkif_alloc(domid_t domid) return blkif; } -static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; @@ -75,7 
+75,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) return 0; } -static void unmap_frontend_page(blkif_t *blkif) +static void unmap_frontend_page(struct blkif_st *blkif) { struct gnttab_unmap_grant_ref op; @@ -86,7 +86,7 @@ static void unmap_frontend_page(blkif_t *blkif) BUG(); } -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) { int err; @@ -143,7 +143,7 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) return 0; } -void blkif_disconnect(blkif_t *blkif) +void blkif_disconnect(struct blkif_st *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -166,7 +166,7 @@ void blkif_disconnect(blkif_t *blkif) } } -void blkif_free(blkif_t *blkif) +void blkif_free(struct blkif_st *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -175,7 +175,7 @@ void blkif_free(blkif_t *blkif) int __init blkif_interface_init(void) { - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), 0, 0, NULL); if (!blkif_cachep) return -ENOMEM; diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 95156c95ab2f..26a37df8173a 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -48,7 +48,7 @@ unsigned long vbd_secsize(struct vbd *vbd) return bdev_logical_block_size(vbd->bdev); } -int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) { struct vbd *vbd; @@ -97,7 +97,7 @@ void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) { struct vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -116,7 +116,7 @@ int 
vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) return rc; } -void vbd_resize(blkif_t *blkif) +void vbd_resize(struct blkif_st *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index e9c4f80ef1c8..67462c4e9ab4 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -30,7 +30,7 @@ struct backend_info { struct xenbus_device *dev; - blkif_t *blkif; + struct blkif_st *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; @@ -47,7 +47,7 @@ struct xenbus_device *blkback_xenbus(struct backend_info *be) return be->dev; } -static int blkback_name(blkif_t *blkif, char *buf) +static int blkback_name(struct blkif_st *blkif, char *buf) { char *devpath, *devname; struct xenbus_device *dev = blkif->be->dev; @@ -67,7 +67,7 @@ static int blkback_name(blkif_t *blkif, char *buf) return 0; } -static void update_blkif_status(blkif_t *blkif) +static void update_blkif_status(struct blkif_st *blkif) { int err; char name[TASK_COMM_LEN]; From 3c64b58cd614c976dcb19e16fa59ab620b3fe130 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:24:45 -0400 Subject: [PATCH 28/72] xen/blkback: Fix checkpatch warnings in vbd.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/vbd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 26a37df8173a..d0ff4cf91a34 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -30,8 +30,9 @@ #include "common.h" -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -40,7 +41,7 @@ unsigned long long vbd_size(struct vbd *vbd) unsigned int vbd_info(struct vbd *vbd) { - return vbd->type | (vbd->readonly?VDISK_READONLY:0); + return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); } unsigned long vbd_secsize(struct vbd *vbd) @@ -126,7 +127,7 @@ void vbd_resize(struct blkif_st *blkif) printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); @@ -134,7 +135,7 @@ again: printk(KERN_WARNING "Error starting transaction"); return; } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", vbd_size(vbd)); if (err) { printk(KERN_WARNING "Error writing new size"); From e5f4b3c498623fc3d83f6d92e00a2b2dbf500cd0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:27:29 -0400 Subject: [PATCH 29/72] xen/blkback: Fix interface.c checkpatch warnings .. except + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; WARNING: line over 80 characters + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); as breaking them up really does not help that much. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/interface.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 7d59f13115cf..163aed41e825 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -86,7 +86,8 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) { int err; @@ -94,7 +95,8 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev if (blkif->irq) return 0; - if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) return -ENOMEM; err = map_frontend_page(blkif, shared_page); @@ -131,8 +133,7 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev err = bind_interdomain_evtchn_to_irqhandler( blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) - { + if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); blkif->blk_rings.common.sring = NULL; From d6091b217dd4fdabc4a8cd6fa61775f1e3eb6efe Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:33:30 -0400 Subject: [PATCH 30/72] xen/blkback: Fix checkpatch warnings of xenbus.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/xenbus.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 67462c4e9ab4..b41ed65db2d3 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -25,10 +25,9 @@ #undef DPRINTK #define DPRINTK(fmt, args...) 
\ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ - __FUNCTION__, __LINE__, ##args) + __func__, __LINE__, ##args) -struct backend_info -{ +struct backend_info { struct xenbus_device *dev; struct blkif_st *blkif; struct xenbus_watch backend_watch; @@ -56,7 +55,8 @@ static int blkback_name(struct blkif_st *blkif, char *buf) if (IS_ERR(devpath)) return PTR_ERR(devpath); - if ((devname = strstr(devpath, "/dev/")) != NULL) + devname = strstr(devpath, "/dev/"); + if (devname != NULL) devname += strlen("/dev/"); else devname = devpath; @@ -153,7 +153,7 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) int error; error = device_create_file(&dev->dev, &dev_attr_physical_device); - if (error) + if (error) goto fail1; error = device_create_file(&dev->dev, &dev_attr_mode); @@ -327,7 +327,10 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ char *p = strrchr(dev->otherend, '/') + 1; - long handle = simple_strtoul(p, NULL, 0); + long handle; + err = strict_strtoul(p, 0, &handle); + if (err) + return; be->major = major; be->minor = minor; @@ -369,7 +372,7 @@ static void frontend_changed(struct xenbus_device *dev, case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + __func__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -494,8 +497,8 @@ static int connect_ring(struct backend_info *be) DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, - "event-channel", "%u", &evtchn, NULL); + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading %s/ring-ref and event-channel", From 2e9977c21f7679d5f616132ae1f7857e932ccd19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:42:07 -0400 Subject: [PATCH 31/72] 
xen/blkback: Fix checkpatch warnings in blkback.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 81 ++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d07ad5318a85..2d413930f235 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -63,8 +63,8 @@ module_param_named(reqs, blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ -static unsigned int log_stats = 0; -static unsigned int debug_lvl = 0; +static unsigned int log_stats; +static unsigned int debug_lvl; module_param(log_stats, int, 0644); module_param(debug_lvl, int, 0644); @@ -74,7 +74,7 @@ module_param(debug_lvl, int, 0644); * the pendcnt towards zero. When it hits zero, the specified domain has a * response queued for it, with the saved 'id' passed back. */ -typedef struct { +struct pending_req { struct blkif_st *blkif; u64 id; int nr_pages; @@ -82,12 +82,12 @@ typedef struct { unsigned short operation; int status; struct list_head free_list; -} pending_req_t; +}; #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + struct pending_req *pending_reqs; /* List of all 'pending_req' available */ struct list_head pending_free; /* And its spinlock. */ @@ -106,14 +106,15 @@ static struct xen_blkbk *blkbk; * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through * 10 and would index in the pending_pages[..]. 
*/ -static inline int vaddr_pagenr(pending_req_t *req, int seg) +static inline int vaddr_pagenr(struct pending_req *req, int seg) { - return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * + BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] -static inline unsigned long vaddr(pending_req_t *req, int seg) +static inline unsigned long vaddr(struct pending_req *req, int seg) { unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); @@ -126,21 +127,22 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(struct blkif_st *blkif); static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req); + struct pending_req *pending_req); static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ -static pending_req_t* alloc_req(void) +static struct pending_req *alloc_req(void) { - pending_req_t *req = NULL; + struct pending_req *req = NULL; unsigned long flags; spin_lock_irqsave(&blkbk->pending_free_lock, flags); if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); + req = list_entry(blkbk->pending_free.next, struct pending_req, + free_list); list_del(&req->free_list); } spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); @@ -151,7 +153,7 @@ static pending_req_t* alloc_req(void) * Return the 'pending_req' structure back to the freepool. We also * wake up the thread if it was waiting for a free page. 
*/ -static void free_req(pending_req_t *req) +static void free_req(struct pending_req *req) { unsigned long flags; int was_empty; @@ -200,7 +202,7 @@ static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(pending_req_t *req) +static void fast_flush_area(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -221,7 +223,8 @@ static void fast_flush_area(pending_req_t *req) GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). */ + * using vaddr(req, i). + */ for (i = 0; i < invcount; i++) { ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); @@ -233,7 +236,7 @@ static void fast_flush_area(pending_req_t *req) } } -/****************************************************************** +/* * SCHEDULER FUNCTIONS */ @@ -269,7 +272,8 @@ int blkif_schedule(void *arg) blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || kthread_should_stop()); + !list_empty(&blkbk->pending_free) || + kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -297,7 +301,7 @@ int blkif_schedule(void *arg) * Completion callback on the bio's. Called as bh->b_end_io() */ -static void __end_block_io_op(pending_req_t *pending_req, int error) +static void __end_block_io_op(struct pending_req *pending_req, int error) { /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && @@ -313,7 +317,8 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) /* If all of the bio's have completed it is time to unmap * the grant references associated with 'request' and provide - * the proper response on the ring. 
*/ + * the proper response on the ring. + */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -360,7 +365,7 @@ static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; - pending_req_t *pending_req; + struct pending_req *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -440,7 +445,7 @@ static int do_block_io_op(struct blkif_st *blkif) */ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req) + struct pending_req *pending_req) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; @@ -487,7 +492,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page.*/ + * corresponding grant reference for each page. + */ for (i = 0; i < nseg; i++) { uint32_t flags; @@ -509,8 +515,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, BUG_ON(ret); /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of the - * page from the other domain. */ + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. 
+ */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -522,12 +529,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (ret) continue; - + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; } @@ -536,9 +544,11 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].first_sect << 9); } - /* If we have failed at this point, we need to undo the M2P override, set - * gnttab_set_unmap_op on all of the grant references and perform the - * hypercall to unmap the grants - that is all done in fast_flush_area. */ + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * fast_flush_area. + */ if (ret) goto fail_flush; @@ -554,7 +564,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, plug_queue(blkif, preq.bdev); /* We set it one so that the last submit_bio does not have to call - * atomic_inc. */ + * atomic_inc. 
+ */ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -575,7 +586,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -694,7 +705,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); + blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -709,7 +720,8 @@ static int __init blkif_init(void) blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || + !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } @@ -733,7 +745,8 @@ static int __init blkif_init(void) init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, + &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) From 0faa8cca883bbc6a0919e3c89128672659b75820 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:58:19 -0400 Subject: [PATCH 32/72] xen/blkback: remove per-queue plugging commit 7eaceaccab5f40bbfda044629a6298616aeaed50 ("block: remove per-queue plugging") added two new interfaces to plug and unplug: blk_start_plug and blk_finish_plug. Lets use those. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 44 +++++++---------------------------- drivers/xen/blkback/common.h | 1 - 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 2d413930f235..464f2e0b5a61 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -166,38 +166,6 @@ static void free_req(struct pending_req *req) wake_up(&blkbk->pending_free_wq); } -/* - * Give back a reference count on the underlaying storage. - * It is OK to make multiple calls in this function as it - * resets the plug to NULL when it is done on the first call. - */ -static void unplug_queue(struct blkif_st *blkif) -{ - if (blkif->plug == NULL) - return; - if (blkif->plug->unplug_fn) - blkif->plug->unplug_fn(blkif->plug); - blk_put_queue(blkif->plug); - blkif->plug = NULL; -} - -/* - * Take a reference count on the underlaying storage. - * It is OK to call this multiple times as we check to make sure - * not to double reference. We also give back a reference count - * if it corresponds to another queue. - */ -static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q == blkif->plug) - return; - unplug_queue(blkif); - blk_get_queue(q); - blkif->plug = q; -} - /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
@@ -280,7 +248,6 @@ int blkif_schedule(void *arg) if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; - unplug_queue(blkif); if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); @@ -456,6 +423,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *bio = NULL; int ret, i; int operation; + struct blk_plug plug; + struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -561,7 +530,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } /* Get a reference count for the disk queue and start sending I/O */ - plug_queue(blkif, preq.bdev); + blk_get_queue(q); + blk_start_plug(&plug); /* We set it one so that the last submit_bio does not have to call * atomic_inc. @@ -620,11 +590,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; + blk_finish_plug(&plug); + blk_put_queue(q); return; fail_flush: fast_flush_area(pending_req); fail_response: + /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ @@ -634,7 +607,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, __end_block_io_op(pending_req, -EINVAL); if (bio) bio_put(bio); - unplug_queue(blkif); + blk_finish_plug(&plug); + blk_put_queue(q); msleep(1); /* back off a bit */ return; } diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index be3fc93d8a31..6257c1106591 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -79,7 +79,6 @@ struct blkif_st { /* One thread per one blkif. 
*/ struct task_struct *xenblkd; unsigned int waiting_reqs; - struct request_queue *plug; /* statistics */ unsigned long st_print; From 7708992616487c00d5ca8ed7612111180d8e1b68 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:51:27 -0400 Subject: [PATCH 33/72] xen/blkback: Separate the bio allocation and the bio submission. We separate the bio allocation (bio_alloc) from the bio submission so that the error paths are much easier, and also so that the bio submission can be done in one tight loop. It also makes the plug/unplug calls much much easier. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 45 ++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 464f2e0b5a61..3c10499d61a7 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -421,7 +421,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; - int ret, i; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int ret, i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -529,14 +530,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_flush; } - /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); - blk_start_plug(&plug); - - /* We set it one so that the last submit_bio does not have to call - * atomic_inc. 
- */ - atomic_set(&pending_req->pendcnt, 1); + /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -552,12 +546,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { - if (bio) { - atomic_inc(&pending_req->pendcnt); - submit_bio(operation, bio); - } - bio = bio_alloc(GFP_KERNEL, nseg-i); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -573,7 +563,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); - bio = bio_alloc(GFP_KERNEL, 0); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -583,15 +573,28 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_sector = -1; } - submit_bio(operation, bio); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. + */ + atomic_set(&pending_req->pendcnt, nbio); + + /* Get a reference count for the disk queue and start sending I/O */ + blk_get_queue(q); + blk_start_plug(&plug); + + for (i = 0; i < nbio; i++) + submit_bio(operation, biolist[i]); + + blk_finish_plug(&plug); + /* Let the I/Os go.. 
*/ + blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; - blk_finish_plug(&plug); - blk_put_queue(q); return; fail_flush: @@ -604,11 +607,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_put_bio: + for (i = 0; i < (nbio-1); i++) + bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); - if (bio) - bio_put(bio); - blk_finish_plug(&plug); - blk_put_queue(q); msleep(1); /* back off a bit */ return; } From b0aef17924a06646403cae8eecf6c73219a63c19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:58:05 -0400 Subject: [PATCH 34/72] xen/blkback: Cleanup move the code a bit around. Moving it so that the code that 'fast_flush_area' code is close to the code that deals with it so that the reader won't lose focus. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 97 +++++++++++++++++------------------ 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3c10499d61a7..f282463d7b5c 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -167,41 +167,18 @@ static void free_req(struct pending_req *req) } /* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. -*/ -static void fast_flush_area(struct pending_req *req) + * Notification from the guest OS. 
+ */ +static void blkif_notify_work(struct blkif_st *blkif) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int i, invcount = 0; - grant_handle_t handle; - int ret; + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} - for (i = 0; i < req->nr_pages; i++) { - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) - continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; - } - - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); - BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). - */ - for (i = 0; i < invcount; i++) { - ret = m2p_remove_override( - virt_to_page(unmap[i].host_addr), false); - if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); - continue; - } - } +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; } /* @@ -264,6 +241,43 @@ int blkif_schedule(void *arg) return 0; } +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ +static void fast_flush_area(struct pending_req *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). 
+ */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } +} /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -305,23 +319,6 @@ static void end_block_io_op(struct bio *bio, int error) } -/* - * Notification from the guest OS. - */ - -static void blkif_notify_work(struct blkif_st *blkif) -{ - blkif->waiting_reqs = 1; - wake_up(&blkif->wq); -} - -irqreturn_t blkif_be_int(int irq, void *dev_id) -{ - blkif_notify_work(dev_id); - return IRQ_HANDLED; -} - - /* * Function to copy the from the ring buffer the 'struct blkif_request' From 1a95fe6e42cefc52c62c471ad87d7fe8643231df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:35:13 -0400 Subject: [PATCH 35/72] xen/blkback: Shuffle code around (vbd_translate moved higher). We take out the chunk of code dealing with mapping to the guest of pages into the xen_blk_map_buf code. And we also move the vbd_translate to be done much earlier. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 131 ++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 60 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index f282463d7b5c..211b2005f963 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -241,6 +241,10 @@ int blkif_schedule(void *arg) return 0; } +struct seg_buf { + unsigned long buf; + unsigned int nsec; +}; /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
@@ -278,6 +282,62 @@ static void fast_flush_area(struct pending_req *req) } } } +static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + int nseg = req->nr_segments; + int ret = 0; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page. + */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + flags = GNTMAP_host_map; + if (pending_req->operation != BLKIF_OP_READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->u.rw.seg[i].gref, pending_req->blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ + continue; + } + + seg[i].buf = map[i].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } + return ret; +} + /* * Completion callback on the bio's. 
Called as bh->b_end_io() */ @@ -411,15 +471,12 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, struct pending_req *pending_req) { - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; - struct { - unsigned long buf; unsigned int nsec; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int ret, i, nbio = 0; + int i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -444,6 +501,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); + /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -456,76 +514,29 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - - /* Fill out preq.nr_sects with proper amount of sectors, and setup - * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page. 
- */ for (i = 0; i < nseg; i++) { - uint32_t flags; - seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; - - flags = GNTMAP_host_map; - if (operation != READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, blkif->domid); } - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); - BUG_ON(ret); - - /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of - * the page from the other domain. - */ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; - } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), - blkbk->pending_page(pending_req, i), false); - if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); - /* We could switch over to GNTTABOP_copy */ - continue; - } - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); - } - - /* If we have failed at this point, we need to undo the M2P override, - * set gnttab_set_unmap_op on all of the grant references and perform - * the hypercall to unmap the grants - that is all done in - * fast_flush_area. - */ - if (ret) - goto fail_flush; - if (vbd_translate(&preq, blkif, operation) != 0) { DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? 
"read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, preq.dev); - goto fail_flush; + goto fail_response; } + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * fast_flush_area. + */ + if (xen_blk_map_buf(req, pending_req, seg)) + goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); From 976222e05ea5a9959ccf880d7a24efbf79b3c6cf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:38:29 -0400 Subject: [PATCH 36/72] xen/blkback: Move the check for misaligned I/O higher. We move it up higher to be in same loop that actually computes the sector number. This way, all of the code that deals with verifying that the request is correct is all done before we do any of the page mapping, I/O submission, etc. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 211b2005f963..9598e0fd0f9e 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -521,6 +521,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; + + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } } if (vbd_translate(&preq, blkif, operation) != 0) { @@ -542,13 +549,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkif_get(blkif); for (i = 0; i < nseg; i++) { - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", 
- blkif->domid); - goto fail_put_bio; - } - while ((bio == NULL) || (bio_add_page(bio, blkbk->pending_page(pending_req, i), From 9f3aedf573dd034d59e7eb6c4ee97648d5be8fc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:50:34 -0400 Subject: [PATCH 37/72] xen/blkback: Change fast_flush_area to xen_blkbk_unmap, and tweak xen_blk_map_seg. The previous name ('fast_flush_area') had nothing to do with what it does right now. Changing the names so that the code dealing with mapping pages in and out of the guest is called xen_blkbk_[map|unmap]. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 9598e0fd0f9e..c645c83f900b 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -249,7 +249,7 @@ struct seg_buf { * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(struct pending_req *req) +static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -282,8 +282,8 @@ static void fast_flush_area(struct pending_req *req) } } } -static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) +static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -361,7 +361,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. 
*/ if (atomic_dec_and_test(&pending_req->pendcnt)) { - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); @@ -540,9 +540,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in - * fast_flush_area. + * xen_blkbk_unmap. */ - if (xen_blk_map_buf(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ @@ -606,7 +606,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_flush: - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); From e93504933ee6982bdc005fa5c24e1ea330faaf8b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 11:34:55 -0400 Subject: [PATCH 38/72] xen/blkback: Move the check for misaligned I/O once more. The commit 976222e05ea5a9959ccf880d7a24efbf79b3c6cf xen/blkback: Move the check for misaligned I/O higher. moved it a bit too high. The preq->vbdev was not set, so the check for misaligned I/O would cause a NULL pointer dereference. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index c645c83f900b..a0d3227955c9 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -514,6 +514,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; + for (i = 0; i < nseg; i++) { seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; @@ -522,12 +523,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; preq.nr_sects += seg[i].nsec; - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); - goto fail_response; - } } if (vbd_translate(&preq, blkif, operation) != 0) { @@ -537,6 +532,16 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } + /* This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. */ + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } + } /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in From 6fd17b5643bf05c29fc226a5aee96328056fca10 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 12:04:17 -0400 Subject: [PATCH 39/72] xen/blkback: Get the 'request_queue' properly. 
After the commit 0faa8cca883bbc6a0919e3c89128672659b75820 (" xen/blkback: remove per-queue plugging") we forgot to retrieve the 'struct request_queue' from the block device. This puts the functionality back in and fixes a NULL pointer bug. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a0d3227955c9..3751325bfc32 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -542,6 +542,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } + q = bdev_get_queue(preq.bdev); + if (!q) + goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in From d2436eda2e81f1993bfe6349f17f52503bffeff5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:17:49 -0400 Subject: [PATCH 40/72] block, xen/blkback: remove blk_[get|put]_queue calls. They were used to check if the queue does not have QUEUE_FLAG_DEAD set. That is not necessary anymore as the 'submit_io' call ends up doing that for us. 
Signed-off-by: Konrad Rzeszutek Wilk --- block/blk-core.c | 2 -- drivers/xen/blkback/blkback.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 9b60e69a5400..90f22cc30799 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,7 +351,6 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } -EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -573,7 +572,6 @@ int blk_get_queue(struct request_queue *q) return 1; } -EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3751325bfc32..59a2bae0f35e 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -479,7 +479,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, int i, nbio = 0; int operation; struct blk_plug plug; - struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -542,9 +541,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } - q = bdev_get_queue(preq.bdev); - if (!q) - goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in @@ -596,7 +592,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_set(&pending_req->pendcnt, nbio); /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -604,7 +599,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blk_finish_plug(&plug); /* Let the I/Os go.. 
*/ - blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; From dfc07b13dcacefda6ebdea14584ed8724dc980ef Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:24:23 -0400 Subject: [PATCH 41/72] xen/blkback: Move it from drivers/xen to drivers/block .. and modify the Makefile and Kconfig files appropriately. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 8 ++++++++ drivers/block/Makefile | 1 + drivers/{xen/blkback => block/xen-blkback}/Makefile | 0 drivers/{xen/blkback => block/xen-blkback}/blkback.c | 0 drivers/{xen/blkback => block/xen-blkback}/common.h | 0 drivers/{xen/blkback => block/xen-blkback}/interface.c | 0 drivers/{xen/blkback => block/xen-blkback}/vbd.c | 0 drivers/{xen/blkback => block/xen-blkback}/xenbus.c | 0 drivers/xen/Kconfig | 8 -------- drivers/xen/Makefile | 1 - 10 files changed, 9 insertions(+), 9 deletions(-) rename drivers/{xen/blkback => block/xen-blkback}/Makefile (100%) rename drivers/{xen/blkback => block/xen-blkback}/blkback.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/common.h (100%) rename drivers/{xen/blkback => block/xen-blkback}/interface.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/vbd.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/xenbus.c (100%) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 83c32cb72582..9abb64689712 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -470,6 +470,14 @@ config XEN_BLKDEV_FRONTEND block device driver. It communicates with a back-end driver in another domain which drives the actual block device. +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. 
+ config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 40528ba56d1b..76646e9a1c91 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o diff --git a/drivers/xen/blkback/Makefile b/drivers/block/xen-blkback/Makefile similarity index 100% rename from drivers/xen/blkback/Makefile rename to drivers/block/xen-blkback/Makefile diff --git a/drivers/xen/blkback/blkback.c b/drivers/block/xen-blkback/blkback.c similarity index 100% rename from drivers/xen/blkback/blkback.c rename to drivers/block/xen-blkback/blkback.c diff --git a/drivers/xen/blkback/common.h b/drivers/block/xen-blkback/common.h similarity index 100% rename from drivers/xen/blkback/common.h rename to drivers/block/xen-blkback/common.h diff --git a/drivers/xen/blkback/interface.c b/drivers/block/xen-blkback/interface.c similarity index 100% rename from drivers/xen/blkback/interface.c rename to drivers/block/xen-blkback/interface.c diff --git a/drivers/xen/blkback/vbd.c b/drivers/block/xen-blkback/vbd.c similarity index 100% rename from drivers/xen/blkback/vbd.c rename to drivers/block/xen-blkback/vbd.c diff --git a/drivers/xen/blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c similarity index 100% rename from drivers/xen/blkback/xenbus.c rename to drivers/block/xen-blkback/xenbus.c diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index fb1af628cbfc..a59638b37c1a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,14 +37,6 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. 
-config XEN_BLKDEV_BACKEND - tristate "Block-device backend driver" - depends on XEN_BACKEND && BLOCK - help - The block-device backend driver allows the kernel to export its - block devices to other guests via a high-performance shared-memory - interface. - config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 29c0a416f082..f420f1ff7f13 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o -obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o From ee9ff8537eacb4383bf9146df6c21b9301c9baa2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 10:57:29 -0400 Subject: [PATCH 42/72] xen/blkback: Squash vbd.c,interface.c in blkback.c and xenbus.c respectively. Daniel Stodden suggested to eliminate vbd.c and interface.c, inlining the critical bits where they belong, respectively. Leaving only blkback.c for the data- and xenbus.c for the control path.
Suggested-by: Daniel Stodden Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/Makefile | 2 +- drivers/block/xen-blkback/blkback.c | 135 +++++++++++++++++++ drivers/block/xen-blkback/interface.c | 185 -------------------------- drivers/block/xen-blkback/vbd.c | 162 ---------------------- drivers/block/xen-blkback/xenbus.c | 151 +++++++++++++++++++++ 5 files changed, 287 insertions(+), 348 deletions(-) delete mode 100644 drivers/block/xen-blkback/interface.c delete mode 100644 drivers/block/xen-blkback/vbd.c diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile index f1ae1ff07a4d..e491c1b76878 100644 --- a/drivers/block/xen-blkback/Makefile +++ b/drivers/block/xen-blkback/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -xen-blkback-y := blkback.o xenbus.o interface.o vbd.o +xen-blkback-y := blkback.o xenbus.o diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 59a2bae0f35e..63001fac9af2 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -166,6 +166,141 @@ static void free_req(struct pending_req *req) wake_up(&blkbk->pending_free_wq); } +/* + * Routines for managing virtual block devices (vbds). + */ + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly ? 
VDISK_READONLY : 0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_logical_block_size(vbd->bdev); +} + +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_size(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} + +void vbd_resize(struct blkif_st *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkback_xenbus(blkif->be); + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. + */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} + /* * Notification from the guest OS. 
*/ diff --git a/drivers/block/xen-blkback/interface.c b/drivers/block/xen-blkback/interface.c deleted file mode 100644 index 163aed41e825..000000000000 --- a/drivers/block/xen-blkback/interface.c +++ /dev/null @@ -1,185 +0,0 @@ -/****************************************************************************** - * Block-device interface management. - * - * Copyright (c) 2004, Keir Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "common.h" -#include -#include -#include - -static struct kmem_cache *blkif_cachep; - -struct blkif_st *blkif_alloc(domid_t domid) -{ - struct blkif_st *blkif; - - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); - if (!blkif) - return ERR_PTR(-ENOMEM); - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 1); - init_waitqueue_head(&blkif->wq); - blkif->st_print = jiffies; - init_waitqueue_head(&blkif->waiting_to_free); - - return blkif; -} - -static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) -{ - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, shared_page, blkif->domid); - - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); - - if (op.status) { - DPRINTK(" Grant table operation failure !\n"); - return op.status; - } - - blkif->shmem_ref = shared_page; - blkif->shmem_handle = op.handle; - - return 0; -} - -static void unmap_frontend_page(struct blkif_st *blkif) -{ - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, blkif->shmem_handle); - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); -} - -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn) -{ - int err; - - /* Already connected through? 
*/ - if (blkif->irq) - return 0; - - blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); - if (!blkif->blk_ring_area) - return -ENOMEM; - - err = map_frontend_page(blkif, shared_page); - if (err) { - free_vm_area(blkif->blk_ring_area); - return err; - } - - switch (blkif->blk_protocol) { - case BLKIF_PROTOCOL_NATIVE: - { - struct blkif_sring *sring; - sring = (struct blkif_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_32: - { - struct blkif_x86_32_sring *sring_x86_32; - sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_64: - { - struct blkif_x86_64_sring *sring_x86_64; - sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); - break; - } - default: - BUG(); - } - - err = bind_interdomain_evtchn_to_irqhandler( - blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - return err; - } - blkif->irq = err; - - return 0; -} - -void blkif_disconnect(struct blkif_st *blkif) -{ - if (blkif->xenblkd) { - kthread_stop(blkif->xenblkd); - blkif->xenblkd = NULL; - } - - atomic_dec(&blkif->refcnt); - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - atomic_inc(&blkif->refcnt); - - if (blkif->irq) { - unbind_from_irqhandler(blkif->irq, blkif); - blkif->irq = 0; - } - - if (blkif->blk_rings.common.sring) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - } -} - -void blkif_free(struct blkif_st *blkif) -{ - if (!atomic_dec_and_test(&blkif->refcnt)) - BUG(); - kmem_cache_free(blkif_cachep, blkif); -} - -int __init blkif_interface_init(void) -{ - blkif_cachep = 
kmem_cache_create("blkif_cache", sizeof(struct blkif_st), - 0, 0, NULL); - if (!blkif_cachep) - return -ENOMEM; - - return 0; -} diff --git a/drivers/block/xen-blkback/vbd.c b/drivers/block/xen-blkback/vbd.c deleted file mode 100644 index d0ff4cf91a34..000000000000 --- a/drivers/block/xen-blkback/vbd.c +++ /dev/null @@ -1,162 +0,0 @@ -/****************************************************************************** - * Routines for managing virtual block devices (VBDs). - * - * Copyright (c) 2003-2005, Keir Fraser & Steve Hand - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "common.h" - -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) - -unsigned long long vbd_size(struct vbd *vbd) -{ - return vbd_sz(vbd); -} - -unsigned int vbd_info(struct vbd *vbd) -{ - return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); -} - -unsigned long vbd_secsize(struct vbd *vbd) -{ - return bdev_logical_block_size(vbd->bdev); -} - -int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, - unsigned minor, int readonly, int cdrom) -{ - struct vbd *vbd; - struct block_device *bdev; - - vbd = &blkif->vbd; - vbd->handle = handle; - vbd->readonly = readonly; - vbd->type = 0; - - vbd->pdevice = MKDEV(major, minor); - - bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? - FMODE_READ : FMODE_WRITE, NULL); - - if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", - vbd->pdevice); - return -ENOENT; - } - - vbd->bdev = bdev; - vbd->size = vbd_size(vbd); - - if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", - vbd->pdevice); - vbd_free(vbd); - return -ENOENT; - } - - if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) - vbd->type |= VDISK_CDROM; - if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) - vbd->type |= VDISK_REMOVABLE; - - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", - handle, blkif->domid); - return 0; -} - -void vbd_free(struct vbd *vbd) -{ - if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); - vbd->bdev = NULL; -} - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) -{ - struct vbd *vbd = &blkif->vbd; - int rc = -EACCES; - - if ((operation != READ) && vbd->readonly) - goto out; - - if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) - goto out; - - req->dev = vbd->pdevice; - req->bdev = vbd->bdev; - rc = 0; - - out: - return rc; -} - -void vbd_resize(struct blkif_st *blkif) -{ - struct vbd *vbd = &blkif->vbd; - struct xenbus_transaction xbt; - int err; - struct xenbus_device *dev = blkback_xenbus(blkif->be); - unsigned long long new_size = vbd_size(vbd); - - printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", - blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); - vbd->size = new_size; -again: - err = xenbus_transaction_start(&xbt); - if (err) { - printk(KERN_WARNING "Error starting transaction"); - return; - } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(vbd)); - if (err) { - printk(KERN_WARNING "Error writing new size"); - goto abort; - } - /* - * Write the current state; we will use this to synchronize - * the front-end. If the current state is "connected" the - * front-end will get the new size information online. 
- */ - err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); - if (err) { - printk(KERN_WARNING "Error writing the state"); - goto abort; - } - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - if (err) - printk(KERN_WARNING "Error ending transaction"); -abort: - xenbus_transaction_end(xbt, 1); -} diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index b41ed65db2d3..0c263a248007 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "common.h" #undef DPRINTK @@ -36,6 +38,7 @@ struct backend_info { char *mode; }; +static struct kmem_cache *blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, @@ -106,6 +109,154 @@ static void update_blkif_status(struct blkif_st *blkif) } } +struct blkif_st *blkif_alloc(domid_t domid) +{ + struct blkif_st *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(struct blkif_st *blkif) +{ + struct 
gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(struct blkif_st *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if 
(blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(struct blkif_st *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +int __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), + 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; +} /* * sysfs interface for VBD I/O requests From 6cd0388cd600a51a8824dc5b34f1107b367b0cac Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:01:47 -0400 Subject: [PATCH 43/72] xen-blkback: Remove from the copyright notice the address. There is no need for it, as the address is updated constantly in the root of the Linux kernel. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 0c263a248007..c6c5286aa813 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -12,9 +12,6 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include From 42c7841d171a2fe32005738dfebd724a90921496 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:21:43 -0400 Subject: [PATCH 44/72] xen-blkback: Inline some of the functions that were moved from vbd/interface.c Shuffling code around.
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 78 +++-------------------------- drivers/block/xen-blkback/common.h | 22 ++------ drivers/block/xen-blkback/xenbus.c | 58 +++++++++++++++++++-- 3 files changed, 65 insertions(+), 93 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 63001fac9af2..806c2c947c63 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -170,75 +170,9 @@ static void free_req(struct pending_req *req) * Routines for managing virtual block devices (vbds). */ -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) -unsigned long long vbd_size(struct vbd *vbd) -{ - return vbd_sz(vbd); -} - -unsigned int vbd_info(struct vbd *vbd) -{ - return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); -} - -unsigned long vbd_secsize(struct vbd *vbd) -{ - return bdev_logical_block_size(vbd->bdev); -} - -int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, - unsigned minor, int readonly, int cdrom) -{ - struct vbd *vbd; - struct block_device *bdev; - - vbd = &blkif->vbd; - vbd->handle = handle; - vbd->readonly = readonly; - vbd->type = 0; - - vbd->pdevice = MKDEV(major, minor); - - bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? 
- FMODE_READ : FMODE_WRITE, NULL); - - if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", - vbd->pdevice); - return -ENOENT; - } - - vbd->bdev = bdev; - vbd->size = vbd_size(vbd); - - if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", - vbd->pdevice); - vbd_free(vbd); - return -ENOENT; - } - - if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) - vbd->type |= VDISK_CDROM; - if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) - vbd->type |= VDISK_REMOVABLE; - - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", - handle, blkif->domid); - return 0; -} - -void vbd_free(struct vbd *vbd) -{ - if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); - vbd->bdev = NULL; -} - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, + int operation) { struct vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -257,13 +191,13 @@ int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) return rc; } -void vbd_resize(struct blkif_st *blkif) +static void vbd_resize(struct blkif_st *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; struct xenbus_device *dev = blkback_xenbus(blkif->be); - unsigned long long new_size = vbd_size(vbd); + unsigned long long new_size = vbd_sz(vbd); printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); @@ -276,7 +210,7 @@ again: return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(vbd)); + (unsigned long long)vbd_sz(vbd)); if (err) { printk(KERN_WARNING "Error writing new size"); goto abort; @@ -344,7 +278,7 @@ int blkif_schedule(void *arg) while (!kthread_should_stop()) { if (try_to_freeze()) continue; - if (unlikely(vbd->size != vbd_size(vbd))) + if (unlikely(vbd->size != vbd_sz(vbd))) vbd_resize(blkif); 
wait_event_interruptible( diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6257c1106591..4b5acb3e8b24 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -95,12 +95,10 @@ struct blkif_st { grant_ref_t shmem_ref; }; -struct blkif_st *blkif_alloc(domid_t domid); -void blkif_disconnect(struct blkif_st *blkif); -void blkif_free(struct blkif_st *blkif); -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn); -void vbd_resize(struct blkif_st *blkif); + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ @@ -109,24 +107,12 @@ void vbd_resize(struct blkif_st *blkif); wake_up(&(_b)->waiting_to_free);\ } while (0) -/* Create a vbd. */ -int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, - unsigned minor, int readonly, int cdrom); -void vbd_free(struct vbd *vbd); - -unsigned long long vbd_size(struct vbd *vbd); -unsigned int vbd_info(struct vbd *vbd); -unsigned long vbd_secsize(struct vbd *vbd); - struct phys_req { unsigned short dev; unsigned short nr_sects; struct block_device *bdev; blkif_sector_t sector_number; }; - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); - int blkif_interface_init(void); int blkif_xenbus_init(void); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c6c5286aa813..75bf49bd365c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -327,6 +327,56 @@ void xenvbd_sysfs_delif(struct xenbus_device *dev) device_remove_file(&dev->dev, &dev_attr_physical_device); } + +static void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, + unsigned major, unsigned minor, int readonly, + int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_sz(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} static int blkback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -595,7 +645,7 @@ again: goto abort; err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(&be->blkif->vbd)); + (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sectors", dev->nodename); @@ -604,14 +654,16 @@ again: /* FIXME: use a typename instead */ err = xenbus_printf(xbt, dev->nodename, "info", "%u", - vbd_info(&be->blkif->vbd)); + be->blkif->vbd.type | + (be->blkif->vbd.readonly ? 
VDISK_READONLY : 0)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/info", dev->nodename); goto abort; } err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", - vbd_secsize(&be->blkif->vbd)); + (unsigned long) + bdev_logical_block_size(be->blkif->vbd.bdev)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sector-size", dev->nodename); From 8b6bf747d70e5bac1a34c8fd773230e1cfdd7546 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:50:43 -0400 Subject: [PATCH 45/72] xen/blkback: Prefix exposed functions with xen_ And also shorten the name if it has blkback to blkbk. This results in the symbol table (if compiled in the kernel) to be much shorter, prettier, and also easier to search for. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 36 ++++++------- drivers/block/xen-blkback/common.h | 18 +++---- drivers/block/xen-blkback/xenbus.c | 80 +++++++++++++++-------------- 3 files changed, 68 insertions(+), 66 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 806c2c947c63..c4bc85e69d33 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -53,13 +53,13 @@ * pulled from a communication ring are quite likely to end up being part of * the same scatter/gather request at the disc. * - * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** * * This will increase the chances of being able to write whole tracks. * 64 should be enough to keep us competitive with Linux. 
*/ -static int blkif_reqs = 64; -module_param_named(reqs, blkif_reqs, int, 0); +static int xen_blkif_reqs = 64; +module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ @@ -196,7 +196,7 @@ static void vbd_resize(struct blkif_st *blkif) struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; - struct xenbus_device *dev = blkback_xenbus(blkif->be); + struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", @@ -244,7 +244,7 @@ static void blkif_notify_work(struct blkif_st *blkif) wake_up(&blkif->wq); } -irqreturn_t blkif_be_int(int irq, void *dev_id) +irqreturn_t xen_blkif_be_int(int irq, void *dev_id) { blkif_notify_work(dev_id); return IRQ_HANDLED; @@ -265,12 +265,12 @@ static void print_stats(struct blkif_st *blkif) blkif->st_oo_req = 0; } -int blkif_schedule(void *arg) +int xen_blkif_schedule(void *arg) { struct blkif_st *blkif = arg; struct vbd *vbd = &blkif->vbd; - blkif_get(blkif); + xen_blkif_get(blkif); if (debug_lvl) printk(KERN_DEBUG "%s: started\n", current->comm); @@ -305,7 +305,7 @@ int blkif_schedule(void *arg) printk(KERN_DEBUG "%s: exiting\n", current->comm); blkif->xenblkd = NULL; - blkif_put(blkif); + xen_blkif_put(blkif); return 0; } @@ -417,7 +417,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && (error == -EOPNOTSUPP)) { DPRINTK("blkback: write barrier op failed, not supported\n"); - blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { DPRINTK("Buffer not up-to-date at end of operation, " @@ -433,7 +433,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) 
xen_blkbk_unmap(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); - blkif_put(pending_req->blkif); + xen_blkif_put(pending_req->blkif); free_req(pending_req); } } @@ -619,7 +619,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ - blkif_get(blkif); + xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { while ((bio == NULL) || @@ -751,7 +751,7 @@ static void make_response(struct blkif_st *blkif, u64 id, notify_remote_via_irq(blkif->irq); } -static int __init blkif_init(void) +static int __init xen_blkif_init(void) { int i, mmap_pages; int rc = 0; @@ -765,10 +765,10 @@ static int __init blkif_init(void) return -ENOMEM; } - mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * - blkif_reqs, GFP_KERNEL); + xen_blkif_reqs, GFP_KERNEL); blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * @@ -788,7 +788,7 @@ static int __init blkif_init(void) goto out_of_memory; } } - rc = blkif_interface_init(); + rc = xen_blkif_interface_init(); if (rc) goto failed_init; @@ -798,11 +798,11 @@ static int __init blkif_init(void) spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); - for (i = 0; i < blkif_reqs; i++) + for (i = 0; i < xen_blkif_reqs; i++) list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); - rc = blkif_xenbus_init(); + rc = xen_blkif_xenbus_init(); if (rc) goto failed_init; @@ -823,6 +823,6 @@ static int __init blkif_init(void) return rc; } -module_init(blkif_init); +module_init(xen_blkif_init); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 
4b5acb3e8b24..16af388268e7 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -100,8 +100,8 @@ struct blkif_st { (_v)->bdev->bd_part->nr_sects : \ get_capacity((_v)->bdev->bd_disk)) -#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) -#define blkif_put(_b) \ +#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define xen_blkif_put(_b) \ do { \ if (atomic_dec_and_test(&(_b)->refcnt)) \ wake_up(&(_b)->waiting_to_free);\ @@ -113,16 +113,16 @@ struct phys_req { struct block_device *bdev; blkif_sector_t sector_number; }; -int blkif_interface_init(void); +int xen_blkif_interface_init(void); -int blkif_xenbus_init(void); +int xen_blkif_xenbus_init(void); -irqreturn_t blkif_be_int(int irq, void *dev_id); -int blkif_schedule(void *arg); +irqreturn_t xen_blkif_be_int(int irq, void *dev_id); +int xen_blkif_schedule(void *arg); -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state); +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); -struct xenbus_device *blkback_xenbus(struct backend_info *be); +struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 75bf49bd365c..64b0a1c760fb 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -35,13 +35,13 @@ struct backend_info { char *mode; }; -static struct kmem_cache *blkif_cachep; +static struct kmem_cache *xen_blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); -struct xenbus_device *blkback_xenbus(struct backend_info *be) +struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) { return be->dev; } @@ -67,7 +67,7 @@ static int blkback_name(struct blkif_st *blkif, char *buf) return 0; } -static 
void update_blkif_status(struct blkif_st *blkif) +static void xen_update_blkif_status(struct blkif_st *blkif) { int err; char name[TASK_COMM_LEN]; @@ -98,7 +98,7 @@ static void update_blkif_status(struct blkif_st *blkif) } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); - blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; @@ -106,11 +106,11 @@ static void update_blkif_status(struct blkif_st *blkif) } } -struct blkif_st *blkif_alloc(domid_t domid) +static struct blkif_st *xen_blkif_alloc(domid_t domid) { struct blkif_st *blkif; - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) return ERR_PTR(-ENOMEM); @@ -157,8 +157,8 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn) +static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) { int err; @@ -202,8 +202,9 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, BUG(); } - err = bind_interdomain_evtchn_to_irqhandler( - blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, + xen_blkif_be_int, 0, + "blkif-backend", blkif); if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); @@ -215,7 +216,7 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, return 0; } -void blkif_disconnect(struct blkif_st *blkif) +static void xen_blkif_disconnect(struct blkif_st *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -238,18 +239,19 @@ void blkif_disconnect(struct blkif_st *blkif) } } -void blkif_free(struct blkif_st *blkif) +void xen_blkif_free(struct blkif_st *blkif) { if 
(!atomic_dec_and_test(&blkif->refcnt)) BUG(); - kmem_cache_free(blkif_cachep, blkif); + kmem_cache_free(xen_blkif_cachep, blkif); } -int __init blkif_interface_init(void) +int __init xen_blkif_interface_init(void) { - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), - 0, 0, NULL); - if (!blkif_cachep) + xen_blkif_cachep = kmem_cache_create("blkif_cache", + sizeof(struct blkif_st), + 0, 0, NULL); + if (!xen_blkif_cachep) return -ENOMEM; return 0; @@ -377,7 +379,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, handle, blkif->domid); return 0; } -static int blkback_remove(struct xenbus_device *dev) +static int xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -393,9 +395,9 @@ static int blkback_remove(struct xenbus_device *dev) } if (be->blkif) { - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); vbd_free(&be->blkif->vbd); - blkif_free(be->blkif); + xen_blkif_free(be->blkif); be->blkif = NULL; } @@ -404,8 +406,8 @@ static int blkback_remove(struct xenbus_device *dev) return 0; } -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state) +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; @@ -423,8 +425,8 @@ int blkback_barrier(struct xenbus_transaction xbt, * structures, and watch the store waiting for the hotplug scripts to tell us * the device's physical major and minor numbers. Switch to InitWait. 
*/ -static int blkback_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) +static int xen_blkbk_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) { int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), @@ -437,7 +439,7 @@ static int blkback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); - be->blkif = blkif_alloc(dev->otherend_id); + be->blkif = xen_blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { err = PTR_ERR(be->blkif); be->blkif = NULL; @@ -461,7 +463,7 @@ static int blkback_probe(struct xenbus_device *dev, fail: DPRINTK("failed"); - blkback_remove(dev); + xen_blkbk_remove(dev); return err; } @@ -550,7 +552,7 @@ static void backend_changed(struct xenbus_watch *watch, } /* We're potentially connected now */ - update_blkif_status(be->blkif); + xen_update_blkif_status(be->blkif); } } @@ -586,16 +588,16 @@ static void frontend_changed(struct xenbus_device *dev, /* Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. */ - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); err = connect_ring(be); if (err) break; - update_blkif_status(be->blkif); + xen_update_blkif_status(be->blkif); break; case XenbusStateClosing: - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; @@ -640,7 +642,7 @@ again: return; } - err = blkback_barrier(xbt, be, 1); + err = xen_blkbk_barrier(xbt, be, 1); if (err) goto abort; @@ -726,7 +728,7 @@ static int connect_ring(struct backend_info *be) ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. 
*/ - err = blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, evtchn); if (err) { xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", ring_ref, evtchn); @@ -740,23 +742,23 @@ static int connect_ring(struct backend_info *be) /* ** Driver Registration ** */ -static const struct xenbus_device_id blkback_ids[] = { +static const struct xenbus_device_id xen_blkbk_ids[] = { { "vbd" }, { "" } }; -static struct xenbus_driver blkback = { +static struct xenbus_driver xen_blkbk = { .name = "vbd", .owner = THIS_MODULE, - .ids = blkback_ids, - .probe = blkback_probe, - .remove = blkback_remove, + .ids = xen_blkbk_ids, + .probe = xen_blkbk_probe, + .remove = xen_blkbk_remove, .otherend_changed = frontend_changed }; -int blkif_xenbus_init(void) +int xen_blkif_xenbus_init(void) { - return xenbus_register_backend(&blkback); + return xenbus_register_backend(&xen_blkbk); } From 97961ef46b9b5a6a7c918a38b898a7b3e49869f4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Apr 2011 12:57:59 -0400 Subject: [PATCH 46/72] xen/blkback: Move the plugging/unplugging to a higher level. We used to do the plug/unplug on the submit_bio. But that means if within a stream of WRITE, WRITE, WRITE,...,WRITE we have one READ, it could stall the pipeline (as the 'submit_bio' could trigger the unplug_fnc to be called and stall/sync when doing the READ). Instead we want to move the unplugging to when the whole (or as much as possible) ring buffer has been processed. This also eliminates us doing plug/unplug for each request.
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c4bc85e69d33..ed85ba94b2e0 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -276,6 +276,8 @@ int xen_blkif_schedule(void *arg) printk(KERN_DEBUG "%s: started\n", current->comm); while (!kthread_should_stop()) { + struct blk_plug plug; + if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) @@ -292,9 +294,13 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ + blk_start_plug(&plug); + if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; + blk_finish_plug(&plug); + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -547,7 +553,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i, nbio = 0; int operation; - struct blk_plug plug; switch (req->operation) { case BLKIF_OP_READ: @@ -660,15 +665,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, */ atomic_set(&pending_req->pendcnt, nbio); - /* Get a reference count for the disk queue and start sending I/O */ - blk_start_plug(&plug); - for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); - blk_finish_plug(&plug); - /* Let the I/Os go.. */ - if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) From 013c3ca184851078b9c04744efd4d47e52c6ecf8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Apr 2011 16:24:18 -0400 Subject: [PATCH 47/72] xen/blkback: Stick REQ_SYNC on WRITEs to deal with CFQ I/O scheduler. If one runs a simple fio request with random read/write with a 20%/80% ratio, the numbers are incredibly bad when using the CFQ scheduler. 
IOmeter | | | | 64K, randrw | NOOP | CFQ | deadline | randrwmix=80 | | | | --------------+-------+------+----------+ blkback |103/27 |32/10 | 102/27 | --------------+-------+------+----------+ QEMU qdisk |103/27 |102/27| 102/27 | The problem as explained by Vivek Goyal was: ".. that difference is that sync vs async requests. In the case of a kernel thread submitting IO, [..] all the WRITES might be being considered as async and will go in a different queue. If you mix those with some READS, they are always sync and will go in different queue. In presence of sync queue, CFQ will idle and choke up WRITES in an attempt to improve latencies of READs. In case of AIO [note: this is what QEMU qdisk is doing] , [..] it is direct IO and both READS and WRITES will be considered SYNC and will go in a single queue and no choking of WRITES will take place." The solution is quite simple, tack on REQ_SYNC (which is what the WRITE_ODIRECT macro points to) and the numbers go back up. Suggested-by: Vivek Goyal --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ed85ba94b2e0..8583b130499a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -559,7 +559,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, operation = READ; break; case BLKIF_OP_WRITE: - operation = WRITE; + operation = WRITE_ODIRECT; break; case BLKIF_OP_WRITE_BARRIER: operation = WRITE_BARRIER; From a19be5f0f073525306f6a4b000d90dc84065ed93 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 27 Apr 2011 12:40:11 -0400 Subject: [PATCH 48/72] Revert "xen/blkback: Move the plugging/unplugging to a higher level." This reverts commit 97961ef46b9b5a6a7c918a38b898a7b3e49869f4 because we lose about 15% performance if we do the unplugging at the end of reading the ring buffer.
--- drivers/block/xen-blkback/blkback.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 8583b130499a..eb068d0b47ea 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -276,8 +276,6 @@ int xen_blkif_schedule(void *arg) printk(KERN_DEBUG "%s: started\n", current->comm); while (!kthread_should_stop()) { - struct blk_plug plug; - if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) @@ -294,13 +292,9 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - blk_start_plug(&plug); - if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; - blk_finish_plug(&plug); - if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -553,6 +547,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i, nbio = 0; int operation; + struct blk_plug plug; switch (req->operation) { case BLKIF_OP_READ: @@ -665,9 +660,15 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, */ atomic_set(&pending_req->pendcnt, nbio); + /* Get a reference count for the disk queue and start sending I/O */ + blk_start_plug(&plug); + for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); + blk_finish_plug(&plug); + /* Let the I/Os go.. */ + if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) From 73d842af27b863cbc816e75003edbc287bf57130 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 12:41:03 -0400 Subject: [PATCH 49/72] xen-blkfront: Provide for 'feature-flush-cache' the BLKIF_OP_WRITE_FLUSH_CACHE operation. 
The operation BLKIF_OP_WRITE_FLUSH_CACHE has existed in the Xen tree header file for years but it was never present in the Linux tree because neither the frontend nor the backend supported this interface. Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/interface/io/blkif.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 61e523af3c46..3d5d6db864fe 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -44,6 +44,19 @@ typedef uint64_t blkif_sector_t; */ #define BLKIF_OP_WRITE_BARRIER 2 +/* + * Recognised if "feature-flush-cache" is present in backend xenbus + * info. A flush will ask the underlying storage hardware to flush its + * non-volatile caches as appropriate. The "feature-flush-cache" node + * contains a boolean indicating whether flush requests are likely to + * succeed or fail. Either way, a flush request may fail at any time + * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying + * block-device hardware. The boolean simply indicates whether or not it + * is worthwhile for the frontend to attempt flushes. If a backend does + * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the + * "feature-flush-cache" node! + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. From 24f567f952aa308c3352f3340b9d296fc72bd066 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 4 May 2011 17:07:27 -0400 Subject: [PATCH 50/72] xen/blkback: Add support for BLKIF_OP_FLUSH_DISKCACHE and drop BLKIF_OP_WRITE_BARRIER. We drop the support for 'feature-barrier' and add in the support for the 'feature-flush-cache' if the real backend storage supports flushing.
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 33 ++++++++++++++++------------- drivers/block/xen-blkback/common.h | 7 +++--- drivers/block/xen-blkback/xenbus.c | 19 +++++++++++------ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index eb068d0b47ea..72ede0bf2697 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,8 +46,6 @@ #include #include "common.h" -#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) - /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -256,9 +254,9 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct blkif_st *blkif) { - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -414,10 +412,10 @@ static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_ static void __end_block_io_op(struct pending_req *pending_req, int error) { /* An error fails the entire request. 
*/ - if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - DPRINTK("blkback: write barrier op failed, not supported\n"); - xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); + DPRINTK("blkback: flush diskcache op failed, not supported\n"); + xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { DPRINTK("Buffer not up-to-date at end of operation, " @@ -506,13 +504,14 @@ static int do_block_io_op(struct blkif_st *blkif) blkif->st_rd_req++; dispatch_rw_block_io(blkif, &req, pending_req); break; - case BLKIF_OP_WRITE_BARRIER: - blkif->st_br_req++; + case BLKIF_OP_FLUSH_DISKCACHE: + blkif->st_f_req++; /* fall through */ case BLKIF_OP_WRITE: blkif->st_wr_req++; dispatch_rw_block_io(blkif, &req, pending_req); break; + case BLKIF_OP_WRITE_BARRIER: default: /* A good sign something is wrong: sleep for a while to * avoid excessive CPU consumption by a bad guest. */ @@ -556,9 +555,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, case BLKIF_OP_WRITE: operation = WRITE_ODIRECT; break; - case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; + case BLKIF_OP_FLUSH_DISKCACHE: + operation = WRITE_FLUSH; + /* The frontend likes to set this to -1, which vbd_translate + * is alergic too. */ + req->u.rw.sector_number = 0; break; + case BLKIF_OP_WRITE_BARRIER: + /* Should never get here. */ default: operation = 0; /* make gcc happy */ BUG(); @@ -566,7 +570,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. 
*/ @@ -643,7 +647,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a barrier. */ if (!bio) { - BUG_ON(operation != WRITE_BARRIER); + BUG_ON(operation != WRITE_FLUSH); bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -651,7 +655,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; - bio->bi_sector = -1; } @@ -671,7 +674,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) + else if (operation == WRITE || operation == WRITE_FLUSH) blkif->st_wr_sect += preq.nr_sects; return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 16af388268e7..af93837e1295 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -53,6 +53,7 @@ struct vbd { u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; sector_t size; /* Cached size parameter */ + bool flush_support; }; struct backend_info; @@ -85,7 +86,7 @@ struct blkif_st { int st_rd_req; int st_wr_req; int st_oo_req; - int st_br_req; + int st_f_req; int st_rd_sect; int st_wr_sect; @@ -120,8 +121,8 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); -int xen_blkbk_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state); +int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, + struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 64b0a1c760fb..9adcf806f83f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -276,7 +276,7 @@ int 
__init xen_blkif_interface_init(void) VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); @@ -284,7 +284,7 @@ static struct attribute *vbdstat_attrs[] = { &dev_attr_oo_req.attr, &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, - &dev_attr_br_req.attr, + &dev_attr_f_req.attr, &dev_attr_rd_sect.attr, &dev_attr_wr_sect.attr, NULL @@ -343,6 +343,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, { struct vbd *vbd; struct block_device *bdev; + struct request_queue *q; vbd = &blkif->vbd; vbd->handle = handle; @@ -375,6 +376,10 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; + q = bdev_get_queue(bdev); + if (q && q->flush_flags) + vbd->flush_support = true; + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -406,16 +411,16 @@ static int xen_blkbk_remove(struct xenbus_device *dev) return 0; } -int xen_blkbk_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state) +int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, + struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; - err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", "%d", state); if (err) - xenbus_dev_fatal(dev, err, "writing feature-barrier"); + xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); return err; } @@ -642,7 +647,7 @@ again: return; } - err = xen_blkbk_barrier(xbt, be, 1); + err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); if (err) goto abort; From fc53bf757ede292312eee10d64f4e691c8c8cebf Mon Sep 17 
00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 13:37:23 -0400 Subject: [PATCH 51/72] xen/blkback: Squash the checking for operation into dispatch_rw_block_io We do a check for the operations right before calling dispatch_rw_block_io. And then we do the same check in dispatch_rw_block_io. This patch squashes those checks into the 'dispatch_rw_block_io' function. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 45 +++++++++-------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 72ede0bf2697..5f4284729a3a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -123,9 +123,9 @@ static inline unsigned long vaddr(struct pending_req *req, int seg) static int do_block_io_op(struct blkif_st *blkif); -static void dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req); +static int dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req); static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); @@ -499,30 +499,8 @@ static int do_block_io_op(struct blkif_st *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - switch (req.operation) { - case BLKIF_OP_READ: - blkif->st_rd_req++; - dispatch_rw_block_io(blkif, &req, pending_req); + if (dispatch_rw_block_io(blkif, &req, pending_req)) break; - case BLKIF_OP_FLUSH_DISKCACHE: - blkif->st_f_req++; - /* fall through */ - case BLKIF_OP_WRITE: - blkif->st_wr_req++; - dispatch_rw_block_io(blkif, &req, pending_req); - break; - case BLKIF_OP_WRITE_BARRIER: - default: - /* A good sign something is wrong: sleep for a while to - * avoid excessive CPU consumption by a bad guest. 
*/ - msleep(1); - DPRINTK("error: unknown block io operation [%d]\n", - req.operation); - make_response(blkif, req.id, req.operation, - BLKIF_RSP_ERROR); - free_req(pending_req); - break; - } /* Yield point for this unbounded loop. */ cond_resched(); @@ -535,7 +513,7 @@ static int do_block_io_op(struct blkif_st *blkif) * Transumation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlaying storage. */ -static void dispatch_rw_block_io(struct blkif_st *blkif, +static int dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, struct pending_req *pending_req) { @@ -550,22 +528,25 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, switch (req->operation) { case BLKIF_OP_READ: + blkif->st_rd_req++; operation = READ; break; case BLKIF_OP_WRITE: + blkif->st_wr_req++; operation = WRITE_ODIRECT; break; case BLKIF_OP_FLUSH_DISKCACHE: + blkif->st_f_req++; operation = WRITE_FLUSH; /* The frontend likes to set this to -1, which vbd_translate * is alergic too. */ req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: - /* Should never get here. */ default: operation = 0; /* make gcc happy */ - BUG(); + goto fail_response; + break; } /* Check that the number of segments is sane. 
*/ @@ -677,7 +658,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, else if (operation == WRITE || operation == WRITE_FLUSH) blkif->st_wr_sect += preq.nr_sects; - return; + return 0; fail_flush: xen_blkbk_unmap(pending_req); @@ -686,14 +667,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ - return; + return -EIO; fail_put_bio: for (i = 0; i < (nbio-1); i++) bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ - return; + return -EIO; } From 3d68b39926b3b247d76cc4da0256e979b2b730e3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 13:42:10 -0400 Subject: [PATCH 52/72] xen/blkback: Fix up some of the comments. They had the wrong data or were in the wrong spot. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5f4284729a3a..b9bdd9e43ab9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -603,7 +603,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; - /* This corresponding blkif_put is done in __end_block_io_op */ + /* This corresponding xen_blkif_put is done in __end_block_io_op */ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -626,7 +626,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number += seg[i].nsec; } - /* This will be hit if the operation was a barrier. */ + /* This will be hit if the operation was a flush. 
*/ if (!bio) { BUG_ON(operation != WRITE_FLUSH); bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); @@ -650,8 +650,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); - blk_finish_plug(&plug); /* Let the I/Os go.. */ + blk_finish_plug(&plug); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; From 01f37f2d53e14a05b7fc3601d182f31ac3b35847 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 15:57:09 -0400 Subject: [PATCH 53/72] xen/blkback: Fixed up comments and converted spaces to tabs. Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 70 ++++++++++++++++----------- drivers/block/xen-blkback/common.h | 73 +++++++++++++++-------------- drivers/block/xen-blkback/xenbus.c | 39 ++++++++------- 3 files changed, 103 insertions(+), 79 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b9bdd9e43ab9..6808cc7d9c73 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -73,13 +73,13 @@ module_param(debug_lvl, int, 0644); * response queued for it, with the saved 'id' passed back. */ struct pending_req { - struct blkif_st *blkif; - u64 id; - int nr_pages; - atomic_t pendcnt; - unsigned short operation; - int status; - struct list_head free_list; + struct blkif_st *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; }; #define BLKBACK_INVALID_HANDLE (~0) @@ -103,7 +103,8 @@ static struct xen_blkbk *blkbk; * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through - * 10 and would index in the pending_pages[..]. */ + * 10 and would index in the pending_pages[..]. 
+ */ static inline int vaddr_pagenr(struct pending_req *req, int seg) { return (req - blkbk->pending_reqs) * @@ -167,8 +168,6 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ - - static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) { @@ -315,7 +314,7 @@ struct seg_buf { /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. -*/ + */ static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -336,27 +335,32 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index + /* + * Note, we use invcount, so nr->pages, so we can't index * using vaddr(req, i). */ for (i = 0; i < invcount; i++) { ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); + printk(KERN_ALERT "Failed to remove M2P override for %lx\n", + (unsigned long)unmap[i].host_addr); continue; } } } -static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + +static int xen_blkbk_map(struct blkif_request *req, + struct pending_req *pending_req, struct seg_buf seg[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; int nseg = req->nr_segments; int ret = 0; - /* Fill out preq.nr_sects with proper amount of sectors, and setup + + /* + * Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the * corresponding grant reference for each page. 
*/ @@ -367,13 +371,15 @@ static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_ if (pending_req->operation != BLKIF_OP_READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, pending_req->blkif->domid); + req->u.rw.seg[i].gref, + pending_req->blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); BUG_ON(ret); - /* Now swizzel the MFN in our domain with the MFN from the other domain + /* + * Now swizzle the MFN in our domain with the MFN from the other domain * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. */ @@ -423,7 +429,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pending_req->status = BLKIF_RSP_ERROR; } - /* If all of the bio's have completed it is time to unmap + /* + * If all of the bio's have completed it is time to unmap * the grant references associated with 'request' and provide * the proper response on the ring. */ @@ -510,8 +517,8 @@ static int do_block_io_op(struct blkif_st *blkif) } /* - * Transumation of the 'struct blkif_request' to a proper 'struct bio' - * and call the 'submit_bio' to pass it to the underlaying storage. + * Transmutation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlying storage. */ static int dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, @@ -538,8 +545,10 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; - /* The frontend likes to set this to -1, which vbd_translate - * is alergic too. */ + /* + * The frontend likes to set this to -1, which vbd_translate + * is allergic to.
+ */ req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: @@ -585,8 +594,11 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } - /* This check _MUST_ be done after vbd_translate as the preq.bdev - * is set there. */ + + /* + * This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. + */ for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { @@ -595,7 +607,9 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } - /* If we have failed at this point, we need to undo the M2P override, + + /* + * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. @@ -638,8 +652,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_end_io = end_block_io_op; } - - /* We set it one so that the last submit_bio does not have to call + /* + * We set it one so that the last submit_bio does not have to call * atomic_inc. */ atomic_set(&pending_req->pendcnt, nbio); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index af93837e1295..e37dcf7f6b8e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -47,53 +47,58 @@ __FILE__ , __LINE__ , ## _a) struct vbd { - blkif_vdev_t handle; /* what the domain refers to this vbd as */ - unsigned char readonly; /* Non-zero -> read-only */ - unsigned char type; /* VDISK_xxx */ - u32 pdevice; /* phys device that this vbd maps to */ - struct block_device *bdev; - sector_t size; /* Cached size parameter */ - bool flush_support; + /* What the domain refers to this vbd as. 
*/ + blkif_vdev_t handle; + /* Non-zero -> read-only */ + unsigned char readonly; + /* VDISK_xxx */ + unsigned char type; + /* phys device that this vbd maps to. */ + u32 pdevice; + struct block_device *bdev; + /* Cached size parameter. */ + sector_t size; + bool flush_support; }; struct backend_info; struct blkif_st { /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; + domid_t domid; + unsigned int handle; /* Physical parameters of the comms window. */ - unsigned int irq; + unsigned int irq; /* Comms information. */ - enum blkif_protocol blk_protocol; - union blkif_back_rings blk_rings; - struct vm_struct *blk_ring_area; + enum blkif_protocol blk_protocol; + union blkif_back_rings blk_rings; + struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ - struct vbd vbd; + struct vbd vbd; /* Back pointer to the backend_info. */ - struct backend_info *be; + struct backend_info *be; /* Private fields. */ - spinlock_t blk_ring_lock; - atomic_t refcnt; + spinlock_t blk_ring_lock; + atomic_t refcnt; - wait_queue_head_t wq; + wait_queue_head_t wq; /* One thread per one blkif. 
*/ - struct task_struct *xenblkd; - unsigned int waiting_reqs; + struct task_struct *xenblkd; + unsigned int waiting_reqs; /* statistics */ - unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_f_req; - int st_rd_sect; - int st_wr_sect; + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_f_req; + int st_rd_sect; + int st_wr_sect; - wait_queue_head_t waiting_to_free; + wait_queue_head_t waiting_to_free; - grant_handle_t shmem_handle; - grant_ref_t shmem_ref; + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; }; @@ -109,10 +114,10 @@ struct blkif_st { } while (0) struct phys_req { - unsigned short dev; - unsigned short nr_sects; - struct block_device *bdev; - blkif_sector_t sector_number; + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; }; int xen_blkif_interface_init(void); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 9adcf806f83f..0cda406b4edb 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -27,12 +27,12 @@ __func__, __LINE__, ##args) struct backend_info { - struct xenbus_device *dev; - struct blkif_st *blkif; - struct xenbus_watch backend_watch; - unsigned major; - unsigned minor; - char *mode; + struct xenbus_device *dev; + struct blkif_st *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; }; static struct kmem_cache *xen_blkif_cachep; @@ -425,7 +425,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, return err; } -/** +/* * Entry point to this code when a new device is created. Allocate the basic * structures, and watch the store waiting for the hotplug scripts to tell us * the device's physical major and minor numbers. Switch to InitWait. @@ -473,7 +473,7 @@ fail: } -/** +/* * Callback received when the hotplug scripts have placed the physical-device * node. 
Read it and the mode node, and create a vbd. If the frontend is * ready, connect. @@ -495,9 +495,11 @@ static void backend_changed(struct xenbus_watch *watch, err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", &major, &minor); if (XENBUS_EXIST_ERR(err)) { - /* Since this watch will fire once immediately after it is - registered, we expect this. Ignore it, and wait for the - hotplug scripts. */ + /* + * Since this watch will fire once immediately after it is + * registered, we expect this. Ignore it, and wait for the + * hotplug scripts. + */ return; } if (err != 2) { @@ -562,7 +564,7 @@ static void backend_changed(struct xenbus_watch *watch, } -/** +/* * Callback received when the frontend's state changes. */ static void frontend_changed(struct xenbus_device *dev, @@ -584,13 +586,16 @@ static void frontend_changed(struct xenbus_device *dev, case XenbusStateInitialised: case XenbusStateConnected: - /* Ensure we connect even when two watches fire in - close successsion and we miss the intermediate value - of frontend_state. */ + /* + * Ensure we connect even when two watches fire in + * close succession and we miss the intermediate value + * of frontend_state. + */ if (dev->state == XenbusStateConnected) break; - /* Enforce precondition before potential leak point. + /* + * Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -627,7 +632,7 @@ static void frontend_changed(struct xenbus_device *dev, /* ** Connection ** */ -/** +/* * Write the physical details regarding the block device to the store, and * switch to Connected state. */ From ebe8190659244ec21b5f16950cf7b156f5b7eb01 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:42:31 -0400 Subject: [PATCH 54/72] xen/blkback: Change printk/DPRINTK to pr_.. type variant. And also make them uniform and prefix the message with 'xen-blkback'.
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 58 ++++++++++++++--------------- drivers/block/xen-blkback/xenbus.c | 19 ++++------ 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6808cc7d9c73..5c9568e39eab 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -196,20 +196,20 @@ static void vbd_resize(struct blkif_st *blkif) struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); - printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + pr_info("xen-blkback: VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + pr_info("xen-blkback: VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - printk(KERN_WARNING "Error starting transaction"); + pr_warn("xen-blkback: Error starting transaction"); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(vbd)); if (err) { - printk(KERN_WARNING "Error writing new size"); + pr_warn("xen-blkback: Error writing new size"); goto abort; } /* @@ -219,7 +219,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - printk(KERN_WARNING "Error writing the state"); + pr_warn("xen-blkback: Error writing the state"); goto abort; } @@ -227,7 +227,7 @@ again: if (err == -EAGAIN) goto again; if (err) - printk(KERN_WARNING "Error ending transaction"); + pr_warn("xen-blkback: Error ending transaction"); abort: xenbus_transaction_end(xbt, 1); } @@ -253,9 +253,9 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct blkif_st *blkif) { - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | f %4d\n", - current->comm, blkif->st_oo_req, - blkif->st_rd_req, 
blkif->st_wr_req, blkif->st_f_req); + pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -270,7 +270,7 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); if (debug_lvl) - printk(KERN_DEBUG "%s: started\n", current->comm); + pr_debug("xen-blkback: %s: started\n", current->comm); while (!kthread_should_stop()) { if (try_to_freeze()) @@ -299,7 +299,7 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); if (debug_lvl) - printk(KERN_DEBUG "%s: exiting\n", current->comm); + pr_debug("xen-blkback: %s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); @@ -343,8 +343,8 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for %lx\n", - (unsigned long)unmap[i].host_addr); + pr_alert("xen-blkback: Failed to remove M2P override for %lx\n", + (unsigned long)unmap[i].host_addr); continue; } } @@ -385,7 +385,7 @@ static int xen_blkbk_map(struct blkif_request *req, */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); + pr_debug("xen-blkback: invalid buffer -- could not remap it\n"); map[i].handle = BLKBACK_INVALID_HANDLE; ret |= 1; } @@ -398,9 +398,8 @@ static int xen_blkbk_map(struct blkif_request *req, ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); + pr_alert("xen-blkback: Failed to install M2P override for %lx (ret: %d)\n", + (unsigned long)map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; } @@ -420,12 +419,12 
@@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - DPRINTK("blkback: flush diskcache op failed, not supported\n"); + pr_debug("xen-blkback: flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { - DPRINTK("Buffer not up-to-date at end of operation, " - "error=%d\n", error); + pr_debug("xen-blkback: Buffer not up-to-date at end of operation," + " error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -562,7 +561,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - DPRINTK("Bad number of segments in request (%d)\n", nseg); + pr_debug("xen-blkback: Bad number of segments in request (%d)\n", + nseg); /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -588,10 +588,10 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, } if (vbd_translate(&preq, blkif, operation) != 0) { - DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? "read" : "write", - preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + pr_debug("xen-blkback: access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? 
"read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } @@ -602,8 +602,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); + pr_debug("xen-blkback: Misaligned I/O request from domain %d", + blkif->domid); goto fail_response; } } @@ -759,7 +759,7 @@ static int __init xen_blkif_init(void) blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { - printk(KERN_ALERT "%s: out of memory!\n", __func__); + pr_alert("xen-blkback: %s: out of memory!\n", __func__); return -ENOMEM; } @@ -807,7 +807,7 @@ static int __init xen_blkif_init(void) return 0; out_of_memory: - printk(KERN_ERR "%s: out of memory\n", __func__); + pr_alert("xen-blkback: %s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 0cda406b4edb..c86519c477f3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ #undef DPRINTK #define DPRINTK(fmt, args...) 
\ - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) struct backend_info { @@ -136,7 +136,7 @@ static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) BUG(); if (op.status) { - DPRINTK(" Grant table operation failure !\n"); + DPRINTK("Grant table operation failure !\n"); return op.status; } @@ -509,10 +509,8 @@ static void backend_changed(struct xenbus_watch *watch, if ((be->major || be->minor) && ((be->major != major) || (be->minor != minor))) { - printk(KERN_WARNING - "blkback: changing physical device (from %x:%x to " - "%x:%x) not supported.\n", be->major, be->minor, - major, minor); + pr_warn("xen-blkback: changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -578,8 +576,8 @@ static void frontend_changed(struct xenbus_device *dev, switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __func__, dev->nodename); + pr_info("xen-blkback: %s: prepare for reconnect\n", + dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -733,9 +731,8 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - printk(KERN_INFO - "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + pr_info("xen-blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ err = xen_blkif_map(be->blkif, ring_ref, evtchn); From 1afbd730a33c6e4ca780a70351e8929dd4c40636 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:15:24 -0400 Subject: [PATCH 55/72] xen/blkback: Make the DPRINTK uniform. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 6 +++--- drivers/block/xen-blkback/xenbus.c | 5 ----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index e37dcf7f6b8e..46e5d0630440 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -42,9 +42,9 @@ #include #include -#define DPRINTK(_f, _a...) \ - pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a) +#define DPRINTK(fmt, args...) \ + pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) struct vbd { /* What the domain refers to this vbd as. */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c86519c477f3..fa01dbbee0ad 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -21,11 +21,6 @@ #include #include "common.h" -#undef DPRINTK -#define DPRINTK(fmt, args...) \ - pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) - struct backend_info { struct xenbus_device *dev; struct blkif_st *blkif; From 22b20f2dffd09edd66127f2022c26d0039bad88e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:43:12 -0400 Subject: [PATCH 56/72] xen/blkback: Use the DRV_PFX in the pr_.. macros. To make it easier to read. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 36 ++++++++++++++--------------- drivers/block/xen-blkback/common.h | 3 ++- drivers/block/xen-blkback/xenbus.c | 6 ++--- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5c9568e39eab..09fe528dd088 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -196,20 +196,20 @@ static void vbd_resize(struct blkif_st *blkif) struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); - pr_info("xen-blkback: VBD Resize: Domid: %d, Device: (%d, %d)\n", + pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - pr_info("xen-blkback: VBD Resize: new size %llu\n", new_size); + pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - pr_warn("xen-blkback: Error starting transaction"); + pr_warn(DRV_PFX "Error starting transaction"); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(vbd)); if (err) { - pr_warn("xen-blkback: Error writing new size"); + pr_warn(DRV_PFX "Error writing new size"); goto abort; } /* @@ -219,7 +219,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - pr_warn("xen-blkback: Error writing the state"); + pr_warn(DRV_PFX "Error writing the state"); goto abort; } @@ -227,7 +227,7 @@ again: if (err == -EAGAIN) goto again; if (err) - pr_warn("xen-blkback: Error ending transaction"); + pr_warn(DRV_PFX "Error ending transaction"); abort: xenbus_transaction_end(xbt, 1); } @@ -270,7 +270,7 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); if (debug_lvl) - pr_debug("xen-blkback: %s: started\n", current->comm); + pr_debug(DRV_PFX "%s: started\n", current->comm); while 
(!kthread_should_stop()) { if (try_to_freeze()) @@ -299,7 +299,7 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); if (debug_lvl) - pr_debug("xen-blkback: %s: exiting\n", current->comm); + pr_debug(DRV_PFX "%s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); @@ -343,7 +343,7 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - pr_alert("xen-blkback: Failed to remove M2P override for %lx\n", + pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", (unsigned long)unmap[i].host_addr); continue; } @@ -385,7 +385,7 @@ static int xen_blkbk_map(struct blkif_request *req, */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { - pr_debug("xen-blkback: invalid buffer -- could not remap it\n"); + pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); map[i].handle = BLKBACK_INVALID_HANDLE; ret |= 1; } @@ -398,7 +398,7 @@ static int xen_blkbk_map(struct blkif_request *req, ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { - pr_alert("xen-blkback: Failed to install M2P override for %lx (ret: %d)\n", + pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; @@ -419,11 +419,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* An error fails the entire request. 
*/ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - pr_debug("xen-blkback: flush diskcache op failed, not supported\n"); + pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { - pr_debug("xen-blkback: Buffer not up-to-date at end of operation," + pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -561,7 +561,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - pr_debug("xen-blkback: Bad number of segments in request (%d)\n", + pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. */ goto fail_response; @@ -588,7 +588,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, } if (vbd_translate(&preq, blkif, operation) != 0) { - pr_debug("xen-blkback: access denied: %s of [%llu,%llu] on dev=%04x\n", + pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? 
"read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, preq.dev); @@ -602,7 +602,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - pr_debug("xen-blkback: Misaligned I/O request from domain %d", + pr_debug(DRV_PFX "Misaligned I/O request from domain %d", blkif->domid); goto fail_response; } @@ -759,7 +759,7 @@ static int __init xen_blkif_init(void) blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { - pr_alert("xen-blkback: %s: out of memory!\n", __func__); + pr_alert(DRV_PFX "%s: out of memory!\n", __func__); return -ENOMEM; } @@ -807,7 +807,7 @@ static int __init xen_blkif_init(void) return 0; out_of_memory: - pr_alert("xen-blkback: %s: out of memory\n", __func__); + pr_alert(DRV_PFX "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 46e5d0630440..da96e3eaa641 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -42,8 +42,9 @@ #include #include +#define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) 
\ - pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) struct vbd { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index fa01dbbee0ad..1c3fa6507e6d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -504,7 +504,7 @@ static void backend_changed(struct xenbus_watch *watch, if ((be->major || be->minor) && ((be->major != major) || (be->minor != minor))) { - pr_warn("xen-blkback: changing physical device (from %x:%x to %x:%x) not supported.\n", + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", be->major, be->minor, major, minor); return; } @@ -571,7 +571,7 @@ static void frontend_changed(struct xenbus_device *dev, switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - pr_info("xen-blkback: %s: prepare for reconnect\n", + pr_info(DRV_PFX "%s: prepare for reconnect\n", dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } @@ -726,7 +726,7 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info("xen-blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ From 72468bfcb815bc9875a870973469f68e20c78717 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:21:08 -0400 Subject: [PATCH 57/72] xen/blkback: Removing the debug_lvl option. It is not really used for anything. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 09fe528dd088..453b51ac737f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -62,9 +62,7 @@ MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; -static unsigned int debug_lvl; module_param(log_stats, int, 0644); -module_param(debug_lvl, int, 0644); /* * Each outstanding request that we've passed to the lower device layers has a @@ -269,9 +267,6 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); - if (debug_lvl) - pr_debug(DRV_PFX "%s: started\n", current->comm); - while (!kthread_should_stop()) { if (try_to_freeze()) continue; @@ -298,8 +293,6 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); - if (debug_lvl) - pr_debug(DRV_PFX "%s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); From 68c88dd7d3caf1737112238fbe91cccd8e7a69fc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:23:39 -0400 Subject: [PATCH 58/72] xen/blkback: Move blkif_get_x86_[32|64]_req to common.h in block/xen-blkback dir. From the blkif.h header, which was exposed to the frontend. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 32 ++++++++++++++++++++++++++++++ include/xen/blkif.h | 30 ---------------------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index da96e3eaa641..647d974da392 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -132,4 +132,36 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); +static void inline blkif_get_x86_32_req(struct blkif_request *dst, + struct blkif_x86_32_request *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->u.rw.sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->seg[i]; +} + +static void inline blkif_get_x86_64_req(struct blkif_request *dst, + struct blkif_x86_64_request *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->u.rw.sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->seg[i]; +} + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/include/xen/blkif.h b/include/xen/blkif.h index ab794269fc53..6ed7c01253b2 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -89,34 +89,4 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) -{ - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; - dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - 
dst->id = src->id; - dst->u.rw.sector_number = src->sector_number; - barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; - for (i = 0; i < n; i++) - dst->u.rw.seg[i] = src->seg[i]; -} - -static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) -{ - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; - dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - dst->id = src->id; - dst->u.rw.sector_number = src->sector_number; - barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; - for (i = 0; i < n; i++) - dst->u.rw.seg[i] = src->seg[i]; -} - #endif /* __XEN_BLKIF_H__ */ From b9fc02968c5dd3c0461b4bb126499a17b13fb86e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:26:59 -0400 Subject: [PATCH 59/72] xen/blkback: Fix spelling mistakes. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1c3fa6507e6d..5d2bbf6240c8 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -351,7 +351,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", + DPRINTK("vbd_create: device %08x could not be opened.\n", vbd->pdevice); return -ENOENT; } @@ -360,7 +360,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", + DPRINTK("vbd_create: device %08x doesn't exist.\n", vbd->pdevice); vbd_free(vbd); return -ENOENT; From a4c348580e65c95d4b278bb6f154f622df12b893 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:10:55 -0400 Subject: [PATCH 60/72] xen/blkback: Flesh out the 
description in the Kconfig. with more details. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 9abb64689712..717d6e4e18d3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -478,6 +478,19 @@ config XEN_BLKDEV_BACKEND block devices to other guests via a high-performance shared-memory interface. + The corresponding Linux frontend driver is enabled by the + CONFIG_XEN_BLKDEV_FRONTEND configuration option. + + The backend driver attaches itself to any block device specified + in the XenBus configuration. There are no limits to what the block + device can be as long as it has a major and minor number. + + If you are compiling a kernel to run in a Xen block backend driver + domain (often this is domain 0) you should say Y here. To + compile this driver as a module, choose M here: the module + will be called xen-blkback. + + config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO From 41ca4d388560d2048c7b64ff5ca7dc3bac0d0812 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:14:15 -0400 Subject: [PATCH 61/72] xen/blkback: Fix checkpatch.pl warnings about lines over 80 characters. Break up the macro usage.
Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/blkif.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/xen/blkif.h b/include/xen/blkif.h index 6ed7c01253b2..3c9ca7a93ba4 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -72,9 +72,12 @@ struct blkif_x86_64_response { typedef struct blkif_x86_64_request blkif_x86_64_request_t; typedef struct blkif_x86_64_response blkif_x86_64_response_t; -DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); -DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); -DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, + struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, + struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, + struct blkif_x86_64_response); union blkif_back_rings { struct blkif_back_ring native; From 03e0edf946a08f498788bb6e8ab58453d98f25b9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:19:23 -0400 Subject: [PATCH 62/72] xen/blkback: Checkpatch.pl recommends against multiple assignments.
CHECK: multiple assignments should be avoided Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 9 ++++++--- drivers/block/xen-blkback/xenbus.c | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 453b51ac737f..362fbf6f656d 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -620,10 +620,11 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; + biolist[nbio++] = bio; bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; @@ -636,10 +637,12 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a flush. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); + + bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; + biolist[nbio++] = bio; bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; @@ -677,7 +680,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, return -EIO; fail_put_bio: - for (i = 0; i < (nbio-1); i++) + for (i = 0; i < nbio; i++) bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5d2bbf6240c8..ba8d30662d19 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -538,7 +538,8 @@ static void backend_changed(struct xenbus_watch *watch, err = vbd_create(be->blkif, handle, major, minor, (NULL == strchr(be->mode, 'w')), cdrom); if (err) { - be->major = be->minor = 0; + be->major = 0; + be->minor = 0; xenbus_dev_fatal(dev, err, 
"creating vbd structure"); return; } @@ -546,7 +547,8 @@ static void backend_changed(struct xenbus_watch *watch, err = xenvbd_sysfs_addif(dev); if (err) { vbd_free(&be->blkif->vbd); - be->major = be->minor = 0; + be->major = 0; + be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); return; } From b0f801273f7359a7d91fc94f5c6bf216bc17aaa1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:23:06 -0400 Subject: [PATCH 63/72] xen/blkback: Fixing some more of the cleanpatch.pl warnings. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- drivers/block/xen-blkback/common.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 362fbf6f656d..d06eb6a50d57 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -637,7 +637,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a flush. 
*/ if (!bio) { BUG_ON(operation != WRITE_FLUSH); - + bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 647d974da392..629546558a47 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -132,7 +132,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); -static void inline blkif_get_x86_32_req(struct blkif_request *dst, +static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -148,7 +148,7 @@ static void inline blkif_get_x86_32_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->seg[i]; } -static void inline blkif_get_x86_64_req(struct blkif_request *dst, +static inline void blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; From 452a6b2bb6de677acdd2ccb8b39cf6e8fe06f306 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:31:51 -0400 Subject: [PATCH 64/72] xen/blkback: Move include/xen/blkif.h into drivers/block/xen-blkback/common.h There is no point in keeping the blkif.h file. It is not used by the frontend. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 72 +++++++++++++++++++++- include/xen/blkif.h | 95 ------------------------------ 2 files changed, 71 insertions(+), 96 deletions(-) delete mode 100644 include/xen/blkif.h diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 629546558a47..b8856fe2568f 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -38,15 +38,85 @@ #include #include #include -#include #include #include +#include +#include +#include #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...)
\ pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? 
*/ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, + struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, + struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, + struct blkif_x86_64_response); + +union blkif_back_rings { + struct blkif_back_ring native; + struct blkif_common_back_ring common; + struct blkif_x86_32_back_ring x86_32; + struct blkif_x86_64_back_ring x86_64; +}; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + struct vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; diff --git a/include/xen/blkif.h b/include/xen/blkif.h deleted file mode 100644 index 3c9ca7a93ba4..000000000000 --- a/include/xen/blkif.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef __XEN_BLKIF_H__ -#define __XEN_BLKIF_H__ - -#include -#include -#include - -/* Not a real protocol. Used to generate ring structs which contain - * the elements common to all protocols only. This way we get a - * compiler-checkable way to use common struct elements, so we can - * avoid using switch(protocol) in a number of places. */ -struct blkif_common_request { - char dummy; -}; -struct blkif_common_response { - char dummy; -}; - -/* i386 protocol version */ -#pragma pack(push, 4) -struct blkif_x86_32_request { - uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; -struct blkif_x86_32_response { - uint64_t id; /* copied from request */ - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ -}; -typedef struct blkif_x86_32_request blkif_x86_32_request_t; -typedef struct blkif_x86_32_response blkif_x86_32_response_t; -#pragma pack(pop) - -/* x86_64 protocol version */ -struct blkif_x86_64_request { - uint8_t operation; /* BLKIF_OP_??? 
*/ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t __attribute__((__aligned__(8))) id; - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; -struct blkif_x86_64_response { - uint64_t __attribute__((__aligned__(8))) id; - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ -}; -typedef struct blkif_x86_64_request blkif_x86_64_request_t; -typedef struct blkif_x86_64_response blkif_x86_64_response_t; - -DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, - struct blkif_common_response); -DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, - struct blkif_x86_32_response); -DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, - struct blkif_x86_64_response); - -union blkif_back_rings { - struct blkif_back_ring native; - struct blkif_common_back_ring common; - struct blkif_x86_32_back_ring x86_32; - struct blkif_x86_64_back_ring x86_64; -}; - -enum blkif_protocol { - BLKIF_PROTOCOL_NATIVE = 1, - BLKIF_PROTOCOL_X86_32 = 2, - BLKIF_PROTOCOL_X86_64 = 3, -}; - -#endif /* __XEN_BLKIF_H__ */ From 325a64860472765ecaeaa0081e9ddd67671183d4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:37:04 -0400 Subject: [PATCH 65/72] xen/blkback: Remove the unused typedefs. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index b8856fe2568f..b0db6aabb5b8 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -76,8 +76,6 @@ struct blkif_x86_32_response { uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? 
*/ }; -typedef struct blkif_x86_32_request blkif_x86_32_request_t; -typedef struct blkif_x86_32_response blkif_x86_32_response_t; #pragma pack(pop) /* x86_64 protocol version */ @@ -94,8 +92,6 @@ struct blkif_x86_64_response { uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; -typedef struct blkif_x86_64_request blkif_x86_64_request_t; -typedef struct blkif_x86_64_response blkif_x86_64_response_t; DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); From 30fd150202fb2d08a62f9c2966a4b1fcf2e861e7 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:47:48 -0400 Subject: [PATCH 66/72] xen/blkback: Change structure name blkif_st to xen_blkif. No need for that '_st' and xen_blkif is more apt. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 28 ++++++++++++++-------------- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkback/xenbus.c | 24 ++++++++++++------------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d06eb6a50d57..d438781ecc7c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -71,7 +71,7 @@ module_param(log_stats, int, 0644); * response queued for it, with the saved 'id' passed back. 
*/ struct pending_req { - struct blkif_st *blkif; + struct xen_blkif *blkif; u64 id; int nr_pages; atomic_t pendcnt; @@ -121,11 +121,11 @@ static inline unsigned long vaddr(struct pending_req *req, int seg) (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) -static int do_block_io_op(struct blkif_st *blkif); -static int dispatch_rw_block_io(struct blkif_st *blkif, +static int do_block_io_op(struct xen_blkif *blkif); +static int dispatch_rw_block_io(struct xen_blkif *blkif, struct blkif_request *req, struct pending_req *pending_req); -static void make_response(struct blkif_st *blkif, u64 id, +static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); /* @@ -166,7 +166,7 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ -static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, +static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, int operation) { struct vbd *vbd = &blkif->vbd; @@ -186,7 +186,7 @@ static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, return rc; } -static void vbd_resize(struct blkif_st *blkif) +static void vbd_resize(struct xen_blkif *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; @@ -233,7 +233,7 @@ abort: /* * Notification from the guest OS. 
*/ -static void blkif_notify_work(struct blkif_st *blkif) +static void blkif_notify_work(struct xen_blkif *blkif) { blkif->waiting_reqs = 1; wake_up(&blkif->wq); @@ -249,7 +249,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) * SCHEDULER FUNCTIONS */ -static void print_stats(struct blkif_st *blkif) +static void print_stats(struct xen_blkif *blkif) { pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, @@ -262,7 +262,7 @@ static void print_stats(struct blkif_st *blkif) int xen_blkif_schedule(void *arg) { - struct blkif_st *blkif = arg; + struct xen_blkif *blkif = arg; struct vbd *vbd = &blkif->vbd; xen_blkif_get(blkif); @@ -451,7 +451,7 @@ static void end_block_io_op(struct bio *bio, int error) * (which has the sectors we want, number of them, grant references, etc), * and transmute it to the block API to hand it over to the proper block disk. */ -static int do_block_io_op(struct blkif_st *blkif) +static int do_block_io_op(struct xen_blkif *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; @@ -512,9 +512,9 @@ static int do_block_io_op(struct blkif_st *blkif) * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. */ -static int dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req) +static int dispatch_rw_block_io(struct xen_blkif *blkif, + struct blkif_request *req, + struct pending_req *pending_req) { struct phys_req preq; struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -692,7 +692,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* * Put a response on the ring on how the operation fared. 
*/ -static void make_response(struct blkif_st *blkif, u64 id, +static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st) { struct blkif_response resp; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index b0db6aabb5b8..722c048663d6 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -130,7 +130,7 @@ struct vbd { struct backend_info; -struct blkif_st { +struct xen_blkif { /* Unique identifier for this interface. */ domid_t domid; unsigned int handle; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index ba8d30662d19..f355f7a5d52d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ struct backend_info { struct xenbus_device *dev; - struct blkif_st *blkif; + struct xen_blkif *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; @@ -41,7 +41,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) return be->dev; } -static int blkback_name(struct blkif_st *blkif, char *buf) +static int blkback_name(struct xen_blkif *blkif, char *buf) { char *devpath, *devname; struct xenbus_device *dev = blkif->be->dev; @@ -62,7 +62,7 @@ static int blkback_name(struct blkif_st *blkif, char *buf) return 0; } -static void xen_update_blkif_status(struct blkif_st *blkif) +static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; char name[TASK_COMM_LEN]; @@ -101,9 +101,9 @@ static void xen_update_blkif_status(struct blkif_st *blkif) } } -static struct blkif_st *xen_blkif_alloc(domid_t domid) +static struct xen_blkif *xen_blkif_alloc(domid_t domid) { - struct blkif_st *blkif; + struct xen_blkif *blkif; blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) @@ -120,7 +120,7 @@ static struct blkif_st *xen_blkif_alloc(domid_t domid) return blkif; } -static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +static int 
map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; @@ -141,7 +141,7 @@ static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) return 0; } -static void unmap_frontend_page(struct blkif_st *blkif) +static void unmap_frontend_page(struct xen_blkif *blkif) { struct gnttab_unmap_grant_ref op; @@ -152,7 +152,7 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, +static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, unsigned int evtchn) { int err; @@ -211,7 +211,7 @@ static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, return 0; } -static void xen_blkif_disconnect(struct blkif_st *blkif) +static void xen_blkif_disconnect(struct xen_blkif *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -234,7 +234,7 @@ static void xen_blkif_disconnect(struct blkif_st *blkif) } } -void xen_blkif_free(struct blkif_st *blkif) +void xen_blkif_free(struct xen_blkif *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -244,7 +244,7 @@ void xen_blkif_free(struct blkif_st *blkif) int __init xen_blkif_interface_init(void) { xen_blkif_cachep = kmem_cache_create("blkif_cache", - sizeof(struct blkif_st), + sizeof(struct xen_blkif), 0, 0, NULL); if (!xen_blkif_cachep) return -ENOMEM; @@ -332,7 +332,7 @@ static void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } -static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, +static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) { From 3d814731ba67f9514bdf380c1b95dd852ac82a2f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:53:56 -0400 Subject: [PATCH 67/72] xen/blkback: Prefix 'vbd' with 'xen' in structs and functions. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 20 ++++++++--------- drivers/block/xen-blkback/common.h | 4 ++-- drivers/block/xen-blkback/xenbus.c | 34 ++++++++++++++--------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d438781ecc7c..1e454a30e4e1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -166,10 +166,10 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ -static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, - int operation) +static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, + int operation) { - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; int rc = -EACCES; if ((operation != READ) && vbd->readonly) @@ -186,9 +186,9 @@ static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, return rc; } -static void vbd_resize(struct xen_blkif *blkif) +static void xen_vbd_resize(struct xen_blkif *blkif) { - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); @@ -263,7 +263,7 @@ static void print_stats(struct xen_blkif *blkif) int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; xen_blkif_get(blkif); @@ -271,7 +271,7 @@ int xen_blkif_schedule(void *arg) if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) - vbd_resize(blkif); + xen_vbd_resize(blkif); wait_event_interruptible( blkif->wq, @@ -538,7 +538,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; /* - * The frontend likes to set this to -1, which vbd_translate + * The frontend likes to set this to -1, which xen_vbd_translate * is alergic 
too. */ req->u.rw.sector_number = 0; @@ -580,7 +580,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } - if (vbd_translate(&preq, blkif, operation) != 0) { + if (xen_vbd_translate(&preq, blkif, operation) != 0) { pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, @@ -589,7 +589,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* - * This check _MUST_ be done after vbd_translate as the preq.bdev + * This check _MUST_ be done after xen_vbd_translate as the preq.bdev * is set there. */ for (i = 0; i < nseg; i++) { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 722c048663d6..1e4ccdeadef3 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -113,7 +113,7 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -struct vbd { +struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; /* Non-zero -> read-only */ @@ -141,7 +141,7 @@ struct xen_blkif { union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ - struct vbd vbd; + struct xen_vbd vbd; /* Back pointer to the backend_info. */ struct backend_info *be; /* Private fields. 
*/ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f355f7a5d52d..e470d8869053 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -275,7 +275,7 @@ VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); -static struct attribute *vbdstat_attrs[] = { +static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_oo_req.attr, &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, @@ -285,9 +285,9 @@ static struct attribute *vbdstat_attrs[] = { NULL }; -static struct attribute_group vbdstat_group = { +static struct attribute_group xen_vbdstat_group = { .name = "statistics", - .attrs = vbdstat_attrs, + .attrs = xen_vbdstat_attrs, }; VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); @@ -305,13 +305,13 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) if (error) goto fail2; - error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); if (error) goto fail3; return 0; -fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); fail2: device_remove_file(&dev->dev, &dev_attr_mode); fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); return error; @@ -319,24 +319,24 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); void xenvbd_sysfs_delif(struct xenbus_device *dev) { - sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); device_remove_file(&dev->dev, &dev_attr_physical_device); } -static void vbd_free(struct vbd *vbd) +static void xen_vbd_free(struct xen_vbd *vbd) { if (vbd->bdev) blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); vbd->bdev = NULL; } -static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, - unsigned major, unsigned minor, int readonly, - int cdrom) +static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, + unsigned major, unsigned minor, int readonly, + int cdrom) { - struct vbd *vbd; + struct xen_vbd *vbd; struct block_device *bdev; struct request_queue *q; @@ -351,7 +351,7 @@ static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { - DPRINTK("vbd_create: device %08x could not be opened.\n", + DPRINTK("xen_vbd_create: device %08x could not be opened.\n", vbd->pdevice); return -ENOENT; } @@ -360,9 +360,9 @@ static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_create: device %08x doesn't exist.\n", + DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", vbd->pdevice); - vbd_free(vbd); + xen_vbd_free(vbd); return -ENOENT; } @@ -396,7 +396,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) if (be->blkif) { xen_blkif_disconnect(be->blkif); - vbd_free(&be->blkif->vbd); + xen_vbd_free(&be->blkif->vbd); xen_blkif_free(be->blkif); be->blkif = NULL; } @@ -535,7 +535,7 @@ static void backend_changed(struct xenbus_watch *watch, be->major = major; be->minor = minor; - err = vbd_create(be->blkif, handle, major, minor, + err = xen_vbd_create(be->blkif, handle, major, minor, (NULL == strchr(be->mode, 'w')), cdrom); if (err) { be->major = 0; @@ -546,7 +546,7 @@ static void backend_changed(struct xenbus_watch *watch, err = xenvbd_sysfs_addif(dev); if (err) { - vbd_free(&be->blkif->vbd); + xen_vbd_free(&be->blkif->vbd); be->major = 0; be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); From 5a577e38724226e06337bc8361f492b6bb76b9a5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:58:21 -0400 Subject: [PATCH 68/72] xen/blkback: Add 
the prefix XEN in the common.h. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1e4ccdeadef3..9e40b283a468 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -24,8 +24,8 @@ * IN THE SOFTWARE. */ -#ifndef __BLKIF__BACKEND__COMMON_H__ -#define __BLKIF__BACKEND__COMMON_H__ +#ifndef __XEN_BLKIF__BACKEND__COMMON_H__ +#define __XEN_BLKIF__BACKEND__COMMON_H__ #include #include @@ -230,4 +230,4 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->seg[i]; } -#endif /* __BLKIF__BACKEND__COMMON_H__ */ +#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ From cca537af7d6defe8001c2229da738f8a3c934fc0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 17:23:30 -0400 Subject: [PATCH 69/72] xen/blkback: if log_stats is enabled print out the data. Do not depend on the driver being built with the -DDEBUG flag.
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 1e454a30e4e1..9dee5454740f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -251,7 +251,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); From 5185432277ddf5bd91ad5af29cd1945f25ed10fc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 18:02:28 -0400 Subject: [PATCH 70/72] xen/blkback: Align the tabs on the structure. The recent changes caused this field of the structure to be offset a bit. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e470d8869053..34570823355b 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ struct backend_info { struct xenbus_device *dev; - struct xen_blkif *blkif; + struct xen_blkif *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; From 496b318eb65558c1a3a4fe882cb9da6d1dc6493a Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Fri, 13 May 2011 09:45:40 -0400 Subject: [PATCH 71/72] xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end() vbd_resize() up_read()'s xs_state.suspend_mutex twice in a row via double xenbus_transaction_end() calls. The next down_read() in xenbus_transaction_start() (at eg. the next resize attempt) hangs. 
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=618317 Acked-by: Jan Beulich Acked-by: Ian Campbell Signed-off-by: Laszlo Ersek Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9dee5454740f..dba55e3a4a86 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -226,6 +226,7 @@ again: goto again; if (err) pr_warn(DRV_PFX "Error ending transaction"); + return; abort: xenbus_transaction_end(xbt, 1); } From 8ab521506c4dbb144f0c04c55e3d8bec42c1b2b9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 17 May 2011 11:07:05 +0100 Subject: [PATCH 72/72] xen/blkback: don't fail empty barrier requests The sector number on empty barrier requests may (will?) be -1, which, given that it's being treated as an unsigned 64-bit quantity, will almost always exceed the actual (virtual) disk's size. Inspired by Konrad's "When writting barriers set the sector number to zero...". While at it, also add overflow checking to the math in xen_vbd_translate().
Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dba55e3a4a86..c73910cc28c9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -175,8 +175,14 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, if ((operation != READ) && vbd->readonly) goto out; - if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) - goto out; + if (likely(req->nr_sects)) { + blkif_sector_t end = req->sector_number + req->nr_sects; + + if (unlikely(end < req->sector_number)) + goto out; + if (unlikely(end > vbd_sz(vbd))) + goto out; + } req->dev = vbd->pdevice; req->bdev = vbd->bdev; @@ -538,11 +544,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; - /* - * The frontend likes to set this to -1, which xen_vbd_translate - * is alergic too. - */ - req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: default: