From 42fb2e0720511fa1da2f8e751be393f851b71d80 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 3 Aug 2010 16:54:38 +0200 Subject: [PATCH 01/25] virtio: Factor virtqueue_map_sg out Separate the mapping of requests to host memory from the descriptor iteration. The next patch will make use of it in a different context. Signed-off-by: Kevin Wolf --- hw/virtio.c | 40 +++++++++++++++++++++++++--------------- hw/virtio.h | 3 +++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index 4475bb3e44..85312b3ebe 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -360,11 +360,26 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) return 0; } +void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr, + size_t num_sg, int is_write) +{ + unsigned int i; + target_phys_addr_t len; + + for (i = 0; i < num_sg; i++) { + len = sg[i].iov_len; + sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); + if (sg[i].iov_base == NULL || len != sg[i].iov_len) { + fprintf(stderr, "virtio: trying to map MMIO memory\n"); + exit(1); + } + } +} + int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { unsigned int i, head, max; target_phys_addr_t desc_pa = vq->vring.desc; - target_phys_addr_t len; if (!virtqueue_num_heads(vq, vq->last_avail_idx)) return 0; @@ -388,29 +403,20 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) i = 0; } + /* Collect all the descriptors */ do { struct iovec *sg; - int is_write = 0; if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i); sg = &elem->in_sg[elem->in_num++]; - is_write = 1; - } else + } else { + elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i); sg = &elem->out_sg[elem->out_num++]; - - /* Grab the first descriptor, and check it's OK. */ - sg->iov_len = vring_desc_len(desc_pa, i); - len = sg->iov_len; - - sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i), - &len, is_write); - - if (sg->iov_base == NULL || len != sg->iov_len) { - fprintf(stderr, "virtio: trying to map MMIO memory\n"); - exit(1); } + sg->iov_len = vring_desc_len(desc_pa, i); + /* If we've got too many, that implies a descriptor loop. */ if ((elem->in_num + elem->out_num) > max) { fprintf(stderr, "Looped descriptor"); @@ -418,6 +424,10 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) } } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); + /* Now map what we have collected */ + virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1); + virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0); + elem->index = head; vq->inuse++; diff --git a/hw/virtio.h b/hw/virtio.h index 5836ab61e7..1deeb2cb9c 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -81,6 +81,7 @@ typedef struct VirtQueueElement unsigned int out_num; unsigned int in_num; target_phys_addr_t in_addr[VIRTQUEUE_MAX_SIZE]; + target_phys_addr_t out_addr[VIRTQUEUE_MAX_SIZE]; struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; } VirtQueueElement; @@ -142,6 +143,8 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count); void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); +void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr, + size_t num_sg, int is_write); int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes); From b6a4805b55b409134dc712677fdc4f6a8795e965 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 3 Aug 2010 16:57:02 +0200 Subject: [PATCH 02/25] virtio-blk: Fix migration of queued requests in_sg[].iovec and out_sg[].ioved are pointer to (source) host memory and therefore invalid after migration. When loading the device state we must create a new mapping on the destination host. Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index c3a73438f9..395eb9a068 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -481,6 +481,11 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); req->next = s->rq; s->rq = req; + + virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, + req->elem.in_num, 1); + virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, + req->elem.out_num, 0); } return 0; From ee1811965fd15e0b41f8d508b951a8ab826ae3a7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 5 Aug 2010 13:05:22 +0200 Subject: [PATCH 03/25] block: Fix image re-open in bdrv_commit Arguably we should re-open the backing file with the backing file format and not with the format of the snapshot image. Signed-off-by: Kevin Wolf --- block.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index da70f2968b..a5514e361a 100644 --- a/block.c +++ b/block.c @@ -745,6 +745,7 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res) int bdrv_commit(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + BlockDriver *backing_drv; int64_t sector, total_sectors; int n, ro, open_flags; int ret = 0, rw_ret = 0; @@ -762,7 +763,8 @@ int bdrv_commit(BlockDriverState *bs) if (bs->backing_hd->keep_read_only) { return -EACCES; } - + + backing_drv = bs->backing_hd->drv; ro = bs->backing_hd->read_only; strncpy(filename, bs->backing_hd->filename, sizeof(filename)); open_flags = bs->backing_hd->open_flags; @@ -772,12 +774,14 @@ int bdrv_commit(BlockDriverState *bs) bdrv_delete(bs->backing_hd); bs->backing_hd = NULL; bs_rw = bdrv_new(""); - rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, drv); + rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, + backing_drv); if (rw_ret < 0) { bdrv_delete(bs_rw); /* try to re-open read-only */ bs_ro = bdrv_new(""); - ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv); + ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, + backing_drv); if (ret < 0) { bdrv_delete(bs_ro); /* drive not functional anymore */ @@ -828,7 +832,8 @@ ro_cleanup: bdrv_delete(bs->backing_hd); bs->backing_hd = NULL; bs_ro = bdrv_new(""); - ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv); + ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, + backing_drv); if (ret < 0) { bdrv_delete(bs_ro); /* drive not functional anymore */ From 010cb2b314f2fa629b6b0b8f2f73c9d562fff49f Mon Sep 17 00:00:00 2001 From: Izumi Tsutsui Date: Sun, 8 Aug 2010 18:31:04 +0900 Subject: [PATCH 04/25] sheepdog: remove unnecessary includes "qemu_socket.h" includes all necessary files and including without could cause errors on some systems. Signed-off-by: Izumi Tsutsui Signed-off-by: Kevin Wolf --- block/sheepdog.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 81aa564f26..e62820a804 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -8,16 +8,6 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifdef _WIN32 -#include -#include -#include -#else -#include -#include - -#define closesocket(s) close(s) -#endif #include "qemu-common.h" #include "qemu-error.h" From cdbae85169c384d1641aa1ae86cdeefe16285745 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 17 Aug 2010 18:58:55 +0200 Subject: [PATCH 05/25] qemu-img rebase: Open new backing file read-only We never write to a backing file, so opening rw is useless. It just means that you can't rebase on top of a file for which you don't have write permissions. Signed-off-by: Kevin Wolf --- qemu-img.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index e300f911cb..d2a978b912 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1286,7 +1286,7 @@ static int img_rebase(int argc, char **argv) } bs_new_backing = bdrv_new("new_backing"); - ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS | BDRV_O_RDWR, + ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS, new_backing_drv); if (ret) { error("Could not open new backing file '%s'", out_baseimg); From 2aa326be0d2039f51192707bdb2fc935d0e87c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Minier?= Date: Sun, 22 Aug 2010 00:47:23 +0200 Subject: [PATCH 06/25] vvfat: fat_chksum(): fix access above array bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Loïc Minier Signed-off-by: Kevin Wolf --- block/vvfat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/vvfat.c b/block/vvfat.c index 6d61c2e6c3..365332aa21 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -512,7 +512,7 @@ static inline uint8_t fat_chksum(const direntry_t* entry) for(i=0;i<11;i++) { unsigned char c; - c = (i <= 8) ? entry->name[i] : entry->extension[i-8]; + c = (i < 8) ? entry->name[i] : entry->extension[i-8]; chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c; } From 1d45f8b542f6b80b24c44533ef0dd9e1a3b17ea5 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 25 Aug 2010 22:48:33 +0200 Subject: [PATCH 07/25] nbd: Introduce NBD named exports. This patch allows to connect Qemu using NBD protocol to an nbd-server using named exports. For instance, if on the host "isoserver", in /etc/nbd-server/config, you have: [generic] [debian-500-ppc-netinst] exportname = /ISO/debian-500-powerpc-netinst.iso [Fedora-10-ppc-netinst] exportname = /ISO/Fedora-10-ppc-netinst.iso You can connect to it, using: qemu -cdrom nbd:isoserver:exportname=debian-500-ppc-netinst qemu -cdrom nbd:isoserver:exportname=Fedora-10-ppc-netinst NOTE: you need at least nbd-server 2.9.18 Signed-off-by: Laurent Vivier Signed-off-by: Kevin Wolf --- block/nbd.c | 66 ++++++++++++++++++++-------- nbd.c | 118 +++++++++++++++++++++++++++++++++++++++++++++----- nbd.h | 5 ++- qemu-doc.texi | 7 +++ qemu-nbd.c | 4 +- 5 files changed, 168 insertions(+), 32 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index a1ec123a63..5e9d6cb8e6 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -33,6 +33,8 @@ #include #include +#define EN_OPTSTR ":exportname=" + typedef struct BDRVNBDState { int sock; off_t size; @@ -42,55 +44,83 @@ typedef struct BDRVNBDState { static int nbd_open(BlockDriverState *bs, const char* filename, int flags) { BDRVNBDState *s = bs->opaque; + uint32_t nbdflags; + + char *file; + char *name; const char *host; const char *unixpath; int sock; off_t size; size_t blocksize; int ret; + int err = -EINVAL; - if (!strstart(filename, "nbd:", &host)) - return -EINVAL; + file = qemu_strdup(filename); + + name = strstr(file, EN_OPTSTR); + if (name) { + if (name[strlen(EN_OPTSTR)] == 0) { + goto out; + } + name[0] = 0; + name += strlen(EN_OPTSTR); + } + + if (!strstart(file, "nbd:", &host)) { + goto out; + } if (strstart(host, "unix:", &unixpath)) { - if (unixpath[0] != '/') - return -EINVAL; + if (unixpath[0] != '/') { + goto out; + } sock = unix_socket_outgoing(unixpath); } else { - uint16_t port; + uint16_t port = NBD_DEFAULT_PORT; char *p, *r; char hostname[128]; pstrcpy(hostname, 128, host); p = strchr(hostname, ':'); - if (p == NULL) - return -EINVAL; + if (p != NULL) { + *p = '\0'; + p++; - *p = '\0'; - p++; + port = strtol(p, &r, 0); + if (r == p) { + goto out; + } + } else if (name == NULL) { + goto out; + } - port = strtol(p, &r, 0); - if (r == p) - return -EINVAL; sock = tcp_socket_outgoing(hostname, port); } - if (sock == -1) - return -errno; + if (sock == -1) { + err = -errno; + goto out; + } - ret = nbd_receive_negotiate(sock, &size, &blocksize); - if (ret == -1) - return -errno; + ret = nbd_receive_negotiate(sock, name, &nbdflags, &size, &blocksize); + if (ret == -1) { + err = -errno; + goto out; + } s->sock = sock; s->size = size; s->blocksize = blocksize; + err = 0; - return 0; +out: + qemu_free(file); + return err; } static int nbd_read(BlockDriverState *bs, int64_t sector_num, diff --git a/nbd.c b/nbd.c index a9f295f2bb..30dec8d2c5 100644 --- a/nbd.c +++ b/nbd.c @@ -62,6 +62,8 @@ #define NBD_SET_SIZE_BLOCKS _IO(0xab, 7) #define NBD_DISCONNECT _IO(0xab, 8) +#define NBD_OPT_EXPORT_NAME (1 << 0) + /* That's all folks */ #define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true) @@ -296,22 +298,27 @@ int nbd_negotiate(int csock, off_t size) return 0; } -int nbd_receive_negotiate(int csock, off_t *size, size_t *blocksize) +int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, + off_t *size, size_t *blocksize) { - char buf[8 + 8 + 8 + 128]; - uint64_t magic; + char buf[256]; + uint64_t magic, s; + uint16_t tmp; TRACE("Receiving negotation."); - if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + if (read_sync(csock, buf, 8) != 8) { LOG("read failed"); errno = EINVAL; return -1; } - magic = be64_to_cpup((uint64_t*)(buf + 8)); - *size = be64_to_cpup((uint64_t*)(buf + 16)); - *blocksize = 1024; + buf[8] = '\0'; + if (strlen(buf) == 0) { + LOG("server connection closed"); + errno = EINVAL; + return -1; + } TRACE("Magic is %c%c%c%c%c%c%c%c", qemu_isprint(buf[0]) ? buf[0] : '.', @@ -322,8 +329,6 @@ int nbd_receive_negotiate(int csock, off_t *size, size_t *blocksize) qemu_isprint(buf[5]) ? buf[5] : '.', qemu_isprint(buf[6]) ? buf[6] : '.', qemu_isprint(buf[7]) ? buf[7] : '.'); - TRACE("Magic is 0x%" PRIx64, magic); - TRACE("Size is %" PRIu64, *size); if (memcmp(buf, "NBDMAGIC", 8) != 0) { LOG("Invalid magic received"); @@ -331,10 +336,99 @@ int nbd_receive_negotiate(int csock, off_t *size, size_t *blocksize) return -1; } - TRACE("Checking magic"); + if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("read failed"); + errno = EINVAL; + return -1; + } + magic = be64_to_cpu(magic); + TRACE("Magic is 0x%" PRIx64, magic); - if (magic != 0x00420281861253LL) { - LOG("Bad magic received"); + if (name) { + uint32_t reserved = 0; + uint32_t opt; + uint32_t namesize; + + TRACE("Checking magic (opts_magic)"); + if (magic != 0x49484156454F5054LL) { + LOG("Bad magic received"); + errno = EINVAL; + return -1; + } + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("flags read failed"); + errno = EINVAL; + return -1; + } + *flags = be16_to_cpu(tmp) << 16; + /* reserved for future use */ + if (write_sync(csock, &reserved, sizeof(reserved)) != + sizeof(reserved)) { + LOG("write failed (reserved)"); + errno = EINVAL; + return -1; + } + /* write the export name */ + magic = cpu_to_be64(magic); + if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("write failed (magic)"); + errno = EINVAL; + return -1; + } + opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); + if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + LOG("write failed (opt)"); + errno = EINVAL; + return -1; + } + namesize = cpu_to_be32(strlen(name)); + if (write_sync(csock, &namesize, sizeof(namesize)) != + sizeof(namesize)) { + LOG("write failed (namesize)"); + errno = EINVAL; + return -1; + } + if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) { + LOG("write failed (name)"); + errno = EINVAL; + return -1; + } + } else { + TRACE("Checking magic (cli_magic)"); + + if (magic != 0x00420281861253LL) { + LOG("Bad magic received"); + errno = EINVAL; + return -1; + } + } + + if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) { + LOG("read failed"); + errno = EINVAL; + return -1; + } + *size = be64_to_cpu(s); + *blocksize = 1024; + TRACE("Size is %" PRIu64, *size); + + if (!name) { + if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) { + LOG("read failed (flags)"); + errno = EINVAL; + return -1; + } + *flags = be32_to_cpup(flags); + } else { + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("read failed (tmp)"); + errno = EINVAL; + return -1; + } + *flags |= be32_to_cpu(tmp); + } + if (read_sync(csock, &buf, 124) != 124) { + LOG("read failed (buf)"); errno = EINVAL; return -1; } diff --git a/nbd.h b/nbd.h index 5a1fbdf512..8ff65a1941 100644 --- a/nbd.h +++ b/nbd.h @@ -42,6 +42,8 @@ enum { NBD_CMD_DISC = 2 }; +#define NBD_DEFAULT_PORT 10809 + size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); int tcp_socket_outgoing(const char *address, uint16_t port); int tcp_socket_incoming(const char *address, uint16_t port); @@ -49,7 +51,8 @@ int unix_socket_outgoing(const char *path); int unix_socket_incoming(const char *path); int nbd_negotiate(int csock, off_t size); -int nbd_receive_negotiate(int csock, off_t *size, size_t *blocksize); +int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, + off_t *size, size_t *blocksize); int nbd_init(int fd, int csock, off_t size, size_t blocksize); int nbd_send_request(int csock, struct nbd_request *request); int nbd_receive_reply(int csock, struct nbd_reply *reply); diff --git a/qemu-doc.texi b/qemu-doc.texi index 55a966fe71..d7d760fedd 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -620,6 +620,13 @@ qemu linux1.img -hdb nbd:unix:/tmp/my_socket qemu linux2.img -hdb nbd:unix:/tmp/my_socket @end example +If the nbd-server uses named exports (since NBD 2.9.18), you must use the +"exportname" option: +@example +qemu -cdrom nbd:localhost:exportname=debian-500-ppc-netinst +qemu -cdrom nbd:localhost:exportname=openSUSE-11.1-ppc-netinst +@end example + @node pcsys_network @section Network emulation diff --git a/qemu-nbd.c b/qemu-nbd.c index 4e607cfb61..41e5c5d857 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -230,6 +230,7 @@ int main(int argc, char **argv) int nb_fds = 0; int max_fd; int persistent = 0; + uint32_t nbdflags; while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { @@ -398,7 +399,8 @@ int main(int argc, char **argv) goto out; } - ret = nbd_receive_negotiate(sock, &size, &blocksize); + ret = nbd_receive_negotiate(sock, NULL, &nbdflags, + &size, &blocksize); if (ret == -1) { ret = 1; goto out; From 34cf0081294513bc734896c9051c20ca6c19c3db Mon Sep 17 00:00:00 2001 From: Andrew de Quincey Date: Sun, 8 Aug 2010 21:04:50 +0100 Subject: [PATCH 08/25] posix-aio-compat: Fix async_conmtext for ioctl Set the async_context_id field when queuing an async ioctl call Signed-off-by: Andrew de Quincey Signed-off-by: Kevin Wolf --- posix-aio-compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/posix-aio-compat.c b/posix-aio-compat.c index a67ffe3113..efc59683c0 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -599,6 +599,7 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, acb->aio_type = QEMU_AIO_IOCTL; acb->aio_fildes = fd; acb->ev_signo = SIGUSR2; + acb->async_context_id = get_async_context_id(); acb->aio_offset = 0; acb->aio_ioctl_buf = buf; acb->aio_ioctl_cmd = req; From f920991574357a9b4c560d551f1d92bb6c6a6333 Mon Sep 17 00:00:00 2001 From: Miguel Di Ciurcio Filho Date: Wed, 4 Aug 2010 14:55:48 -0300 Subject: [PATCH 09/25] monitor: make 'info snapshots' show only fully available snapshots The output generated by 'info snapshots' shows only snapshots that exist on the block device that saves the VM state. This output can cause an user to erroneously try to load an snapshot that is not available on all block devices. $ qemu-img snapshot -l xxtest.qcow2 Snapshot list: ID TAG VM SIZE DATE VM CLOCK 1 1.5M 2010-07-26 16:51:52 00:00:08.599 2 1.5M 2010-07-26 16:51:53 00:00:09.719 3 1.5M 2010-07-26 17:26:49 00:00:13.245 4 1.5M 2010-07-26 19:01:00 00:00:46.763 $ qemu-img snapshot -l xxtest2.qcow2 Snapshot list: ID TAG VM SIZE DATE VM CLOCK 3 0 2010-07-26 17:26:49 00:00:13.245 4 0 2010-07-26 19:01:00 00:00:46.763 Current output: $ qemu -hda xxtest.qcow2 -hdb xxtest2.qcow2 -monitor stdio -vnc :0 QEMU 0.12.4 monitor - type 'help' for more information (qemu) info snapshots Snapshot devices: ide0-hd0 Snapshot list (from ide0-hd0): ID TAG VM SIZE DATE VM CLOCK 1 1.5M 2010-07-26 16:51:52 00:00:08.599 2 1.5M 2010-07-26 16:51:53 00:00:09.719 3 1.5M 2010-07-26 17:26:49 00:00:13.245 4 1.5M 2010-07-26 19:01:00 00:00:46.763 Snapshots 1 and 2 do not exist on xxtest2.qcow, but they are displayed anyway. This patch sumarizes the output to only show fully available snapshots. New output: (qemu) info snapshots ID TAG VM SIZE DATE VM CLOCK 3 1.5M 2010-07-26 17:26:49 00:00:13.245 4 1.5M 2010-07-26 19:01:00 00:00:46.763 Signed-off-by: Miguel Di Ciurcio Filho Signed-off-by: Kevin Wolf --- savevm.c | 61 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/savevm.c b/savevm.c index 99e49499a9..d2865929f0 100644 --- a/savevm.c +++ b/savevm.c @@ -2039,8 +2039,10 @@ void do_delvm(Monitor *mon, const QDict *qdict) void do_info_snapshots(Monitor *mon) { BlockDriverState *bs, *bs1; - QEMUSnapshotInfo *sn_tab, *sn; - int nb_sns, i; + QEMUSnapshotInfo *sn_tab, *sn, s, *sn_info = &s; + int nb_sns, i, ret, available; + int total; + int *available_snapshots; char buf[256]; bs = bdrv_snapshots(); @@ -2048,27 +2050,52 @@ void do_info_snapshots(Monitor *mon) monitor_printf(mon, "No available block device supports snapshots\n"); return; } - monitor_printf(mon, "Snapshot devices:"); - bs1 = NULL; - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1)) { - if (bs == bs1) - monitor_printf(mon, " %s", bdrv_get_device_name(bs1)); - } - } - monitor_printf(mon, "\n"); nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns < 0) { monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); return; } - monitor_printf(mon, "Snapshot list (from %s):\n", - bdrv_get_device_name(bs)); - monitor_printf(mon, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL)); - for(i = 0; i < nb_sns; i++) { - sn = &sn_tab[i]; - monitor_printf(mon, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), sn)); + + if (nb_sns == 0) { + monitor_printf(mon, "There is no snapshot available.\n"); + return; } + + available_snapshots = qemu_mallocz(sizeof(int) * nb_sns); + total = 0; + for (i = 0; i < nb_sns; i++) { + sn = &sn_tab[i]; + available = 1; + bs1 = NULL; + + while ((bs1 = bdrv_next(bs1))) { + if (bdrv_can_snapshot(bs1) && bs1 != bs) { + ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str); + if (ret < 0) { + available = 0; + break; + } + } + } + + if (available) { + available_snapshots[total] = i; + total++; + } + } + + if (total > 0) { + monitor_printf(mon, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL)); + for (i = 0; i < total; i++) { + sn = &sn_tab[available_snapshots[i]]; + monitor_printf(mon, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), sn)); + } + } else { + monitor_printf(mon, "There is no suitable snapshot available\n"); + } + qemu_free(sn_tab); + qemu_free(available_snapshots); + } From 7d631a116ad8fe07001e2cc4c559a06aac82745f Mon Sep 17 00:00:00 2001 From: Miguel Di Ciurcio Filho Date: Wed, 4 Aug 2010 14:55:49 -0300 Subject: [PATCH 10/25] savevm: Generate a name when run without one When savevm is run without a name, the name stays blank and the snapshot is saved anyway. The new behavior is when savevm is run without parameters a name will be created automaticaly, so the snapshot is accessible to the user without needing the id when loadvm is run. (qemu) savevm (qemu) info snapshots ID TAG VM SIZE DATE VM CLOCK 1 vm-20100728134640 978K 2010-07-28 13:46:40 00:00:08.603 We use a name with the format 'vm-YYYYMMDDHHMMSS'. This is a first step to hide the internal id, because I don't see a reason to expose this kind of internals to the user. Signed-off-by: Miguel Di Ciurcio Filho Signed-off-by: Kevin Wolf --- savevm.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/savevm.c b/savevm.c index d2865929f0..4d9f822f27 100644 --- a/savevm.c +++ b/savevm.c @@ -1837,8 +1837,10 @@ void do_savevm(Monitor *mon, const QDict *qdict) uint32_t vm_state_size; #ifdef _WIN32 struct _timeb tb; + struct tm *ptm; #else struct timeval tv; + struct tm tm; #endif const char *name = qdict_get_try_str(qdict, "name"); @@ -1869,15 +1871,6 @@ void do_savevm(Monitor *mon, const QDict *qdict) vm_stop(0); memset(sn, 0, sizeof(*sn)); - if (name) { - ret = bdrv_snapshot_find(bs, old_sn, name); - if (ret >= 0) { - pstrcpy(sn->name, sizeof(sn->name), old_sn->name); - pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); - } else { - pstrcpy(sn->name, sizeof(sn->name), name); - } - } /* fill auxiliary fields */ #ifdef _WIN32 @@ -1891,6 +1884,24 @@ void do_savevm(Monitor *mon, const QDict *qdict) #endif sn->vm_clock_nsec = qemu_get_clock(vm_clock); + if (name) { + ret = bdrv_snapshot_find(bs, old_sn, name); + if (ret >= 0) { + pstrcpy(sn->name, sizeof(sn->name), old_sn->name); + pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); + } else { + pstrcpy(sn->name, sizeof(sn->name), name); + } + } else { +#ifdef _WIN32 + ptm = localtime(&tb.time); + strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", ptm); +#else + localtime_r(&tv.tv_sec, &tm); + strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); +#endif + } + /* Delete old snapshots of the same name */ if (name && del_existing_snapshots(mon, name) < 0) { goto the_end; From 78e70c30612833fd0017cfa5b519bc23df808927 Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 11:22:29 +0200 Subject: [PATCH 11/25] scsi-disk: fix the mode data length field returned by the MODE SENSE command The MODE DATA LENGTH field indicates the length in bytes of the following data that is available to be transferred. The mode data length does not include the number of bytes in the MODE DATA LENGTH field. Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 07a6d86946..b627ffee74 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -653,7 +653,7 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) } buflen = p - outbuf; - outbuf[0] = buflen - 4; + outbuf[0] = buflen - 1; if (buflen > req->cmd.xfer) buflen = req->cmd.xfer; return buflen; From ce512ee115b20bfc8a562d528a3f14eeff9ddf64 Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 14:08:23 +0200 Subject: [PATCH 12/25] scsi-disk: fix the mode data header returned by the MODE SENSE(10) command The header for the MODE SENSE(10) command is 8 bytes long. Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index b627ffee74..3e727b91a7 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -607,6 +607,7 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) uint64_t nb_sectors; int page, dbd, buflen; uint8_t *p; + uint8_t dev_specific_param; dbd = req->cmd.buf[1] & 0x8; page = req->cmd.buf[2] & 0x3f; @@ -614,16 +615,31 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) memset(outbuf, 0, req->cmd.xfer); p = outbuf; - p[1] = 0; /* Default media type. */ - p[3] = 0; /* Block descriptor length. */ if (bdrv_is_read_only(s->bs)) { - p[2] = 0x80; /* Readonly. */ + dev_specific_param = 0x80; /* Readonly. */ + } else { + dev_specific_param = 0x00; + } + + if (req->cmd.buf[0] == MODE_SENSE) { + p[1] = 0; /* Default media type. */ + p[2] = dev_specific_param; + p[3] = 0; /* Block descriptor length. */ + p += 4; + } else { /* MODE_SENSE_10 */ + p[2] = 0; /* Default media type. */ + p[3] = dev_specific_param; + p[6] = p[7] = 0; /* Block descriptor length. */ + p += 8; } - p += 4; bdrv_get_geometry(s->bs, &nb_sectors); if ((~dbd) & nb_sectors) { - outbuf[3] = 8; /* Block descriptor length */ + if (req->cmd.buf[0] == MODE_SENSE) { + outbuf[3] = 8; /* Block descriptor length */ + } else { /* MODE_SENSE_10 */ + outbuf[7] = 8; /* Block descriptor length */ + } nb_sectors /= s->cluster_size; nb_sectors--; if (nb_sectors > 0xffffff) @@ -653,7 +669,17 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) } buflen = p - outbuf; - outbuf[0] = buflen - 1; + /* + * The mode data length field specifies the length in bytes of the + * following data that is available to be transferred. The mode data + * length does not include itself. + */ + if (req->cmd.buf[0] == MODE_SENSE) { + outbuf[0] = buflen - 1; + } else { /* MODE_SENSE_10 */ + outbuf[0] = ((buflen - 2) >> 8) & 0xff; + outbuf[1] = (buflen - 2) & 0xff; + } if (buflen > req->cmd.xfer) buflen = req->cmd.xfer; return buflen; From 282ab04eb1e6f4faa6c5d2827e3209c4a1eec40e Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 14:08:24 +0200 Subject: [PATCH 13/25] scsi-disk: respect the page control (PC) field in the MODE SENSE command The page control (PC) field defines the type of mode parameter values to be returned in the mode pages: PC=0 : Current values PC=1 : Changeable values PC=2 : Default values PC=3 : Saved values The current implementation always returns the same type of parameters. This is OK for Current and Default values as we don't support changes to be done by the MODE SELECT command. For Saved values the following applies (implemented by this patch): "A PC field value of 3h requests that the target return the saved values of the mode parameters. Implementation of saved page parameters is optional. Mode parameters not supported by the target shall be set to zero. If saved values are not implemented, the command shall be terminated with CHECK CONDITION status, the sense key set to ILLEGAL REQUEST and the additional sense code set to SAVING PARAMETERS NOT SUPPORTED." For Changeable values the following applies (implemented by this patch): "A PC field value of 1h requests that the target return a mask denoting those mode parameters that are changeable. In the mask, the fields of the mode parameters that are changeable shall be set to all one bits and the fields of the mode parameters that are non-changeable (i.e. defined by the target) shall be set to all zero bits." In newer versions of the SCSI-2 spec the following clause was added. "If the logical unit does not implement changeable parameters mode pages and the device server receives a MODE SENSE command with 01b in the PC field, then the command shall be terminated with CHECK CONDITION status, with the sense key set to ILLEGAL REQUEST, and the additional sense code set to INVALID FIELD IN CDB." This was not yet included in the SCSI-2 Working Drafts from 1986-1993. I assume that the variant to return CHECK CONDITION for PC=1 is not widely implemented by real devices. I have a legacy OS which fails, if MODE_SENSE returns non GOOD for PC=1. So for highest compatibility I implemented the former variant with this patch. The last Working Draft X3T9.2 Rev. 10L 7-SEP-93 can be found here: http://ldkelley.com/SCSI2/SCSI2/SCSI2-08.html#8.2.10 In mode_sense_page() this patch also avoids multiple hard coded definitions of the same mode page length. Instead I use the varable p[1]. In fact the returned length of the mode pages 4 and 5 were wrong (2 bytes less). Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 3e727b91a7..1287cab5e9 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -486,16 +486,26 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) return buflen; } -static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p) +static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p, + int page_control) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); BlockDriverState *bdrv = s->bs; int cylinders, heads, secs; + /* + * If Changeable Values are requested, a mask denoting those mode parameters + * that are changeable shall be returned. As we currently don't support + * parameter changes via MODE_SELECT all bits are returned set to zero. + * The buffer was already menset to zero by the caller of this function. + */ switch (page) { case 4: /* Rigid disk device geometry page. */ p[0] = 4; p[1] = 0x16; + if (page_control == 1) { /* Changeable Values */ + return p[1] + 2; + } /* if a geometry hint is available, use it */ bdrv_get_geometry_hint(bdrv, &cylinders, &heads, &secs); p[2] = (cylinders >> 16) & 0xff; @@ -520,11 +530,14 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p) /* Medium rotation rate [rpm], 5400 rpm */ p[20] = (5400 >> 8) & 0xff; p[21] = 5400 & 0xff; - return 0x16; + return p[1] + 2; case 5: /* Flexible disk device geometry page. */ p[0] = 5; p[1] = 0x1e; + if (page_control == 1) { /* Changeable Values */ + return p[1] + 2; + } /* Transfer rate [kbit/s], 5Mbit/s */ p[2] = 5000 >> 8; p[3] = 5000 & 0xff; @@ -556,21 +569,27 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p) /* Medium rotation rate [rpm], 5400 rpm */ p[28] = (5400 >> 8) & 0xff; p[29] = 5400 & 0xff; - return 0x1e; + return p[1] + 2; case 8: /* Caching page. */ p[0] = 8; p[1] = 0x12; + if (page_control == 1) { /* Changeable Values */ + return p[1] + 2; + } if (bdrv_enable_write_cache(s->bs)) { p[2] = 4; /* WCE */ } - return 20; + return p[1] + 2; case 0x2a: /* CD Capabilities and Mechanical Status page. */ if (bdrv_get_type_hint(bdrv) != BDRV_TYPE_CDROM) return 0; p[0] = 0x2a; p[1] = 0x14; + if (page_control == 1) { /* Changeable Values */ + return p[1] + 2; + } p[2] = 3; // CD-R & CD-RW read p[3] = 0; // Writing not supported p[4] = 0x7f; /* Audio, composite, digital out, @@ -594,7 +613,7 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p) p[19] = (16 * 176) & 0xff; p[20] = (16 * 176) >> 8; // 16x write speed current p[21] = (16 * 176) & 0xff; - return 22; + return p[1] + 2; default: return 0; @@ -605,13 +624,15 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); uint64_t nb_sectors; - int page, dbd, buflen; + int page, dbd, buflen, page_control; uint8_t *p; uint8_t dev_specific_param; dbd = req->cmd.buf[1] & 0x8; page = req->cmd.buf[2] & 0x3f; - DPRINTF("Mode Sense (page %d, len %zd)\n", page, req->cmd.xfer); + page_control = (req->cmd.buf[2] & 0xc0) >> 6; + DPRINTF("Mode Sense(%d) (page %d, len %d, page_control %d)\n", + (req->cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, len, page_control); memset(outbuf, 0, req->cmd.xfer); p = outbuf; @@ -655,16 +676,20 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) p += 8; } + if (page_control == 3) { /* Saved Values */ + return -1; /* ILLEGAL_REQUEST */ + } + switch (page) { case 0x04: case 0x05: case 0x08: case 0x2a: - p += mode_sense_page(req, page, p); + p += mode_sense_page(req, page, p, page_control); break; case 0x3f: - p += mode_sense_page(req, 0x08, p); - p += mode_sense_page(req, 0x2a, p); + p += mode_sense_page(req, 0x08, p, page_control); + p += mode_sense_page(req, 0x2a, p, page_control); break; } From 2488b74081650a5312fe1515660b6cb095244c34 Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 14:08:25 +0200 Subject: [PATCH 14/25] scsi-disk: fix the block descriptor returned by the MODE SENSE command The block descriptor contains the number of blocks, not the highest LBA. Real hard disks return 0 if the number of blocks exceed the maximum 0xFFFFFF. SCSI-Spec: http://ldkelley.com/SCSI2/SCSI2/SCSI2-08.html#8.3.3 "The number of blocks field specifies the number of logical blocks on the medium to which the density code and block length fields apply. A value of zero indicates that all of the remaining logical blocks of the logical unit shall have the medium characteristics specified." Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 1287cab5e9..e085d5b6f9 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -662,9 +662,8 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) outbuf[7] = 8; /* Block descriptor length */ } nb_sectors /= s->cluster_size; - nb_sectors--; if (nb_sectors > 0xffffff) - nb_sectors = 0xffffff; + nb_sectors = 0; p[0] = 0; /* media density code */ p[1] = (nb_sectors >> 16) & 0xff; p[2] = (nb_sectors >> 8) & 0xff; From a9c17b2bf3639662fbdeb736289ebabfda9fa21a Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 14:08:26 +0200 Subject: [PATCH 15/25] scsi-disk: return CHECK CONDITION for unknown page codes in the MODE SENSE command SCSI-Spec: http://ldkelley.com/SCSI2/SCSI2/SCSI2-08.html#8.2.10 "An initiator may request any one or all of the supported mode pages from a target. If an initiator issues a MODE SENSE command with a page code value not implemented by the target, the target shall return CHECK CONDITION status and shall set the sense key to ILLEGAL REQUEST and the additional sense code to INVALID FIELD IN CDB." Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index e085d5b6f9..44f99b08c2 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -690,6 +690,8 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) p += mode_sense_page(req, 0x08, p, page_control); p += mode_sense_page(req, 0x2a, p, page_control); break; + default: + return -1; /* ILLEGAL_REQUEST */ } buflen = p - outbuf; From 333d50fe3d9a1ff0a6a1a44ef42a0d3a2a7f2abe Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Tue, 31 Aug 2010 14:08:27 +0200 Subject: [PATCH 16/25] scsi-disk: fix the check of the DBD bit in the MODE SENSE command The DBD bit does not work as expected. SCSI-Spec: http://ldkelley.com/SCSI2/SCSI2/SCSI2-08.html#8.2.10 "A disable block descriptors (DBD) bit of zero indicates that the target may return zero or more block descriptors in the returned MODE SENSE data (see 8.3.3), at the target's discretion. A DBD bit of one specifies that the target shall not return any block descriptors in the returned MODE SENSE data." Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 44f99b08c2..0c90c77b9b 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -655,7 +655,7 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) } bdrv_get_geometry(s->bs, &nb_sectors); - if ((~dbd) & nb_sectors) { + if (!dbd && nb_sectors) { if (req->cmd.buf[0] == MODE_SENSE) { outbuf[3] = 8; /* Block descriptor length */ } else { /* MODE_SENSE_10 */ From aa2b1e8908271a2d7f31b73106cb83b8b4c49dfc Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Wed, 1 Sep 2010 16:33:21 +0200 Subject: [PATCH 17/25] scsi: fix and improve debug prints Some of them are not compile clean. Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/lsi53c895a.c | 4 ++-- hw/scsi-disk.c | 8 ++++---- hw/scsi-generic.c | 18 ++++++++++++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c index bd7b661426..5eaf69ed3f 100644 --- a/hw/lsi53c895a.c +++ b/hw/lsi53c895a.c @@ -600,7 +600,7 @@ static void lsi_queue_command(LSIState *s) { lsi_request *p = s->current; - DPRINTF("Queueing tag=0x%x\n", s->current_tag); + DPRINTF("Queueing tag=0x%x\n", p->tag); assert(s->current != NULL); assert(s->current->dma_len == 0); QTAILQ_INSERT_TAIL(&s->queue, s->current, next); @@ -880,7 +880,7 @@ static void lsi_do_msgout(LSIState *s) break; case 0x20: /* SIMPLE queue */ s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID; - DPRINTF("SIMPLE queue tag=0x%x\n", s->current_tag & 0xff); + DPRINTF("SIMPLE queue tag=0x%x\n", s->select_tag & 0xff); break; case 0x21: /* HEAD of queue */ BADF("HEAD queue not implemented\n"); diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 0c90c77b9b..b80c9bd2c3 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -135,7 +135,7 @@ static void scsi_read_complete(void * opaque, int ret) scsi_command_complete(r, CHECK_CONDITION, NO_SENSE); return; } - DPRINTF("Data ready tag=0x%x len=%" PRId64 "\n", r->req.tag, r->iov.iov_len); + DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->iov.iov_len); r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, r->iov.iov_len); } @@ -155,7 +155,7 @@ static void scsi_read_data(SCSIDevice *d, uint32_t tag) return; } if (r->sector_count == (uint32_t)-1) { - DPRINTF("Read buf_len=%" PRId64 "\n", r->iov.iov_len); + DPRINTF("Read buf_len=%zd\n", r->iov.iov_len); r->sector_count = 0; r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, r->iov.iov_len); return; @@ -631,8 +631,8 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf) dbd = req->cmd.buf[1] & 0x8; page = req->cmd.buf[2] & 0x3f; page_control = (req->cmd.buf[2] & 0xc0) >> 6; - DPRINTF("Mode Sense(%d) (page %d, len %d, page_control %d)\n", - (req->cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, len, page_control); + DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n", + (req->cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, req->cmd.xfer, page_control); memset(outbuf, 0, req->cmd.xfer); p = outbuf; diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index aa4f62ae57..953802777d 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -164,7 +164,7 @@ static void scsi_read_complete(void * opaque, int ret) int len; if (ret) { - DPRINTF("IO error\n"); + DPRINTF("IO error ret %d\n", ret); scsi_command_complete(r, ret); return; } @@ -236,7 +236,7 @@ static void scsi_write_complete(void * opaque, int ret) if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && s->qdev.type == TYPE_TAPE) { s->qdev.blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11]; - DPRINTF("block size %d\n", s->blocksize); + DPRINTF("block size %d\n", s->qdev.blocksize); } scsi_command_complete(r, ret); @@ -351,8 +351,18 @@ static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag, } scsi_req_fixup(&r->req); - DPRINTF("Command: lun=%d tag=0x%x data=0x%02x len %d\n", lun, tag, - cmd[0], r->req.cmd.xfer); + DPRINTF("Command: lun=%d tag=0x%x len %zd data=0x%02x", lun, tag, + r->req.cmd.xfer, cmd[0]); + +#ifdef DEBUG_SCSI + { + int i; + for (i = 1; i < r->req.cmd.len; i++) { + printf(" 0x%02x", cmd[i]); + } + printf("\n"); + } +#endif if (r->req.cmd.xfer == 0) { if (r->buf != NULL) From b407a81e70306e8199315e39576939dc7ac925d5 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 2 Sep 2010 10:38:03 +0100 Subject: [PATCH 18/25] qemu-io: Make alloc output useful when nb_sectors=1 There is no indication whether or not the sector is allocated when nb_sectors=1: sector allocated at offset 64 KiB This message is produced whether or not the sector is allocated. Simply use the same message as the plural case, I don't think the English is so broken that we need special case output here: 0/1 sectors allocated at offset 64 KiB This change does not affect qemu-iotests since nb_sectors=1 is not used there. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- qemu-io.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 2dbe20f33e..bd3bd16fdf 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -1427,11 +1427,8 @@ alloc_f(int argc, char **argv) cvtstr(offset, s1, sizeof(s1)); - if (nb_sectors == 1) - printf("sector allocated at offset %s\n", s1); - else - printf("%d/%d sectors allocated at offset %s\n", - sum_alloc, nb_sectors, s1); + printf("%d/%d sectors allocated at offset %s\n", + sum_alloc, nb_sectors, s1); return 0; } From 897804d6299af372a43110799cbe1d6804d5e1bc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 2 Sep 2010 17:48:32 +0200 Subject: [PATCH 19/25] raw-posix: Don't use file name for host_cdrom detection on Linux On Linux, we have code to detect CD-ROMs using an ioctl. We shouldn't lose anything but false positives by removing the check for a /dev/cd* path. Signed-off-by: Kevin Wolf --- block/raw-posix.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 72fb8cebd4..0fce449092 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1154,9 +1154,6 @@ static int cdrom_probe_device(const char *filename) int fd, ret; int prio = 0; - if (strstart(filename, "/dev/cd", NULL)) - prio = 50; - fd = open(filename, O_RDONLY | O_NONBLOCK); if (fd < 0) { goto out; From 79d1d3311319f3390f540f547becaba9d957f84c Mon Sep 17 00:00:00 2001 From: "Jonathan A. Kollasch" Date: Fri, 3 Sep 2010 07:57:46 -0500 Subject: [PATCH 20/25] Improve ATA IDENTIFY word 64 contents. Fill in word 64 of IDENTIFY data to indicate support for PIO modes 3 and 4. This allows NetBSD guests to use UltraDMA modes instead of just PIO mode 0. Signed-off-by: Jonathan A. Kollasch Signed-off-by: Kevin Wolf --- hw/ide/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 3651d2be91..1e466d1ff5 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -139,6 +139,7 @@ static void ide_identify(IDEState *s) put_le16(p + 61, s->nb_sectors >> 16); put_le16(p + 62, 0x07); /* single word dma0-2 supported */ put_le16(p + 63, 0x07); /* mdma0-2 supported */ + put_le16(p + 64, 0x03); /* pio3-4 supported */ put_le16(p + 65, 120); put_le16(p + 66, 120); put_le16(p + 67, 120); @@ -199,13 +200,12 @@ static void ide_atapi_identify(IDEState *s) put_le16(p + 53, 7); /* words 64-70, 54-58, 88 valid */ put_le16(p + 62, 7); /* single word dma0-2 supported */ put_le16(p + 63, 7); /* mdma0-2 supported */ - put_le16(p + 64, 0x3f); /* PIO modes supported */ #else put_le16(p + 49, 1 << 9); /* LBA supported, no DMA */ put_le16(p + 53, 3); /* words 64-70, 54-58 valid */ put_le16(p + 63, 0x103); /* DMA modes XXX: may be incorrect */ - put_le16(p + 64, 1); /* PIO modes */ #endif + put_le16(p + 64, 3); /* pio3-4 supported */ put_le16(p + 65, 0xb4); /* minimum DMA multiword tx cycle time */ put_le16(p + 66, 0xb4); /* recommended DMA multiword tx cycle time */ put_le16(p + 67, 0x12c); /* minimum PIO cycle time without flow control */ From ebef0bbb1a8b5420a50e887972cc6bf0d150f9b7 Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Mon, 6 Sep 2010 11:50:55 +0200 Subject: [PATCH 21/25] scsi-disk: add some optional scsi commands I use a legacy OS which depends on some optional SCSI commands. In fact this implementation does nothing special, but provides minimum support for the following commands: REZERO UNIT WRITE AND VERIFY(10) WRITE AND VERIFY(12) WRITE AND VERIFY(16) MODE SELECT(6) MODE SELECT(10) SEEK(6) SEEK(10) Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index b80c9bd2c3..1446ca634c 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -892,6 +892,12 @@ static int scsi_disk_emulate_command(SCSIRequest *req, uint8_t *outbuf) break; case VERIFY: break; + case REZERO_UNIT: + DPRINTF("Rezero Unit\n"); + if (!bdrv_is_inserted(s->bs)) { + goto not_ready; + } + break; default: goto illegal_request; } @@ -1011,6 +1017,7 @@ static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag, case SERVICE_ACTION_IN: case REPORT_LUNS: case VERIFY: + case REZERO_UNIT: rc = scsi_disk_emulate_command(&r->req, outbuf); if (rc > 0) { r->iov.iov_len = rc; @@ -1034,13 +1041,40 @@ static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag, case WRITE_10: case WRITE_12: case WRITE_16: - DPRINTF("Write (sector %" PRId64 ", count %d)\n", lba, len); + case WRITE_VERIFY: + case WRITE_VERIFY_12: + case WRITE_VERIFY_16: + DPRINTF("Write %s(sector %" PRId64 ", count %d)\n", + (command & 0xe) == 0xe ? "And Verify " : "", lba, len); if (lba > s->max_lba) goto illegal_lba; r->sector = lba * s->cluster_size; r->sector_count = len * s->cluster_size; is_write = 1; break; + case MODE_SELECT: + DPRINTF("Mode Select(6) (len %d)\n", len); + /* We don't support mode parameter changes. + Allow the mode parameter header + block descriptors only. */ + if (len > 12) { + goto fail; + } + break; + case MODE_SELECT_10: + DPRINTF("Mode Select(10) (len %d)\n", len); + /* We don't support mode parameter changes. + Allow the mode parameter header + block descriptors only. */ + if (len > 16) { + goto fail; + } + break; + case SEEK_6: + case SEEK_10: + DPRINTF("Seek(%d) (sector %" PRId64 ")\n", command == SEEK_6 ? 6 : 10, lba); + if (lba > s->max_lba) { + goto illegal_lba; + } + break; default: DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]); fail: From 05acda4d1660591ea317619699f6aa7c659a90f1 Mon Sep 17 00:00:00 2001 From: Bernhard Kohl Date: Mon, 6 Sep 2010 17:06:02 +0200 Subject: [PATCH 22/25] raw-posix: improve detection of scsi-generic devices Allow symbolic links which point to /dev/sgX devices. Signed-off-by: Bernhard Kohl Signed-off-by: Kevin Wolf --- block/raw-posix.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 0fce449092..813372a6c7 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -48,6 +48,7 @@ #endif #ifdef __linux__ #include +#include #include #include #endif @@ -868,8 +869,13 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; #if defined(__linux__) - if (strstart(filename, "/dev/sg", NULL)) { - bs->sg = 1; + { + char resolved_path[ MAXPATHLEN ], *temp; + + temp = realpath(filename, resolved_path); + if (temp && strstart(temp, "/dev/sg", NULL)) { + bs->sg = 1; + } } #endif From 1bd8e175580a87c7b9e6791faca7626f9bc3ceeb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 31 Aug 2010 13:44:25 +0200 Subject: [PATCH 23/25] qemu-img convert: Use cache=unsafe for output image If qemu-img crashes during the conversion, the user will throw away the broken output file anyway and start over. So no need to be too cautious. Signed-off-by: Kevin Wolf --- qemu-img.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index d2a978b912..4e035e44cc 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -783,7 +783,8 @@ static int img_convert(int argc, char **argv) goto out; } - out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_FLAGS | BDRV_O_RDWR); + out_bs = bdrv_new_open(out_filename, out_fmt, + BDRV_O_FLAGS | BDRV_O_RDWR | BDRV_O_NO_FLUSH); if (!out_bs) { ret = -1; goto out; From ceb25e5c7554255931f7f5647d3ad9df36111f53 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 31 Aug 2010 15:08:03 +0200 Subject: [PATCH 24/25] block: Fix BDRV_O_CACHE_MASK BDRV_O_CACHE_MASK should have been extended when cache=unsafe introduced a new flag BDRV_O_NO_FLUSH. There are currently no users that would change their behaviour because of this, but let's clean it up before things break. Signed-off-by: Kevin Wolf --- block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.h b/block.h index db131a3438..5f6438010f 100644 --- a/block.h +++ b/block.h @@ -35,7 +35,7 @@ typedef struct QEMUSnapshotInfo { #define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ #define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ -#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) From 7ec5e6a4ca43494949465f9f9f3d9e4c7c620503 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 1 Sep 2010 12:40:52 +0200 Subject: [PATCH 25/25] qcow2: Remove unnecessary flush after L2 write When a new cluster was allocated, we only need a flush after the write to the L2 table if it was a COW and we need to decrease the refcounts of the old clusters. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 166922f8be..f562b1602e 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -655,7 +655,7 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table, int ret; BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); - ret = bdrv_pwrite_sync(bs->file, l2_offset + start_offset, + ret = bdrv_pwrite(bs->file, l2_offset + start_offset, &l2_table[l2_start_index], len); if (ret < 0) { return ret; @@ -718,9 +718,17 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) goto err; } - for (i = 0; i < j; i++) - qcow2_free_any_clusters(bs, - be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1); + /* + * If this was a COW, we need to decrease the refcount of the old cluster. + * Also flush bs->file to get the right order for L2 and refcount update. + */ + if (j != 0) { + bdrv_flush(bs->file); + for (i = 0; i < j; i++) { + qcow2_free_any_clusters(bs, + be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1); + } + } ret = 0; err: