From 76534da749ceb3b4c13ec09b173bcf6d6c2e36d4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 3 Apr 2013 12:41:46 +0200 Subject: [PATCH 1/7] usb-storage: Forward serial number to scsi-disk usb-storage takes care to fetch the USB serial number from -drive options, but it neglected to pass its own 'serial' property to the scsi-disk it creates. With this patch, the 'serial' qdev property and the 'serial' option in -drive behave the same and correctly apply the serial number on both USB and SCSI level. Signed-off-by: Kevin Wolf Acked-by: Gerd Hoffmann --- hw/pci/pci-hotplug.c | 2 +- hw/scsi-bus.c | 8 ++++++-- hw/scsi.h | 3 ++- hw/usb/dev-storage.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/pci/pci-hotplug.c b/hw/pci/pci-hotplug.c index f38df30540..180ee07fef 100644 --- a/hw/pci/pci-hotplug.c +++ b/hw/pci/pci-hotplug.c @@ -99,7 +99,7 @@ static int scsi_hot_add(Monitor *mon, DeviceState *adapter, dinfo->unit = qemu_opt_get_number(dinfo->opts, "unit", -1); dinfo->bus = scsibus->busnr; scsidev = scsi_bus_legacy_add_drive(scsibus, dinfo->bdrv, dinfo->unit, - false, -1); + false, -1, NULL); if (!scsidev) { return -1; } diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c index 08787c2a9b..ac2093a5ef 100644 --- a/hw/scsi-bus.c +++ b/hw/scsi-bus.c @@ -207,7 +207,8 @@ static int scsi_qdev_exit(DeviceState *qdev) /* handle legacy '-drive if=scsi,...' cmd line args */ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv, - int unit, bool removable, int bootindex) + int unit, bool removable, int bootindex, + const char *serial) { const char *driver; DeviceState *dev; @@ -221,6 +222,9 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv, if (object_property_find(OBJECT(dev), "removable", NULL)) { qdev_prop_set_bit(dev, "removable", removable); } + if (serial) { + qdev_prop_set_string(dev, "serial", serial); + } if (qdev_prop_set_drive(dev, "drive", bdrv) < 0) { qdev_free(dev); return NULL; @@ -243,7 +247,7 @@ int scsi_bus_legacy_handle_cmdline(SCSIBus *bus) continue; } qemu_opts_loc_restore(dinfo->opts); - if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1)) { + if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1, NULL)) { res = -1; break; } diff --git a/hw/scsi.h b/hw/scsi.h index 33e2e0bdf1..02a1497d7a 100644 --- a/hw/scsi.h +++ b/hw/scsi.h @@ -160,7 +160,8 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) } SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv, - int unit, bool removable, int bootindex); + int unit, bool removable, int bootindex, + const char *serial); int scsi_bus_legacy_handle_cmdline(SCSIBus *bus); /* diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index d3f01aa2a7..21651b3637 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -625,7 +625,7 @@ static int usb_msd_initfn_storage(USBDevice *dev) usb_desc_init(dev); scsi_bus_new(&s->bus, &s->dev.qdev, &usb_msd_scsi_info_storage); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, bs, 0, !!s->removable, - s->conf.bootindex); + s->conf.bootindex, s->serial); if (!scsi_dev) { return -1; } From 5905fbc9c94ccd744c1b249472eafcc2d827548a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:19 +0200 Subject: [PATCH 2/7] block: fix I/O throttling accounting blind spot I/O throttling relies on bdrv_acct_done() which is called when a request completes. This leaves a blind spot since we only charge for completed requests, not submitted requests. For example, if there is 1 operation remaining in this time slice the guest could submit 3 operations and they will all be submitted successfully since they don't actually get accounted for until they complete. Originally we probably thought this is okay since the requests will be accounted when the time slice is extended. In practice it causes fluctuations since the guest can exceed its I/O limit and it will be punished for this later on. Account for I/O upon submission so that I/O limits are enforced properly. Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- block.c | 21 ++++++++++----------- include/block/block_int.h | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 0ae2e93982..25976b556a 100644 --- a/block.c +++ b/block.c @@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs) bs->slice_start = 0; bs->slice_end = 0; bs->slice_time = 0; - memset(&bs->io_base, 0, sizeof(bs->io_base)); } static void bdrv_block_timer(void *opaque) @@ -1436,8 +1435,8 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->slice_time = bs_src->slice_time; bs_dest->slice_start = bs_src->slice_start; bs_dest->slice_end = bs_src->slice_end; + bs_dest->slice_submitted = bs_src->slice_submitted; bs_dest->io_limits = bs_src->io_limits; - bs_dest->io_base = bs_src->io_base; bs_dest->throttled_reqs = bs_src->throttled_reqs; bs_dest->block_timer = bs_src->block_timer; bs_dest->io_limits_enabled = bs_src->io_limits_enabled; @@ -3768,9 +3767,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, slice_time = bs->slice_end - bs->slice_start; slice_time /= (NANOSECONDS_PER_SECOND); bytes_limit = bps_limit * slice_time; - bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write]; + bytes_base = bs->slice_submitted.bytes[is_write]; if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write]; + bytes_base += bs->slice_submitted.bytes[!is_write]; } /* bytes_base: the bytes of data which have been read/written; and @@ -3828,9 +3827,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, slice_time = bs->slice_end - bs->slice_start; slice_time /= (NANOSECONDS_PER_SECOND); ios_limit = iops_limit * slice_time; - ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write]; + ios_base = bs->slice_submitted.ios[is_write]; if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write]; + ios_base += bs->slice_submitted.ios[!is_write]; } if (ios_base + 1 <= ios_limit) { @@ -3875,11 +3874,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, bs->slice_start = now; bs->slice_end = now + bs->slice_time; - bs->io_base.bytes[is_write] = bs->nr_bytes[is_write]; - bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write]; - - bs->io_base.ios[is_write] = bs->nr_ops[is_write]; - bs->io_base.ios[!is_write] = bs->nr_ops[!is_write]; + memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); } elapsed_time = now - bs->slice_start; @@ -3907,6 +3902,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, *wait = 0; } + bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * + BDRV_SECTOR_SIZE; + bs->slice_submitted.ios[is_write]++; + return false; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 0986a2d6ac..83941d8672 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -256,7 +256,7 @@ struct BlockDriverState { int64_t slice_start; int64_t slice_end; BlockIOLimit io_limits; - BlockIOBaseValue io_base; + BlockIOBaseValue slice_submitted; CoQueue throttled_reqs; QEMUTimer *block_timer; bool io_limits_enabled; From ae29d6c64bd8d55873a2cb1df50ae4321b497447 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:20 +0200 Subject: [PATCH 3/7] block: keep I/O throttling slice time constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not necessary to adjust the slice time at runtime. We already extend the current slice in order to carry over accounting into the next slice. Changing the actual slice time value introduces oscillations. The guest may experience large changes in throughput or IOPS from one moment to the next when slice times are adjusted. Reported-by: BenoƮt Canet Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- block.c | 19 +++++++++---------- blockdev.c | 1 - include/block/block_int.h | 1 - 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 25976b556a..00eca275ba 100644 --- a/block.c +++ b/block.c @@ -140,7 +140,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs) bs->slice_start = 0; bs->slice_end = 0; - bs->slice_time = 0; } static void bdrv_block_timer(void *opaque) @@ -1432,7 +1431,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; /* i/o timing parameters */ - bs_dest->slice_time = bs_src->slice_time; bs_dest->slice_start = bs_src->slice_start; bs_dest->slice_end = bs_src->slice_end; bs_dest->slice_submitted = bs_src->slice_submitted; @@ -3749,6 +3747,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, bool is_write, double elapsed_time, uint64_t *wait) { uint64_t bps_limit = 0; + uint64_t extension; double bytes_limit, bytes_base, bytes_res; double slice_time, wait_time; @@ -3796,8 +3795,10 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, * info can be kept until the timer fire, so it is increased and tuned * based on the result of experiment. */ - bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; - bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; + extension = wait_time * NANOSECONDS_PER_SECOND; + extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * + BLOCK_IO_SLICE_TIME; + bs->slice_end += extension; if (wait) { *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; } @@ -3848,8 +3849,8 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, wait_time = 0; } - bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; - bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; + /* Exceeded current slice, extend it by another slice time */ + bs->slice_end += BLOCK_IO_SLICE_TIME; if (wait) { *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; } @@ -3868,12 +3869,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, now = qemu_get_clock_ns(vm_clock); if ((bs->slice_start < now) && (bs->slice_end > now)) { - bs->slice_end = now + bs->slice_time; + bs->slice_end = now + BLOCK_IO_SLICE_TIME; } else { - bs->slice_time = 5 * BLOCK_IO_SLICE_TIME; bs->slice_start = now; - bs->slice_end = now + bs->slice_time; - + bs->slice_end = now + BLOCK_IO_SLICE_TIME; memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); } diff --git a/blockdev.c b/blockdev.c index 8cdc9ce16a..6dc999d802 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1069,7 +1069,6 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, } bs->io_limits = io_limits; - bs->slice_time = BLOCK_IO_SLICE_TIME; if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) { bdrv_io_limits_enable(bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 83941d8672..9aa98b5d12 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -252,7 +252,6 @@ struct BlockDriverState { unsigned int copy_on_read_in_flight; /* the time for latest disk I/O */ - int64_t slice_time; int64_t slice_start; int64_t slice_end; BlockIOLimit io_limits; From e660fb8b3ccc94652774d5895d122c0f13aecb89 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:21 +0200 Subject: [PATCH 4/7] block: drop duplicated slice extension code The current slice is extended when an I/O request exceeds the limit. There is no need to extend the slice every time we check a request. Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- block.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block.c b/block.c index 00eca275ba..aa16fc4c68 100644 --- a/block.c +++ b/block.c @@ -3867,10 +3867,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, int bps_ret, iops_ret; now = qemu_get_clock_ns(vm_clock); - if ((bs->slice_start < now) - && (bs->slice_end > now)) { - bs->slice_end = now + BLOCK_IO_SLICE_TIME; - } else { + if (now > bs->slice_end) { bs->slice_start = now; bs->slice_end = now + BLOCK_IO_SLICE_TIME; memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); From 0775437fafc5c733564645a22f75490770bf41f7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:22 +0200 Subject: [PATCH 5/7] block: clean up I/O throttling wait_time code The wait_time variable is in seconds. Reflect this in a comment and use NANOSECONDS_PER_SECOND instead of BLOCK_IO_SLICE_TIME * 10 (which happens to have the right value). Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- block.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index aa16fc4c68..602d8a443e 100644 --- a/block.c +++ b/block.c @@ -3800,7 +3800,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, BLOCK_IO_SLICE_TIME; bs->slice_end += extension; if (wait) { - *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; + *wait = wait_time * NANOSECONDS_PER_SECOND; } return true; @@ -3841,7 +3841,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, return false; } - /* Calc approx time to dispatch */ + /* Calc approx time to dispatch, in seconds */ wait_time = (ios_base + 1) / iops_limit; if (wait_time > elapsed_time) { wait_time = wait_time - elapsed_time; @@ -3852,7 +3852,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, /* Exceeded current slice, extend it by another slice time */ bs->slice_end += BLOCK_IO_SLICE_TIME; if (wait) { - *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; + *wait = wait_time * NANOSECONDS_PER_SECOND; } return true; From c2bc78b6a975ea2dcd7eee9f0dce22cc060cdcdc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 5 Apr 2013 12:51:31 +0200 Subject: [PATCH 6/7] qcow2: Return real error in qcow2_update_snapshot_refcount This fixes the error message triggered by the following script: cat > /tmp/blkdebug.cfg < --- block/qcow2-refcount.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index c38e970bf2..4799681137 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -747,10 +747,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (l1_table_offset != s->l1_table_offset) { l1_table = g_malloc0(align_offset(l1_size2, 512)); l1_allocated = 1; - if (bdrv_pread(bs->file, l1_table_offset, - l1_table, l1_size2) != l1_size2) - { - ret = -EIO; + + ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); + if (ret < 0) { goto fail; } @@ -802,7 +801,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } if (refcount < 0) { - ret = -EIO; + ret = refcount; goto fail; } } @@ -833,7 +832,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, refcount = get_refcount(bs, l2_offset >> s->cluster_bits); } if (refcount < 0) { - ret = -EIO; + ret = refcount; goto fail; } else if (refcount == 1) { l2_offset |= QCOW_OFLAG_COPIED; From c2b6ff51e4a3ad1f7ec5dbc94970e9778b31d718 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 5 Apr 2013 12:57:10 +0200 Subject: [PATCH 7/7] qcow2: Fix L1 write error handling in qcow2_update_snapshot_refcount It ignored the error code, and at least the 'goto fail' is obvious nonsense as it creates an endless loop (if the next attempt doesn't magically succeed) and leaves the in-memory L1 table in big-endian instead of converting it back. In error cases, there's no point in writing an updated L1 table, so skip this part for them. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4799681137..b32738f8d9 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -851,14 +851,16 @@ fail: } /* Update L1 only if it isn't deleted anyway (addend = -1) */ - if (addend >= 0 && l1_modified) { - for(i = 0; i < l1_size; i++) + if (ret == 0 && addend >= 0 && l1_modified) { + for (i = 0; i < l1_size; i++) { cpu_to_be64s(&l1_table[i]); - if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, - l1_size2) < 0) - goto fail; - for(i = 0; i < l1_size; i++) + } + + ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2); + + for (i = 0; i < l1_size; i++) { be64_to_cpus(&l1_table[i]); + } } if (l1_allocated) g_free(l1_table);