From 76534da749ceb3b4c13ec09b173bcf6d6c2e36d4 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 3 Apr 2013 12:41:46 +0200
Subject: [PATCH 1/7] usb-storage: Forward serial number to scsi-disk

usb-storage takes care to fetch the USB serial number from -drive
options, but it neglected to pass its own 'serial' property to the
scsi-disk it creates. With this patch, the 'serial' qdev property and
the 'serial' option in -drive behave the same and correctly apply the
serial number on both USB and SCSI level.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
 hw/pci/pci-hotplug.c | 2 +-
 hw/scsi-bus.c        | 8 ++++++--
 hw/scsi.h            | 3 ++-
 hw/usb/dev-storage.c | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/pci/pci-hotplug.c b/hw/pci/pci-hotplug.c
index f38df30540..180ee07fef 100644
--- a/hw/pci/pci-hotplug.c
+++ b/hw/pci/pci-hotplug.c
@@ -99,7 +99,7 @@ static int scsi_hot_add(Monitor *mon, DeviceState *adapter,
     dinfo->unit = qemu_opt_get_number(dinfo->opts, "unit", -1);
     dinfo->bus = scsibus->busnr;
     scsidev = scsi_bus_legacy_add_drive(scsibus, dinfo->bdrv, dinfo->unit,
-                                        false, -1);
+                                        false, -1, NULL);
     if (!scsidev) {
         return -1;
     }
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 08787c2a9b..ac2093a5ef 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -207,7 +207,8 @@ static int scsi_qdev_exit(DeviceState *qdev)
 
 /* handle legacy '-drive if=scsi,...' cmd line args */
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv,
-                                      int unit, bool removable, int bootindex)
+                                      int unit, bool removable, int bootindex,
+                                      const char *serial)
 {
     const char *driver;
     DeviceState *dev;
@@ -221,6 +222,9 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv,
     if (object_property_find(OBJECT(dev), "removable", NULL)) {
         qdev_prop_set_bit(dev, "removable", removable);
     }
+    if (serial) {
+        qdev_prop_set_string(dev, "serial", serial);
+    }
     if (qdev_prop_set_drive(dev, "drive", bdrv) < 0) {
         qdev_free(dev);
         return NULL;
@@ -243,7 +247,7 @@ int scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
             continue;
         }
         qemu_opts_loc_restore(dinfo->opts);
-        if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1)) {
+        if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1, NULL)) {
             res = -1;
             break;
         }
diff --git a/hw/scsi.h b/hw/scsi.h
index 33e2e0bdf1..02a1497d7a 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -160,7 +160,8 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d)
 }
 
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv,
-                                      int unit, bool removable, int bootindex);
+                                      int unit, bool removable, int bootindex,
+                                      const char *serial);
 int scsi_bus_legacy_handle_cmdline(SCSIBus *bus);
 
 /*
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index d3f01aa2a7..21651b3637 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -625,7 +625,7 @@ static int usb_msd_initfn_storage(USBDevice *dev)
     usb_desc_init(dev);
     scsi_bus_new(&s->bus, &s->dev.qdev, &usb_msd_scsi_info_storage);
     scsi_dev = scsi_bus_legacy_add_drive(&s->bus, bs, 0, !!s->removable,
-                                            s->conf.bootindex);
+                                            s->conf.bootindex, s->serial);
     if (!scsi_dev) {
         return -1;
     }

From 5905fbc9c94ccd744c1b249472eafcc2d827548a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 5 Apr 2013 11:32:19 +0200
Subject: [PATCH 2/7] block: fix I/O throttling accounting blind spot

I/O throttling relies on bdrv_acct_done() which is called when a request
completes.  This leaves a blind spot since we only charge for completed
requests, not submitted requests.

For example, if there is 1 operation remaining in this time slice the
guest could submit 3 operations and they will all be submitted
successfully since they don't actually get accounted for until they
complete.

Originally we probably thought this is okay since the requests will be
accounted when the time slice is extended.  In practice it causes
fluctuations since the guest can exceed its I/O limit and it will be
punished for this later on.

Account for I/O upon submission so that I/O limits are enforced
properly.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-By: Benoit Canet <benoit@irqsave.net>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                   | 21 ++++++++++-----------
 include/block/block_int.h |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 0ae2e93982..25976b556a 100644
--- a/block.c
+++ b/block.c
@@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
     bs->slice_start = 0;
     bs->slice_end   = 0;
     bs->slice_time  = 0;
-    memset(&bs->io_base, 0, sizeof(bs->io_base));
 }
 
 static void bdrv_block_timer(void *opaque)
@@ -1436,8 +1435,8 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->slice_time         = bs_src->slice_time;
     bs_dest->slice_start        = bs_src->slice_start;
     bs_dest->slice_end          = bs_src->slice_end;
+    bs_dest->slice_submitted    = bs_src->slice_submitted;
     bs_dest->io_limits          = bs_src->io_limits;
-    bs_dest->io_base            = bs_src->io_base;
     bs_dest->throttled_reqs     = bs_src->throttled_reqs;
     bs_dest->block_timer        = bs_src->block_timer;
     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
@@ -3768,9 +3767,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
     slice_time = bs->slice_end - bs->slice_start;
     slice_time /= (NANOSECONDS_PER_SECOND);
     bytes_limit = bps_limit * slice_time;
-    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
+    bytes_base  = bs->slice_submitted.bytes[is_write];
     if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
+        bytes_base += bs->slice_submitted.bytes[!is_write];
     }
 
     /* bytes_base: the bytes of data which have been read/written; and
@@ -3828,9 +3827,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
     slice_time = bs->slice_end - bs->slice_start;
     slice_time /= (NANOSECONDS_PER_SECOND);
     ios_limit  = iops_limit * slice_time;
-    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
+    ios_base   = bs->slice_submitted.ios[is_write];
     if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
+        ios_base += bs->slice_submitted.ios[!is_write];
     }
 
     if (ios_base + 1 <= ios_limit) {
@@ -3875,11 +3874,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
         bs->slice_start = now;
         bs->slice_end   = now + bs->slice_time;
 
-        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
-        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
-
-        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
-        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
+        memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
     }
 
     elapsed_time  = now - bs->slice_start;
@@ -3907,6 +3902,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
         *wait = 0;
     }
 
+    bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+                                           BDRV_SECTOR_SIZE;
+    bs->slice_submitted.ios[is_write]++;
+
     return false;
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 0986a2d6ac..83941d8672 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -256,7 +256,7 @@ struct BlockDriverState {
     int64_t slice_start;
     int64_t slice_end;
     BlockIOLimit io_limits;
-    BlockIOBaseValue  io_base;
+    BlockIOBaseValue slice_submitted;
     CoQueue      throttled_reqs;
     QEMUTimer    *block_timer;
     bool         io_limits_enabled;

From ae29d6c64bd8d55873a2cb1df50ae4321b497447 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 5 Apr 2013 11:32:20 +0200
Subject: [PATCH 3/7] block: keep I/O throttling slice time constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not necessary to adjust the slice time at runtime.  We already
extend the current slice in order to carry over accounting into the next
slice.  Changing the actual slice time value introduces oscillations.

The guest may experience large changes in throughput or IOPS from one
moment to the next when slice times are adjusted.

Reported-by: Benoît Canet <benoit@irqsave.net>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-By: Benoit Canet <benoit@irqsave.net>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                   | 19 +++++++++----------
 blockdev.c                |  1 -
 include/block/block_int.h |  1 -
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 25976b556a..00eca275ba 100644
--- a/block.c
+++ b/block.c
@@ -140,7 +140,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 
     bs->slice_start = 0;
     bs->slice_end   = 0;
-    bs->slice_time  = 0;
 }
 
 static void bdrv_block_timer(void *opaque)
@@ -1432,7 +1431,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
     /* i/o timing parameters */
-    bs_dest->slice_time         = bs_src->slice_time;
     bs_dest->slice_start        = bs_src->slice_start;
     bs_dest->slice_end          = bs_src->slice_end;
     bs_dest->slice_submitted    = bs_src->slice_submitted;
@@ -3749,6 +3747,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                  bool is_write, double elapsed_time, uint64_t *wait)
 {
     uint64_t bps_limit = 0;
+    uint64_t extension;
     double   bytes_limit, bytes_base, bytes_res;
     double   slice_time, wait_time;
 
@@ -3796,8 +3795,10 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
      * info can be kept until the timer fire, so it is increased and tuned
      * based on the result of experiment.
      */
-    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
-    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+    extension = wait_time * NANOSECONDS_PER_SECOND;
+    extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+                BLOCK_IO_SLICE_TIME;
+    bs->slice_end += extension;
     if (wait) {
         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
     }
@@ -3848,8 +3849,8 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
         wait_time = 0;
     }
 
-    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
-    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+    /* Exceeded current slice, extend it by another slice time */
+    bs->slice_end += BLOCK_IO_SLICE_TIME;
     if (wait) {
         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
     }
@@ -3868,12 +3869,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
     now = qemu_get_clock_ns(vm_clock);
     if ((bs->slice_start < now)
         && (bs->slice_end > now)) {
-        bs->slice_end = now + bs->slice_time;
+        bs->slice_end = now + BLOCK_IO_SLICE_TIME;
     } else {
-        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
         bs->slice_start = now;
-        bs->slice_end   = now + bs->slice_time;
-
+        bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
         memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
     }
 
diff --git a/blockdev.c b/blockdev.c
index 8cdc9ce16a..6dc999d802 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1069,7 +1069,6 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     }
 
     bs->io_limits = io_limits;
-    bs->slice_time = BLOCK_IO_SLICE_TIME;
 
     if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
         bdrv_io_limits_enable(bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 83941d8672..9aa98b5d12 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -252,7 +252,6 @@ struct BlockDriverState {
     unsigned int copy_on_read_in_flight;
 
     /* the time for latest disk I/O */
-    int64_t slice_time;
     int64_t slice_start;
     int64_t slice_end;
     BlockIOLimit io_limits;

From e660fb8b3ccc94652774d5895d122c0f13aecb89 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 5 Apr 2013 11:32:21 +0200
Subject: [PATCH 4/7] block: drop duplicated slice extension code

The current slice is extended when an I/O request exceeds the limit.
There is no need to extend the slice every time we check a request.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-By: Benoit Canet <benoit@irqsave.net>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 00eca275ba..aa16fc4c68 100644
--- a/block.c
+++ b/block.c
@@ -3867,10 +3867,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
     int      bps_ret, iops_ret;
 
     now = qemu_get_clock_ns(vm_clock);
-    if ((bs->slice_start < now)
-        && (bs->slice_end > now)) {
-        bs->slice_end = now + BLOCK_IO_SLICE_TIME;
-    } else {
+    if (now > bs->slice_end) {
         bs->slice_start = now;
         bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
         memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));

From 0775437fafc5c733564645a22f75490770bf41f7 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 5 Apr 2013 11:32:22 +0200
Subject: [PATCH 5/7] block: clean up I/O throttling wait_time code

The wait_time variable is in seconds.  Reflect this in a comment and use
NANOSECONDS_PER_SECOND instead of BLOCK_IO_SLICE_TIME * 10 (which
happens to have the right value).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-By: Benoit Canet <benoit@irqsave.net>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index aa16fc4c68..602d8a443e 100644
--- a/block.c
+++ b/block.c
@@ -3800,7 +3800,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 BLOCK_IO_SLICE_TIME;
     bs->slice_end += extension;
     if (wait) {
-        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+        *wait = wait_time * NANOSECONDS_PER_SECOND;
     }
 
     return true;
@@ -3841,7 +3841,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
         return false;
     }
 
-    /* Calc approx time to dispatch */
+    /* Calc approx time to dispatch, in seconds */
     wait_time = (ios_base + 1) / iops_limit;
     if (wait_time > elapsed_time) {
         wait_time = wait_time - elapsed_time;
@@ -3852,7 +3852,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
     /* Exceeded current slice, extend it by another slice time */
     bs->slice_end += BLOCK_IO_SLICE_TIME;
     if (wait) {
-        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+        *wait = wait_time * NANOSECONDS_PER_SECOND;
     }
 
     return true;

From c2bc78b6a975ea2dcd7eee9f0dce22cc060cdcdc Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 5 Apr 2013 12:51:31 +0200
Subject: [PATCH 6/7] qcow2: Return real error in
 qcow2_update_snapshot_refcount

This fixes the error message triggered by the following script:

    cat > /tmp/blkdebug.cfg <<EOF
    [inject-error]
    event = "cluster_free"
    errno = "28"
    immediately = "off"
    EOF

    $qemu_img create -f qcow2 test.qcow2 10G
    $qemu_img snapshot -c snap test.qcow2
    $qemu_img snapshot -d snap blkdebug:/tmp/blkdebug.cfg:test.qcow2

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-refcount.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c38e970bf2..4799681137 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -747,10 +747,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     if (l1_table_offset != s->l1_table_offset) {
         l1_table = g_malloc0(align_offset(l1_size2, 512));
         l1_allocated = 1;
-        if (bdrv_pread(bs->file, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
-        {
-            ret = -EIO;
+
+        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        if (ret < 0) {
             goto fail;
         }
 
@@ -802,7 +801,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                         }
 
                         if (refcount < 0) {
-                            ret = -EIO;
+                            ret = refcount;
                             goto fail;
                         }
                     }
@@ -833,7 +832,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
             }
             if (refcount < 0) {
-                ret = -EIO;
+                ret = refcount;
                 goto fail;
             } else if (refcount == 1) {
                 l2_offset |= QCOW_OFLAG_COPIED;

From c2b6ff51e4a3ad1f7ec5dbc94970e9778b31d718 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 5 Apr 2013 12:57:10 +0200
Subject: [PATCH 7/7] qcow2: Fix L1 write error handling in
 qcow2_update_snapshot_refcount

It ignored the error code, and at least the 'goto fail' is obvious
nonsense as it creates an endless loop (if the next attempt doesn't
magically succeed) and leaves the in-memory L1 table in big-endian
instead of converting it back.

In error cases, there's no point in writing an updated L1 table, so
skip this part for them.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-refcount.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4799681137..b32738f8d9 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -851,14 +851,16 @@ fail:
     }
 
     /* Update L1 only if it isn't deleted anyway (addend = -1) */
-    if (addend >= 0 && l1_modified) {
-        for(i = 0; i < l1_size; i++)
+    if (ret == 0 && addend >= 0 && l1_modified) {
+        for (i = 0; i < l1_size; i++) {
             cpu_to_be64s(&l1_table[i]);
-        if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
-                        l1_size2) < 0)
-            goto fail;
-        for(i = 0; i < l1_size; i++)
+        }
+
+        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+        for (i = 0; i < l1_size; i++) {
             be64_to_cpus(&l1_table[i]);
+        }
     }
     if (l1_allocated)
         g_free(l1_table);