From 6d07859926fdb8515fd5cb94a692df7896c3848e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:22 +0300
Subject: [PATCH 01/14] dirty-bitmap: operate with int64_t amount

Underlying HBitmap operates even with uint64_t. Thus this change is safe.
This would be useful f.e. to mark entire bitmap dirty in one call.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-2-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/dirty-bitmap.c         | 6 +++---
 include/block/block_int.h    | 2 +-
 include/block/dirty-bitmap.h | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 4902ca557f..f2bfdcfdea 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
 }
 
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
+                           int64_t cur_sector, int64_t nr_sectors)
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
+                             int64_t cur_sector, int64_t nr_sectors)
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
@@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 }
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
+                    int64_t nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a6b13adb45..09be16f88c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -783,7 +783,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force);
 bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect);
 bool bdrv_requests_pending(BlockDriverState *bs);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 80afe603f6..ee3388f90d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -33,9 +33,9 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                    int64_t sector);
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors);
+                           int64_t cur_sector, int64_t nr_sectors);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors);
+                             int64_t cur_sector, int64_t nr_sectors);
 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
 void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);

From 531509ba2856ca8efdcf0df18a7dec66af0d36ee Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:23 +0300
Subject: [PATCH 02/14] mirror: make sectors_in_flight int64_t

We keep here the sum of int fields. Thus this could easily overflow,
especially when we will start sending big requests in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-3-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index b1e633ecad..fbacee234b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -60,7 +60,7 @@ typedef struct MirrorBlockJob {
 
     unsigned long *in_flight_bitmap;
     int in_flight;
-    int sectors_in_flight;
+    int64_t sectors_in_flight;
     int ret;
     bool unmap;
     bool waiting_for_io;

From 49efb1f5b0ea45bdb75d578460a7866a58167d1f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:24 +0300
Subject: [PATCH 03/14] mirror: create mirror_throttle helper

The patch also places last_pause_ns from stack in mirror_run into
MirrorBlockJob structure. This helper will be useful in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468503209-19498-4-git-send-email-den@openvz.org
CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: Eric Blake <eblake@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index fbacee234b..5fde13b1f5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -58,6 +58,7 @@ typedef struct MirrorBlockJob {
     QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
     int buf_free_count;
 
+    uint64_t last_pause_ns;
     unsigned long *in_flight_bitmap;
     int in_flight;
     int64_t sectors_in_flight;
@@ -514,6 +515,18 @@ static void mirror_exit(BlockJob *job, void *opaque)
     bdrv_unref(src);
 }
 
+static void mirror_throttle(MirrorBlockJob *s)
+{
+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+    if (now - s->last_pause_ns > SLICE_TIME) {
+        s->last_pause_ns = now;
+        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
+    } else {
+        block_job_pause_point(&s->common);
+    }
+}
+
 static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
@@ -521,7 +534,6 @@ static void coroutine_fn mirror_run(void *opaque)
     BlockDriverState *bs = blk_bs(s->common.blk);
     BlockDriverState *target_bs = blk_bs(s->target);
     int64_t sector_num, end, length;
-    uint64_t last_pause_ns;
     BlockDriverInfo bdi;
     char backing_filename[2]; /* we only need 2 characters because we are only
                                  checking for a NULL string */
@@ -577,7 +589,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
     mirror_free_init(s);
 
-    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     if (!s->is_none_mode) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
         BlockDriverState *base = s->base;
@@ -587,14 +599,8 @@ static void coroutine_fn mirror_run(void *opaque)
             /* Just to make sure we are not exceeding int limit. */
             int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
                                  end - sector_num);
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 
-            if (now - last_pause_ns > SLICE_TIME) {
-                last_pause_ns = now;
-                block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
-            } else {
-                block_job_pause_point(&s->common);
-            }
+            mirror_throttle(s);
 
             if (block_job_is_cancelled(&s->common)) {
                 goto immediate_exit;
@@ -617,7 +623,7 @@ static void coroutine_fn mirror_run(void *opaque)
     bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
     for (;;) {
         uint64_t delay_ns = 0;
-        int64_t cnt;
+        int64_t cnt, delta;
         bool should_complete;
 
         if (s->ret < 0) {
@@ -640,7 +646,8 @@ static void coroutine_fn mirror_run(void *opaque)
          * We do so every SLICE_TIME nanoseconds, or when there is an error,
          * or when the source is clean, whichever comes first.
          */
-        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
+        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
+        if (delta < SLICE_TIME &&
             s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
             if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
@@ -710,7 +717,7 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.cancelled = false;
             break;
         }
-        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
 
 immediate_exit:

From c0b363ad43b00d77c81813db20eec45923973549 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:25 +0300
Subject: [PATCH 04/14] mirror: create mirror_dirty_init helper for mirror_run

The code inside the helper will be extended in the next patch. mirror_run
itself is overbloated at the moment.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-5-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 70 +++++++++++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5fde13b1f5..0e74666d01 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -527,18 +527,54 @@ static void mirror_throttle(MirrorBlockJob *s)
     }
 }
 
+static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
+{
+    int64_t sector_num, end;
+    BlockDriverState *base = s->base;
+    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
+    bool mark_all_dirty = base == NULL && !bdrv_has_zero_init(target_bs);
+    int ret, n;
+
+    end = s->bdev_length / BDRV_SECTOR_SIZE;
+
+    /* First part, loop on the sectors and initialize the dirty bitmap.  */
+    for (sector_num = 0; sector_num < end; ) {
+        /* Just to make sure we are not exceeding int limit. */
+        int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
+                             end - sector_num);
+
+        mirror_throttle(s);
+
+        if (block_job_is_cancelled(&s->common)) {
+            return 0;
+        }
+
+        ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        assert(n > 0);
+        if (ret == 1 || mark_all_dirty) {
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+        }
+        sector_num += n;
+    }
+    return 0;
+}
+
 static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     MirrorExitData *data;
     BlockDriverState *bs = blk_bs(s->common.blk);
     BlockDriverState *target_bs = blk_bs(s->target);
-    int64_t sector_num, end, length;
+    int64_t length;
     BlockDriverInfo bdi;
     char backing_filename[2]; /* we only need 2 characters because we are only
                                  checking for a NULL string */
     int ret = 0;
-    int n;
     int target_cluster_size = BDRV_SECTOR_SIZE;
 
     if (block_job_is_cancelled(&s->common)) {
@@ -580,7 +616,6 @@ static void coroutine_fn mirror_run(void *opaque)
     s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
     s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
 
-    end = s->bdev_length / BDRV_SECTOR_SIZE;
     s->buf = qemu_try_blockalign(bs, s->buf_size);
     if (s->buf == NULL) {
         ret = -ENOMEM;
@@ -591,32 +626,9 @@ static void coroutine_fn mirror_run(void *opaque)
 
     s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     if (!s->is_none_mode) {
-        /* First part, loop on the sectors and initialize the dirty bitmap.  */
-        BlockDriverState *base = s->base;
-        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
-
-        for (sector_num = 0; sector_num < end; ) {
-            /* Just to make sure we are not exceeding int limit. */
-            int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
-                                 end - sector_num);
-
-            mirror_throttle(s);
-
-            if (block_job_is_cancelled(&s->common)) {
-                goto immediate_exit;
-            }
-
-            ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
-
-            if (ret < 0) {
-                goto immediate_exit;
-            }
-
-            assert(n > 0);
-            if (ret == 1 || mark_all_dirty) {
-                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
-            }
-            sector_num += n;
+        ret = mirror_dirty_init(s);
+        if (ret < 0 || block_job_is_cancelled(&s->common)) {
+            goto immediate_exit;
         }
     }
 

From 2f0342efdbb0a3c5b5a6a4f4831cbe90c445510b Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:26 +0300
Subject: [PATCH 05/14] block: remove extra condition in
 bdrv_can_write_zeroes_with_unmap

All .bdrv_co_write_zeroes callbacks nowadays work perfectly even
with backing store attached. If future new callbacks would be unable to do
that - they have a chance to block this in bdrv_get_info().

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-6-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index d2dac3dce9..30d64e6ca5 100644
--- a/block.c
+++ b/block.c
@@ -2837,7 +2837,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
     BlockDriverInfo bdi;
 
-    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
         return false;
     }
 

From b7d5062c9cf53f21a555843775392dc03364bbff Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:27 +0300
Subject: [PATCH 06/14] mirror: optimize dirty bitmap filling in mirror_run a
 bit

There is no need to scan allocation tables if we have mark_all_dirty flag
set. Just mark it all dirty.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-7-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 0e74666d01..b7b1ded30f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -533,11 +533,15 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
     BlockDriverState *base = s->base;
     BlockDriverState *bs = blk_bs(s->common.blk);
     BlockDriverState *target_bs = blk_bs(s->target);
-    bool mark_all_dirty = base == NULL && !bdrv_has_zero_init(target_bs);
     int ret, n;
 
     end = s->bdev_length / BDRV_SECTOR_SIZE;
 
+    if (base == NULL && !bdrv_has_zero_init(target_bs)) {
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+        return 0;
+    }
+
     /* First part, loop on the sectors and initialize the dirty bitmap.  */
     for (sector_num = 0; sector_num < end; ) {
         /* Just to make sure we are not exceeding int limit. */
@@ -556,7 +560,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
         }
 
         assert(n > 0);
-        if (ret == 1 || mark_all_dirty) {
+        if (ret == 1) {
             bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
         }
         sector_num += n;

From c7c2769c0e5769eaad9d968fe2161505b657e02a Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:28 +0300
Subject: [PATCH 07/14] mirror: efficiently zero out target

With a bdrv_co_write_zeroes method on a target BDS and when this method
is working as indicated by the bdrv_can_write_zeroes_with_unmap(), zeroes
will not be placed into the wire. Thus the target could be very efficiently
zeroed out. This should be done with the largest chunk possible.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-8-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b7b1ded30f..0262b03989 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -538,8 +538,32 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
     end = s->bdev_length / BDRV_SECTOR_SIZE;
 
     if (base == NULL && !bdrv_has_zero_init(target_bs)) {
-        bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
-        return 0;
+        if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+            return 0;
+        }
+
+        for (sector_num = 0; sector_num < end; ) {
+            int nb_sectors = MIN(end - sector_num,
+                QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+
+            mirror_throttle(s);
+
+            if (block_job_is_cancelled(&s->common)) {
+                return 0;
+            }
+
+            if (s->in_flight >= MAX_IN_FLIGHT) {
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+                mirror_wait_for_io(s);
+                continue;
+            }
+
+            mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
+            sector_num += nb_sectors;
+        }
+
+        mirror_drain(s);
     }
 
     /* First part, loop on the sectors and initialize the dirty bitmap.  */

From 4b5004d9fc5b7d8e4447dc81c2f26477c2d590f7 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 14 Jul 2016 16:33:29 +0300
Subject: [PATCH 08/14] mirror: improve performance of mirroring of empty disk

We should not take into account zero blocks for delay calculations.
They are not read and thus IO throttling is not required. In the
other case VM migration with 16 Tb QCOW2 disk with 4 Gb of data takes
days.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-9-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/mirror.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 0262b03989..f78186d47b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -323,6 +323,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     int nb_chunks = 1;
     int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
     int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
 
     sector_num = hbitmap_iter_next(&s->hbi);
     if (sector_num < 0) {
@@ -373,7 +374,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
     while (nb_chunks > 0 && sector_num < end) {
         int ret;
-        int io_sectors;
+        int io_sectors, io_sectors_acct;
         BlockDriverState *file;
         enum MirrorMethod {
             MIRROR_METHOD_COPY,
@@ -410,12 +411,17 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         switch (mirror_method) {
         case MIRROR_METHOD_COPY:
             io_sectors = mirror_do_read(s, sector_num, io_sectors);
+            io_sectors_acct = io_sectors;
             break;
         case MIRROR_METHOD_ZERO:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
-            break;
         case MIRROR_METHOD_DISCARD:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+            mirror_do_zero_or_discard(s, sector_num, io_sectors,
+                                      mirror_method == MIRROR_METHOD_DISCARD);
+            if (write_zeroes_ok) {
+                io_sectors_acct = 0;
+            } else {
+                io_sectors_acct = io_sectors;
+            }
             break;
         default:
             abort();
@@ -424,7 +430,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         sector_num += io_sectors;
         nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
         if (s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors);
+            delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
         }
     }
     return delay_ns;

From cf56a3c632d039d00e29dfe8676321d6d349190c Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 22 Jun 2016 15:35:27 +0300
Subject: [PATCH 09/14] mirror: fix request throttling in drive-mirror

There are 2 deficiencies here:
- mirror_iteration could start several requests inside. Thus we could
  simply have more in_flight requests than MAX_IN_FLIGHT.
- keeping this in mind throttling in mirror_run which is checking
  s->in_flight == MAX_IN_FLIGHT is wrong.

The patch adds the check and throttling into mirror_iteration and fixes
the check in mirror_run() to be sure.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1466598927-5990-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit e648dc95c28fbca12e67be26a1fc4b9a0676c3fe)
---
 block/mirror.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index f78186d47b..836a5d0194 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -407,6 +407,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
             }
         }
 
+        while (s->in_flight >= MAX_IN_FLIGHT) {
+            trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+            mirror_wait_for_io(s);
+        }
+
         mirror_clip_sectors(s, sector_num, &io_sectors);
         switch (mirror_method) {
         case MIRROR_METHOD_COPY:
@@ -695,7 +700,7 @@ static void coroutine_fn mirror_run(void *opaque)
         delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
         if (delta < SLICE_TIME &&
             s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                 mirror_wait_for_io(s);

From d5cf4079ca713cccd16302159fc06f8540c33a57 Mon Sep 17 00:00:00 2001
From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Date: Tue, 19 Jul 2016 22:27:29 +0530
Subject: [PATCH 10/14] block/gluster: rename [server, volname, image] ->
 [host, volume, path]

A future patch will add support for multiple gluster servers. Existing
terminology is a bit unusual in relation to what names are used by
other networked devices, and doesn't map very well to the terminology
we expect to use for multiple servers.  Therefore, rename the following
options:
'server'  -> 'host'
'image'   -> 'path'
'volname' -> 'volume'

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-2-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/gluster.c | 54 ++++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 406c1e6357..26478b0f8d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -29,10 +29,10 @@ typedef struct BDRVGlusterState {
 } BDRVGlusterState;
 
 typedef struct GlusterConf {
-    char *server;
+    char *host;
     int port;
-    char *volname;
-    char *image;
+    char *volume;
+    char *path;
     char *transport;
     int debug_level;
 } GlusterConf;
@@ -40,9 +40,9 @@ typedef struct GlusterConf {
 static void qemu_gluster_gconf_free(GlusterConf *gconf)
 {
     if (gconf) {
-        g_free(gconf->server);
-        g_free(gconf->volname);
-        g_free(gconf->image);
+        g_free(gconf->host);
+        g_free(gconf->volume);
+        g_free(gconf->path);
         g_free(gconf->transport);
         g_free(gconf);
     }
@@ -62,19 +62,19 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
     if (*p == '\0') {
         return -EINVAL;
     }
-    gconf->volname = g_strndup(q, p - q);
+    gconf->volume = g_strndup(q, p - q);
 
-    /* image */
+    /* path */
     p += strspn(p, "/");
     if (*p == '\0') {
         return -EINVAL;
     }
-    gconf->image = g_strdup(p);
+    gconf->path = g_strdup(p);
     return 0;
 }
 
 /*
- * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
  *
  * 'gluster' is the protocol.
  *
@@ -83,10 +83,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  * tcp, unix and rdma. If a transport type isn't specified, then tcp
  * type is assumed.
  *
- * 'server' specifies the server where the volume file specification for
+ * 'host' specifies the host where the volume file specification for
  * the given volume resides. This can be either hostname, ipv4 address
  * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
+ * If transport type is 'unix', then 'host' field should not be specified.
  * The 'socket' field needs to be populated with the path to unix domain
  * socket.
  *
@@ -95,9 +95,9 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  * default port. If the transport type is unix, then 'port' should not be
  * specified.
  *
- * 'volname' is the name of the gluster volume which contains the VM image.
+ * 'volume' is the name of the gluster volume which contains the VM image.
  *
- * 'image' is the path to the actual VM image that resides on gluster volume.
+ * 'path' is the path to the actual VM image that resides on gluster volume.
  *
  * Examples:
  *
@@ -106,7 +106,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
  * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
  * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
- * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
  * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
  * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
  */
@@ -157,9 +157,9 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
             ret = -EINVAL;
             goto out;
         }
-        gconf->server = g_strdup(qp->p[0].value);
+        gconf->host = g_strdup(qp->p[0].value);
     } else {
-        gconf->server = g_strdup(uri->server ? uri->server : "localhost");
+        gconf->host = g_strdup(uri->server ? uri->server : "localhost");
         gconf->port = uri->port;
     }
 
@@ -180,18 +180,18 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
 
     ret = qemu_gluster_parseuri(gconf, filename);
     if (ret < 0) {
-        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
-                   "volname/image[?socket=...]");
+        error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/"
+                   "volume/path[?socket=...]");
         errno = -ret;
         goto out;
     }
 
-    glfs = glfs_new(gconf->volname);
+    glfs = glfs_new(gconf->volume);
     if (!glfs) {
         goto out;
     }
 
-    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->host,
             gconf->port);
     if (ret < 0) {
         goto out;
@@ -205,9 +205,9 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
     ret = glfs_init(glfs);
     if (ret) {
         error_setg_errno(errp, errno,
-                         "Gluster connection failed for server=%s port=%d "
-                         "volume=%s image=%s transport=%s", gconf->server,
-                         gconf->port, gconf->volname, gconf->image,
+                         "Gluster connection failed for host=%s port=%d "
+                         "volume=%s path=%s transport=%s", gconf->host,
+                         gconf->port, gconf->volume, gconf->path,
                          gconf->transport);
 
         /* glfs_init sometimes doesn't set errno although docs suggest that */
@@ -373,7 +373,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
 
     qemu_gluster_parse_flags(bdrv_flags, &open_flags);
 
-    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    s->fd = glfs_open(s->glfs, gconf->path, open_flags);
     if (!s->fd) {
         ret = -errno;
     }
@@ -439,7 +439,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
     }
 #endif
 
-    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+    reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
     if (reop_s->fd == NULL) {
         /* reops->glfs will be cleaned up in _abort */
         ret = -errno;
@@ -587,7 +587,7 @@ static int qemu_gluster_create(const char *filename,
         goto out;
     }
 
-    fd = glfs_creat(glfs, gconf->image,
+    fd = glfs_creat(glfs, gconf->path,
         O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
     if (!fd) {
         ret = -errno;

From f70c50c81746ecd352617f2a15eca5cb03cf6219 Mon Sep 17 00:00:00 2001
From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Date: Tue, 19 Jul 2016 22:27:30 +0530
Subject: [PATCH 11/14] block/gluster: code cleanup

unified coding styles of multiline function arguments and other error functions
moved random declarations of structures and other list variables

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-3-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/gluster.c | 143 +++++++++++++++++++++++++-----------------------
 1 file changed, 75 insertions(+), 68 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 26478b0f8d..4fce2ee406 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -13,6 +13,12 @@
 #include "qapi/error.h"
 #include "qemu/uri.h"
 
+#define GLUSTER_OPT_FILENAME        "filename"
+#define GLUSTER_OPT_DEBUG           "debug"
+#define GLUSTER_DEBUG_DEFAULT       4
+#define GLUSTER_DEBUG_MAX           9
+
+
 typedef struct GlusterAIOCB {
     int64_t size;
     int ret;
@@ -28,6 +34,11 @@ typedef struct BDRVGlusterState {
     int debug_level;
 } BDRVGlusterState;
 
+typedef struct BDRVGlusterReopenState {
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+} BDRVGlusterReopenState;
+
 typedef struct GlusterConf {
     char *host;
     int port;
@@ -37,6 +48,49 @@ typedef struct GlusterConf {
     int debug_level;
 } GlusterConf;
 
+
+static QemuOptsList qemu_gluster_create_opts = {
+    .name = "qemu-gluster-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = BLOCK_OPT_PREALLOC,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode (allowed values: off, full)"
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
+    }
+};
+
+static QemuOptsList runtime_opts = {
+    .name = "gluster",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the gluster image",
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
+    },
+};
+
+
 static void qemu_gluster_gconf_free(GlusterConf *gconf)
 {
     if (gconf) {
@@ -181,7 +235,7 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
     ret = qemu_gluster_parseuri(gconf, filename);
     if (ret < 0) {
         error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/"
-                   "volume/path[?socket=...]");
+                         "volume/path[?socket=...]");
         errno = -ret;
         goto out;
     }
@@ -255,30 +309,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
     qemu_bh_schedule(acb->bh);
 }
 
-#define GLUSTER_OPT_FILENAME "filename"
-#define GLUSTER_OPT_DEBUG "debug"
-#define GLUSTER_DEBUG_DEFAULT 4
-#define GLUSTER_DEBUG_MAX 9
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "gluster",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = GLUSTER_OPT_FILENAME,
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the gluster image",
-        },
-        {
-            .name = GLUSTER_OPT_DEBUG,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Gluster log level, valid range is 0-9",
-        },
-        { /* end of list */ }
-    },
-};
-
 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
 {
     assert(open_flags != NULL);
@@ -395,12 +425,6 @@ out:
     return ret;
 }
 
-typedef struct BDRVGlusterReopenState {
-    struct glfs *glfs;
-    struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
                                        BlockReopenQueue *queue, Error **errp)
 {
@@ -501,7 +525,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
 
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
 static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int size, BdrvRequestFlags flags)
+                                                      int64_t offset,
+                                                      int size,
+                                                      BdrvRequestFlags flags)
 {
     int ret;
     GlusterAIOCB acb;
@@ -527,7 +553,7 @@ static inline bool gluster_supports_zerofill(void)
 }
 
 static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
+                                        int64_t size)
 {
     return glfs_zerofill(fd, offset, size);
 }
@@ -539,7 +565,7 @@ static inline bool gluster_supports_zerofill(void)
 }
 
 static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
+                                        int64_t size)
 {
     return 0;
 }
@@ -576,19 +602,17 @@ static int qemu_gluster_create(const char *filename,
     tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
     if (!tmp || !strcmp(tmp, "off")) {
         prealloc = 0;
-    } else if (!strcmp(tmp, "full") &&
-               gluster_supports_zerofill()) {
+    } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
         prealloc = 1;
     } else {
         error_setg(errp, "Invalid preallocation mode: '%s'"
-            " or GlusterFS doesn't support zerofill API",
-            tmp);
+                         " or GlusterFS doesn't support zerofill API", tmp);
         ret = -EINVAL;
         goto out;
     }
 
     fd = glfs_creat(glfs, gconf->path,
-        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+                    O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
     if (!fd) {
         ret = -errno;
     } else {
@@ -614,7 +638,8 @@ out:
 }
 
 static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
+                                           int64_t sector_num, int nb_sectors,
+                                           QEMUIOVector *qiov, int write)
 {
     int ret;
     GlusterAIOCB acb;
@@ -629,10 +654,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
 
     if (write) {
         ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+                                 gluster_finish_aiocb, &acb);
     } else {
         ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+                                gluster_finish_aiocb, &acb);
     }
 
     if (ret < 0) {
@@ -657,13 +682,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
 }
 
 static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+                                              int64_t sector_num,
+                                              int nb_sectors,
+                                              QEMUIOVector *qiov)
 {
     return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }
 
 static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+                                               int64_t sector_num,
+                                               int nb_sectors,
+                                               QEMUIOVector *qiov)
 {
     return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }
@@ -725,7 +754,8 @@ error:
 
 #ifdef CONFIG_GLUSTERFS_DISCARD
 static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
+                                                int64_t sector_num,
+                                                int nb_sectors)
 {
     int ret;
     GlusterAIOCB acb;
@@ -934,29 +964,6 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status(
 }
 
 
-static QemuOptsList qemu_gluster_create_opts = {
-    .name = "qemu-gluster-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        {
-            .name = GLUSTER_OPT_DEBUG,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Gluster log level, valid range is 0-9",
-        },
-        { /* end of list */ }
-    }
-};
-
 static BlockDriver bdrv_gluster = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster",

From 0552ff24656c26b2a0d135966fb0feaa90d7f9bc Mon Sep 17 00:00:00 2001
From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Date: Tue, 19 Jul 2016 22:27:31 +0530
Subject: [PATCH 12/14] block/gluster: deprecate rdma support

gluster volfile server fetch happens through unix and/or tcp, it doesn't
support volfile fetch over rdma. The rdma code may actually mislead,
so to make sure things do not break, for now we fallback to tcp when requested
for rdma, with a warning.

If you are wondering how this worked all these days, its the gluster libgfapi
code which handles anything other than unix transport as socket/tcp, sad but
true.

Also gluster doesn't support ipv6 addresses, removing the ipv6 related
comments/docs section

[Jeff: Minor grammatical fixes in comments and commit message, per
review comments]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-4-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/gluster.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 4fce2ee406..a042ccb075 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -12,6 +12,7 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qemu/uri.h"
+#include "qemu/error-report.h"
 
 #define GLUSTER_OPT_FILENAME        "filename"
 #define GLUSTER_OPT_DEBUG           "debug"
@@ -134,12 +135,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  *
  * 'transport' specifies the transport type used to connect to gluster
  * management daemon (glusterd). Valid transport types are
- * tcp, unix and rdma. If a transport type isn't specified, then tcp
- * type is assumed.
+ * tcp or unix. If a transport type isn't specified, then tcp type is assumed.
  *
  * 'host' specifies the host where the volume file specification for
- * the given volume resides. This can be either hostname, ipv4 address
- * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * the given volume resides. This can be either hostname or ipv4 address.
  * If transport type is 'unix', then 'host' field should not be specified.
  * The 'socket' field needs to be populated with the path to unix domain
  * socket.
@@ -158,11 +157,8 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  * file=gluster://1.2.3.4/testvol/a.img
  * file=gluster+tcp://1.2.3.4/testvol/a.img
  * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
  * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
  * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
- * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
  */
 static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
 {
@@ -185,7 +181,9 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
         gconf->transport = g_strdup("unix");
         is_unix = true;
     } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gconf->transport = g_strdup("rdma");
+        gconf->transport = g_strdup("tcp");
+        error_report("Warning: rdma feature is not supported, falling "
+                     "back to tcp");
     } else {
         ret = -EINVAL;
         goto out;
@@ -1048,6 +1046,12 @@ static BlockDriver bdrv_gluster_unix = {
     .create_opts                  = &qemu_gluster_create_opts,
 };
 
+/* rdma is deprecated (actually never supported for volfile fetch).
+ * Let's maintain it for the protocol compatibility, to make sure things
+ * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
+ * protocol with a warning.
+ * TODO: remove gluster+rdma interface support
+ */
 static BlockDriver bdrv_gluster_rdma = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster+rdma",

From 7edac2ddebbc11000ce71244e13b2c9462bf7ad5 Mon Sep 17 00:00:00 2001
From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Date: Tue, 19 Jul 2016 22:27:32 +0530
Subject: [PATCH 13/14] block/gluster: using new qapi schema

this patch adds 'GlusterServer' related schema in qapi/block-core.json

[Jeff: minor fix-ups of comments and formatting, per patch reviews]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468947453-5433-5-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/gluster.c      | 109 +++++++++++++++++++++++--------------------
 qapi/block-core.json |  68 +++++++++++++++++++++++++--
 2 files changed, 122 insertions(+), 55 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index a042ccb075..80e68319ce 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -16,6 +16,7 @@
 
 #define GLUSTER_OPT_FILENAME        "filename"
 #define GLUSTER_OPT_DEBUG           "debug"
+#define GLUSTER_DEFAULT_PORT        24007
 #define GLUSTER_DEBUG_DEFAULT       4
 #define GLUSTER_DEBUG_MAX           9
 
@@ -40,15 +41,6 @@ typedef struct BDRVGlusterReopenState {
     struct glfs_fd *fd;
 } BDRVGlusterReopenState;
 
-typedef struct GlusterConf {
-    char *host;
-    int port;
-    char *volume;
-    char *path;
-    char *transport;
-    int debug_level;
-} GlusterConf;
-
 
 static QemuOptsList qemu_gluster_create_opts = {
     .name = "qemu-gluster-create-opts",
@@ -92,18 +84,7 @@ static QemuOptsList runtime_opts = {
 };
 
 
-static void qemu_gluster_gconf_free(GlusterConf *gconf)
-{
-    if (gconf) {
-        g_free(gconf->host);
-        g_free(gconf->volume);
-        g_free(gconf->path);
-        g_free(gconf->transport);
-        g_free(gconf);
-    }
-}
-
-static int parse_volume_options(GlusterConf *gconf, char *path)
+static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 {
     char *p, *q;
 
@@ -160,8 +141,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
  * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
  * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
  */
-static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
+                                  const char *filename)
 {
+    GlusterServer *gsconf;
     URI *uri;
     QueryParams *qp = NULL;
     bool is_unix = false;
@@ -172,16 +155,18 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
         return -EINVAL;
     }
 
+    gconf->server = gsconf = g_new0(GlusterServer, 1);
+
     /* transport */
     if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gconf->transport = g_strdup("tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
     } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gconf->transport = g_strdup("tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
     } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gconf->transport = g_strdup("unix");
+        gsconf->type = GLUSTER_TRANSPORT_UNIX;
         is_unix = true;
     } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gconf->transport = g_strdup("tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
         error_report("Warning: rdma feature is not supported, falling "
                      "back to tcp");
     } else {
@@ -209,10 +194,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
             ret = -EINVAL;
             goto out;
         }
-        gconf->host = g_strdup(qp->p[0].value);
+        gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
     } else {
-        gconf->host = g_strdup(uri->server ? uri->server : "localhost");
-        gconf->port = uri->port;
+        gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
+        if (uri->port) {
+            gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
+        } else {
+            gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+        }
     }
 
 out:
@@ -223,17 +212,18 @@ out:
     return ret;
 }
 
-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
-                                      Error **errp)
+static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
+                                      const char *filename, Error **errp)
 {
     struct glfs *glfs = NULL;
     int ret;
     int old_errno;
 
-    ret = qemu_gluster_parseuri(gconf, filename);
+    ret = qemu_gluster_parse_uri(gconf, filename);
     if (ret < 0) {
-        error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/"
-                         "volume/path[?socket=...]");
+        error_setg(errp, "Invalid URI");
+        error_append_hint(errp, "Usage: file=gluster[+transport]://"
+                                "[host[:port]]/volume/path[?socket=...]\n");
         errno = -ret;
         goto out;
     }
@@ -243,8 +233,16 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
         goto out;
     }
 
-    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->host,
-            gconf->port);
+    if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) {
+        ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[gconf->server->type],
+                                   gconf->server->u.q_unix.path, 0);
+    } else {
+        ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[gconf->server->type],
+                                   gconf->server->u.tcp.host,
+                                   atoi(gconf->server->u.tcp.port));
+    }
     if (ret < 0) {
         goto out;
     }
@@ -256,15 +254,22 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
 
     ret = glfs_init(glfs);
     if (ret) {
-        error_setg_errno(errp, errno,
-                         "Gluster connection failed for host=%s port=%d "
-                         "volume=%s path=%s transport=%s", gconf->host,
-                         gconf->port, gconf->volume, gconf->path,
-                         gconf->transport);
+        if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) {
+            error_setg(errp,
+                       "Gluster connection for volume %s, path %s failed on "
+                       "socket %s ", gconf->volume, gconf->path,
+                       gconf->server->u.q_unix.path);
+        } else {
+            error_setg(errp,
+                       "Gluster connection for volume %s, path %s failed on "
+                       "host  %s and port %s ", gconf->volume, gconf->path,
+                       gconf->server->u.tcp.host, gconf->server->u.tcp.port);
+        }
 
         /* glfs_init sometimes doesn't set errno although docs suggest that */
-        if (errno == 0)
+        if (errno == 0) {
             errno = EINVAL;
+        }
 
         goto out;
     }
@@ -352,7 +357,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     BDRVGlusterState *s = bs->opaque;
     int open_flags = 0;
     int ret = 0;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    BlockdevOptionsGluster *gconf = NULL;
     QemuOpts *opts;
     Error *local_err = NULL;
     const char *filename;
@@ -375,7 +380,9 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
         s->debug_level = GLUSTER_DEBUG_MAX;
     }
 
+    gconf = g_new0(BlockdevOptionsGluster, 1);
     gconf->debug_level = s->debug_level;
+    gconf->has_debug_level = true;
     s->glfs = qemu_gluster_init(gconf, filename, errp);
     if (!s->glfs) {
         ret = -errno;
@@ -410,7 +417,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
 
 out:
     qemu_opts_del(opts);
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
     if (!ret) {
         return ret;
     }
@@ -429,7 +436,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
     int ret = 0;
     BDRVGlusterState *s;
     BDRVGlusterReopenState *reop_s;
-    GlusterConf *gconf = NULL;
+    BlockdevOptionsGluster *gconf;
     int open_flags = 0;
 
     assert(state != NULL);
@@ -442,9 +449,9 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
 
     qemu_gluster_parse_flags(state->flags, &open_flags);
 
-    gconf = g_new0(GlusterConf, 1);
-
+    gconf = g_new0(BlockdevOptionsGluster, 1);
     gconf->debug_level = s->debug_level;
+    gconf->has_debug_level = true;
     reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
     if (reop_s->glfs == NULL) {
         ret = -errno;
@@ -470,7 +477,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
 
 exit:
     /* state->opaque will be freed in either the _abort or _commit */
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
     return ret;
 }
 
@@ -572,14 +579,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
 static int qemu_gluster_create(const char *filename,
                                QemuOpts *opts, Error **errp)
 {
+    BlockdevOptionsGluster *gconf;
     struct glfs *glfs;
     struct glfs_fd *fd;
     int ret = 0;
     int prealloc = 0;
     int64_t total_size = 0;
     char *tmp = NULL;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
 
+    gconf = g_new0(BlockdevOptionsGluster, 1);
     gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
                                                  GLUSTER_DEBUG_DEFAULT);
     if (gconf->debug_level < 0) {
@@ -587,6 +595,7 @@ static int qemu_gluster_create(const char *filename,
     } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
         gconf->debug_level = GLUSTER_DEBUG_MAX;
     }
+    gconf->has_debug_level = true;
 
     glfs = qemu_gluster_init(gconf, filename, errp);
     if (!glfs) {
@@ -628,7 +637,7 @@ static int qemu_gluster_create(const char *filename,
     }
 out:
     g_free(tmp);
-    qemu_gluster_gconf_free(gconf);
+    qapi_free_BlockdevOptionsGluster(gconf);
     if (glfs) {
         glfs_fini(glfs);
     }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3444a9bc3e..e30e496bd3 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1666,13 +1666,14 @@
 # @host_device, @host_cdrom: Since 2.1
 #
 # Since: 2.0
+# @gluster: Since 2.7
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
-            'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-            'http', 'https', 'luks', 'null-aio', 'null-co', 'parallels',
-            'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
-            'vmdk', 'vpc', 'vvfat' ] }
+            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
+            'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
+            'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp',
+            'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile
@@ -2064,6 +2065,63 @@
             '*rewrite-corrupted': 'bool',
             '*read-pattern': 'QuorumReadPattern' } }
 
+##
+# @GlusterTransport
+#
+# An enumeration of Gluster transport types
+#
+# @tcp:   TCP   - Transmission Control Protocol
+#
+# @unix:  UNIX  - Unix domain socket
+#
+# Since: 2.7
+##
+{ 'enum': 'GlusterTransport',
+  'data': [ 'unix', 'tcp' ] }
+
+
+##
+# @GlusterServer
+#
+# Captures the address of a socket
+#
+# Details for connecting to a gluster server
+#
+# @type:       Transport type used for gluster connection
+#
+# @unix:       socket file
+#
+# @tcp:        host address and port number
+#
+# Since: 2.7
+##
+{ 'union': 'GlusterServer',
+  'base': { 'type': 'GlusterTransport' },
+  'discriminator': 'type',
+  'data': { 'unix': 'UnixSocketAddress',
+            'tcp': 'InetSocketAddress' } }
+
+##
+# @BlockdevOptionsGluster
+#
+# Driver specific block device options for Gluster
+#
+# @volume:      name of gluster volume where VM image resides
+#
+# @path:        absolute path to image file in gluster volume
+#
+# @server:      gluster server description
+#
+# @debug-level: #optional libgfapi log level (default '4' which is Error)
+#
+# Since: 2.7
+##
+{ 'struct': 'BlockdevOptionsGluster',
+  'data': { 'volume': 'str',
+            'path': 'str',
+            'server': 'GlusterServer',
+            '*debug_level': 'int' } }
+
 ##
 # @BlockdevOptions
 #
@@ -2111,7 +2169,7 @@
       'file':       'BlockdevOptionsFile',
       'ftp':        'BlockdevOptionsFile',
       'ftps':       'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
+      'gluster':    'BlockdevOptionsGluster',
       'host_cdrom': 'BlockdevOptionsFile',
       'host_device':'BlockdevOptionsFile',
       'http':       'BlockdevOptionsFile',

From 6c7189bb29de9fa2202f613f3c6caf028f96f261 Mon Sep 17 00:00:00 2001
From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Date: Tue, 19 Jul 2016 22:27:33 +0530
Subject: [PATCH 14/14] block/gluster: add support for multiple gluster servers

This patch adds a way to specify multiple volfile servers to the gluster
block backend of QEMU with tcp|rdma transport types and their port numbers.

Problem:

Currently VM Image on gluster volume is specified like this:

file=gluster[+tcp]://host[:port]/testvol/a.img

Say we have three hosts in a trusted pool with replica 3 volume in action.
When the host mentioned in the command above goes down for some reason,
the other two hosts are still available. But there's currently no way
to tell QEMU about them.

Solution:

New way of specifying VM Image on gluster volume with volfile servers:
(We still support old syntax to maintain backward compatibility)

Basic command line syntax looks like:

Pattern I:
 -drive driver=gluster,
        volume=testvol,path=/path/a.raw,[debug=N,]
        server.0.type=tcp,
        server.0.host=1.2.3.4,
        server.0.port=24007,
        server.1.type=unix,
        server.1.socket=/path/socketfile

Pattern II:
 'json:{"driver":"qcow2","file":{"driver":"gluster",
       "volume":"testvol","path":"/path/a.qcow2",["debug":N,]
       "server":[{hostinfo_1}, ...{hostinfo_N}]}}'

   driver      => 'gluster' (protocol name)
   volume      => name of gluster volume where our VM image resides
   path        => absolute path of image in gluster volume
  [debug]      => libgfapi loglevel [(0 - 9) default 4 -> Error]

  {hostinfo}   => {{type:"tcp",host:"1.2.3.4"[,port=24007]},
                   {type:"unix",socket:"/path/sockfile"}}

   type        => transport type used to connect to gluster management daemon,
                  it can be tcp|unix
   host        => host address (hostname/ipv4/ipv6 addresses/socket path)
   port        => port number on which glusterd is listening.
   socket      => path to socket file

Examples:
1.
 -drive driver=qcow2,file.driver=gluster,
        file.volume=testvol,file.path=/path/a.qcow2,file.debug=9,
        file.server.0.type=tcp,
        file.server.0.host=1.2.3.4,
        file.server.0.port=24007,
        file.server.1.type=unix,
        file.server.1.socket=/var/run/glusterd.socket
2.
  'json:{"driver":"qcow2","file":{"driver":"gluster","volume":"testvol",
         "path":"/path/a.qcow2","debug":9,"server":
         [{"type":"tcp","host":"1.2.3.4","port":"24007"},
          {"type":"unix","socket":"/var/run/glusterd.socket"}
         ]}}'

This patch gives a mechanism to provide all the server addresses, which are in
replica set, so in case host1 is down VM can still boot from any of the
active hosts.

This is equivalent to the backup-volfile-servers option supported by
mount.glusterfs (FUSE way of mounting gluster volume)

credits: sincere thanks to all the supporters

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1468947453-5433-6-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/gluster.c      | 397 ++++++++++++++++++++++++++++++++++++++-----
 qapi/block-core.json |   2 +-
 2 files changed, 358 insertions(+), 41 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 80e68319ce..296bd9929e 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -11,15 +11,27 @@
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 
 #define GLUSTER_OPT_FILENAME        "filename"
+#define GLUSTER_OPT_VOLUME          "volume"
+#define GLUSTER_OPT_PATH            "path"
+#define GLUSTER_OPT_TYPE            "type"
+#define GLUSTER_OPT_SERVER_PATTERN  "server."
+#define GLUSTER_OPT_HOST            "host"
+#define GLUSTER_OPT_PORT            "port"
+#define GLUSTER_OPT_TO              "to"
+#define GLUSTER_OPT_IPV4            "ipv4"
+#define GLUSTER_OPT_IPV6            "ipv6"
+#define GLUSTER_OPT_SOCKET          "socket"
 #define GLUSTER_OPT_DEBUG           "debug"
 #define GLUSTER_DEFAULT_PORT        24007
 #define GLUSTER_DEBUG_DEFAULT       4
 #define GLUSTER_DEBUG_MAX           9
 
+#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
 
 typedef struct GlusterAIOCB {
     int64_t size;
@@ -83,6 +95,92 @@ static QemuOptsList runtime_opts = {
     },
 };
 
+static QemuOptsList runtime_json_opts = {
+    .name = "gluster_json",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_VOLUME,
+            .type = QEMU_OPT_STRING,
+            .help = "name of gluster volume where VM image resides",
+        },
+        {
+            .name = GLUSTER_OPT_PATH,
+            .type = QEMU_OPT_STRING,
+            .help = "absolute path to image file in gluster volume",
+        },
+        {
+            .name = GLUSTER_OPT_DEBUG,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Gluster log level, valid range is 0-9",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_type_opts = {
+    .name = "gluster_type",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_TYPE,
+            .type = QEMU_OPT_STRING,
+            .help = "tcp|unix",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_unix_opts = {
+    .name = "gluster_unix",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_SOCKET,
+            .type = QEMU_OPT_STRING,
+            .help = "socket file path)",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "gluster_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_TYPE,
+            .type = QEMU_OPT_STRING,
+            .help = "tcp|unix",
+        },
+        {
+            .name = GLUSTER_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (hostname/ipv4/ipv6 addresses)",
+        },
+        {
+            .name = GLUSTER_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which glusterd is listening (default 24007)",
+        },
+        {
+            .name = "to",
+            .type = QEMU_OPT_NUMBER,
+            .help = "max port number, not supported by gluster",
+        },
+        {
+            .name = "ipv4",
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv4 bool value, not supported by gluster",
+        },
+        {
+            .name = "ipv6",
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv6 bool value, not supported by gluster",
+        },
+        { /* end of list */ }
+    },
+};
 
 static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 {
@@ -155,7 +253,8 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
         return -EINVAL;
     }
 
-    gconf->server = gsconf = g_new0(GlusterServer, 1);
+    gconf->server = g_new0(GlusterServerList, 1);
+    gconf->server->value = gsconf = g_new0(GlusterServer, 1);
 
     /* transport */
     if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
@@ -212,39 +311,34 @@ out:
     return ret;
 }
 
-static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
-                                      const char *filename, Error **errp)
+static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+                                           Error **errp)
 {
-    struct glfs *glfs = NULL;
+    struct glfs *glfs;
     int ret;
     int old_errno;
-
-    ret = qemu_gluster_parse_uri(gconf, filename);
-    if (ret < 0) {
-        error_setg(errp, "Invalid URI");
-        error_append_hint(errp, "Usage: file=gluster[+transport]://"
-                                "[host[:port]]/volume/path[?socket=...]\n");
-        errno = -ret;
-        goto out;
-    }
+    GlusterServerList *server;
 
     glfs = glfs_new(gconf->volume);
     if (!glfs) {
         goto out;
     }
 
-    if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) {
-        ret = glfs_set_volfile_server(glfs,
-                                   GlusterTransport_lookup[gconf->server->type],
-                                   gconf->server->u.q_unix.path, 0);
-    } else {
-        ret = glfs_set_volfile_server(glfs,
-                                   GlusterTransport_lookup[gconf->server->type],
-                                   gconf->server->u.tcp.host,
-                                   atoi(gconf->server->u.tcp.port));
-    }
-    if (ret < 0) {
-        goto out;
+    for (server = gconf->server; server; server = server->next) {
+        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.q_unix.path, 0);
+        } else {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.tcp.host,
+                                   atoi(server->value->u.tcp.port));
+        }
+
+        if (ret < 0) {
+            goto out;
+        }
     }
 
     ret = glfs_set_logging(glfs, "-", gconf->debug_level);
@@ -254,18 +348,21 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
 
     ret = glfs_init(glfs);
     if (ret) {
-        if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) {
-            error_setg(errp,
-                       "Gluster connection for volume %s, path %s failed on "
-                       "socket %s ", gconf->volume, gconf->path,
-                       gconf->server->u.q_unix.path);
-        } else {
-            error_setg(errp,
-                       "Gluster connection for volume %s, path %s failed on "
-                       "host  %s and port %s ", gconf->volume, gconf->path,
-                       gconf->server->u.tcp.host, gconf->server->u.tcp.port);
+        error_setg(errp, "Gluster connection for volume %s, path %s failed"
+                         " to connect", gconf->volume, gconf->path);
+        for (server = gconf->server; server; server = server->next) {
+            if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+                error_append_hint(errp, "hint: failed on socket %s ",
+                                  server->value->u.q_unix.path);
+            } else {
+                error_append_hint(errp, "hint: failed on host %s and port %s ",
+                                  server->value->u.tcp.host,
+                                  server->value->u.tcp.port);
+            }
         }
 
+        error_append_hint(errp, "Please refer to gluster logs for more info\n");
+
         /* glfs_init sometimes doesn't set errno although docs suggest that */
         if (errno == 0) {
             errno = EINVAL;
@@ -284,6 +381,226 @@ out:
     return NULL;
 }
 
+static int qapi_enum_parse(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        return GLUSTER_TRANSPORT__MAX;
+    }
+
+    for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
+        if (!strcmp(opt, GlusterTransport_lookup[i])) {
+            return i;
+        }
+    }
+
+    return i;
+}
+
+/*
+ * Convert the json formatted command line into qapi.
+*/
+static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
+                                  QDict *options, Error **errp)
+{
+    QemuOpts *opts;
+    GlusterServer *gsconf;
+    GlusterServerList *curr = NULL;
+    QDict *backing_options = NULL;
+    Error *local_err = NULL;
+    char *str = NULL;
+    const char *ptr;
+    size_t num_servers;
+    int i;
+
+    /* create opts info from runtime_json_opts list */
+    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
+    if (num_servers < 1) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
+        goto out;
+    }
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
+        goto out;
+    }
+    gconf->volume = g_strdup(ptr);
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
+        goto out;
+    }
+    gconf->path = g_strdup(ptr);
+    qemu_opts_del(opts);
+
+    for (i = 0; i < num_servers; i++) {
+        str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
+        qdict_extract_subqdict(options, &backing_options, str);
+
+        /* create opts info from runtime_type_opts list */
+        opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
+        qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+        gsconf = g_new0(GlusterServer, 1);
+        gsconf->type = qapi_enum_parse(ptr);
+        if (!ptr) {
+            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+
+        }
+        if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
+            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+                       GLUSTER_OPT_TYPE, "tcp or unix");
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+        }
+        qemu_opts_del(opts);
+
+        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+            /* create opts info from runtime_tcp_opts list */
+            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_HOST);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.host = g_strdup(ptr);
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_PORT);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.port = g_strdup(ptr);
+
+            /* defend for unsupported fields in InetSocketAddress,
+             * i.e. @ipv4, @ipv6  and @to
+             */
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
+            if (ptr) {
+                gsconf->u.tcp.has_to = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv4 = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv6 = true;
+            }
+            if (gsconf->u.tcp.has_to) {
+                error_setg(&local_err, "Parameter 'to' not supported");
+                goto out;
+            }
+            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
+                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
+                goto out;
+            }
+            qemu_opts_del(opts);
+        } else {
+            /* create opts info from runtime_unix_opts list */
+            opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_SOCKET);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.q_unix.path = g_strdup(ptr);
+            qemu_opts_del(opts);
+        }
+
+        if (gconf->server == NULL) {
+            gconf->server = g_new0(GlusterServerList, 1);
+            gconf->server->value = gsconf;
+            curr = gconf->server;
+        } else {
+            curr->next = g_new0(GlusterServerList, 1);
+            curr->next->value = gsconf;
+            curr = curr->next;
+        }
+
+        qdict_del(backing_options, str);
+        g_free(str);
+        str = NULL;
+    }
+
+    return 0;
+
+out:
+    error_propagate(errp, local_err);
+    qemu_opts_del(opts);
+    if (str) {
+        qdict_del(backing_options, str);
+        g_free(str);
+    }
+    errno = EINVAL;
+    return -errno;
+}
+
+static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
+                                      const char *filename,
+                                      QDict *options, Error **errp)
+{
+    int ret;
+    if (filename) {
+        ret = qemu_gluster_parse_uri(gconf, filename);
+        if (ret < 0) {
+            error_setg(errp, "invalid URI");
+            error_append_hint(errp, "Usage: file=gluster[+transport]://"
+                                    "[host[:port]]/volume/path[?socket=...]\n");
+            errno = -ret;
+            return NULL;
+        }
+    } else {
+        ret = qemu_gluster_parse_json(gconf, options, errp);
+        if (ret < 0) {
+            error_append_hint(errp, "Usage: "
+                             "-drive driver=qcow2,file.driver=gluster,"
+                             "file.volume=testvol,file.path=/path/a.qcow2"
+                             "[,file.debug=9],file.server.0.type=tcp,"
+                             "file.server.0.host=1.2.3.4,"
+                             "file.server.0.port=24007,"
+                             "file.server.1.transport=unix,"
+                             "file.server.1.socket=/var/run/glusterd.socket ..."
+                             "\n");
+            errno = -ret;
+            return NULL;
+        }
+
+    }
+
+    return qemu_gluster_glfs_init(gconf, errp);
+}
+
 static void qemu_gluster_complete_aio(void *opaque)
 {
     GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
@@ -383,7 +700,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     gconf = g_new0(BlockdevOptionsGluster, 1);
     gconf->debug_level = s->debug_level;
     gconf->has_debug_level = true;
-    s->glfs = qemu_gluster_init(gconf, filename, errp);
+    s->glfs = qemu_gluster_init(gconf, filename, options, errp);
     if (!s->glfs) {
         ret = -errno;
         goto out;
@@ -452,7 +769,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
     gconf = g_new0(BlockdevOptionsGluster, 1);
     gconf->debug_level = s->debug_level;
     gconf->has_debug_level = true;
-    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
     if (reop_s->glfs == NULL) {
         ret = -errno;
         goto exit;
@@ -597,7 +914,7 @@ static int qemu_gluster_create(const char *filename,
     }
     gconf->has_debug_level = true;
 
-    glfs = qemu_gluster_init(gconf, filename, errp);
+    glfs = qemu_gluster_init(gconf, filename, NULL, errp);
     if (!glfs) {
         ret = -errno;
         goto out;
@@ -975,7 +1292,7 @@ static BlockDriver bdrv_gluster = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
+    .bdrv_needs_filename          = false,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1003,7 +1320,7 @@ static BlockDriver bdrv_gluster_tcp = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster+tcp",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
+    .bdrv_needs_filename          = false,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e30e496bd3..f817c30e87 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2119,7 +2119,7 @@
 { 'struct': 'BlockdevOptionsGluster',
   'data': { 'volume': 'str',
             'path': 'str',
-            'server': 'GlusterServer',
+            'server': ['GlusterServer'],
             '*debug_level': 'int' } }
 
 ##