From 6d07859926fdb8515fd5cb94a692df7896c3848e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:22 +0300 Subject: [PATCH 01/14] dirty-bitmap: operate with int64_t amount Underlying HBitmap operates even with uint64_t. Thus this change is safe. This would be useful f.e. to mark entire bitmap dirty in one call. Signed-off-by: Denis V. Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Fam Zheng Message-id: 1468503209-19498-2-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody Signed-off-by: Jeff Cody --- block/dirty-bitmap.c | 6 +++--- include/block/block_int.h | 2 +- include/block/dirty-bitmap.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 4902ca557f..f2bfdcfdea 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) + int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) + int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); @@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) + int64_t nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { diff --git a/include/block/block_int.h b/include/block/block_int.h index a6b13adb45..09be16f88c 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -783,7 +783,7 @@ void blk_dev_eject_request(BlockBackend *blk, 
bool force); bool blk_dev_is_tray_open(BlockBackend *blk); bool blk_dev_is_medium_locked(BlockBackend *blk); -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect); bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 80afe603f6..ee3388f90d 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -33,9 +33,9 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); + int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); + int64_t cur_sector, int64_t nr_sectors); void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); From 531509ba2856ca8efdcf0df18a7dec66af0d36ee Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:23 +0300 Subject: [PATCH 02/14] mirror: make sectors_in_flight int64_t We keep here the sum of int fields. Thus this could easily overflow, especially once we start sending big requests in the next patches. Signed-off-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Fam Zheng Message-id: 1468503209-19498-3-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody Signed-off-by: Jeff Cody --- block/mirror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index b1e633ecad..fbacee234b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -60,7 +60,7 @@ typedef struct MirrorBlockJob { unsigned long *in_flight_bitmap; int in_flight; - int sectors_in_flight; + int64_t sectors_in_flight; int ret; bool unmap; bool waiting_for_io; From 49efb1f5b0ea45bdb75d578460a7866a58167d1f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:24 +0300 Subject: [PATCH 03/14] mirror: create mirror_throttle helper The patch also moves last_pause_ns from the stack of mirror_run into the MirrorBlockJob structure. This helper will be useful in the next patches. Signed-off-by: Denis V. 
Lunev Reviewed-by: Eric Blake Message-id: 1468503209-19498-4-git-send-email-den@openvz.org CC: Vladimir Sementsov-Ogievskiy CC: Eric Blake CC: Stefan Hajnoczi CC: Fam Zheng CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody Signed-off-by: Jeff Cody --- block/mirror.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index fbacee234b..5fde13b1f5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -58,6 +58,7 @@ typedef struct MirrorBlockJob { QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; int buf_free_count; + uint64_t last_pause_ns; unsigned long *in_flight_bitmap; int in_flight; int64_t sectors_in_flight; @@ -514,6 +515,18 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_unref(src); } +static void mirror_throttle(MirrorBlockJob *s) +{ + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + if (now - s->last_pause_ns > SLICE_TIME) { + s->last_pause_ns = now; + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); + } else { + block_job_pause_point(&s->common); + } +} + static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; @@ -521,7 +534,6 @@ static void coroutine_fn mirror_run(void *opaque) BlockDriverState *bs = blk_bs(s->common.blk); BlockDriverState *target_bs = blk_bs(s->target); int64_t sector_num, end, length; - uint64_t last_pause_ns; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ @@ -577,7 +589,7 @@ static void coroutine_fn mirror_run(void *opaque) mirror_free_init(s); - last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; @@ -587,14 +599,8 @@ static void coroutine_fn mirror_run(void *opaque) /* Just to make sure we are not exceeding int limit. 
*/ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, end - sector_num); - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - if (now - last_pause_ns > SLICE_TIME) { - last_pause_ns = now; - block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); - } else { - block_job_pause_point(&s->common); - } + mirror_throttle(s); if (block_job_is_cancelled(&s->common)) { goto immediate_exit; @@ -617,7 +623,7 @@ static void coroutine_fn mirror_run(void *opaque) bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); for (;;) { uint64_t delay_ns = 0; - int64_t cnt; + int64_t cnt, delta; bool should_complete; if (s->ret < 0) { @@ -640,7 +646,8 @@ static void coroutine_fn mirror_run(void *opaque) * We do so every SLICE_TIME nanoseconds, or when there is an error, * or when the source is clean, whichever comes first. */ - if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME && + delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; + if (delta < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { @@ -710,7 +717,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.cancelled = false; break; } - last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } immediate_exit: From c0b363ad43b00d77c81813db20eec45923973549 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:25 +0300 Subject: [PATCH 04/14] mirror: create mirror_dirty_init helper for mirror_run The code inside the helper will be extended in the next patch. mirror_run itself is overbloated at the moment. Signed-off-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1468503209-19498-5-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody CC: Eric Blake Signed-off-by: Jeff Cody --- block/mirror.c | 70 +++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 5fde13b1f5..0e74666d01 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -527,18 +527,54 @@ static void mirror_throttle(MirrorBlockJob *s) } } +static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) +{ + int64_t sector_num, end; + BlockDriverState *base = s->base; + BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *target_bs = blk_bs(s->target); + bool mark_all_dirty = base == NULL && !bdrv_has_zero_init(target_bs); + int ret, n; + + end = s->bdev_length / BDRV_SECTOR_SIZE; + + /* First part, loop on the sectors and initialize the dirty bitmap. */ + for (sector_num = 0; sector_num < end; ) { + /* Just to make sure we are not exceeding int limit. 
*/ + int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, + end - sector_num); + + mirror_throttle(s); + + if (block_job_is_cancelled(&s->common)) { + return 0; + } + + ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); + if (ret < 0) { + return ret; + } + + assert(n > 0); + if (ret == 1 || mark_all_dirty) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); + } + sector_num += n; + } + return 0; +} + static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; BlockDriverState *bs = blk_bs(s->common.blk); BlockDriverState *target_bs = blk_bs(s->target); - int64_t sector_num, end, length; + int64_t length; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ int ret = 0; - int n; int target_cluster_size = BDRV_SECTOR_SIZE; if (block_job_is_cancelled(&s->common)) { @@ -580,7 +616,6 @@ static void coroutine_fn mirror_run(void *opaque) s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS; s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov); - end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); if (s->buf == NULL) { ret = -ENOMEM; @@ -591,32 +626,9 @@ static void coroutine_fn mirror_run(void *opaque) s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (!s->is_none_mode) { - /* First part, loop on the sectors and initialize the dirty bitmap. */ - BlockDriverState *base = s->base; - bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs); - - for (sector_num = 0; sector_num < end; ) { - /* Just to make sure we are not exceeding int limit. 
*/ - int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, - end - sector_num); - - mirror_throttle(s); - - if (block_job_is_cancelled(&s->common)) { - goto immediate_exit; - } - - ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); - - if (ret < 0) { - goto immediate_exit; - } - - assert(n > 0); - if (ret == 1 || mark_all_dirty) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); - } - sector_num += n; + ret = mirror_dirty_init(s); + if (ret < 0 || block_job_is_cancelled(&s->common)) { + goto immediate_exit; } } From 2f0342efdbb0a3c5b5a6a4f4831cbe90c445510b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:26 +0300 Subject: [PATCH 05/14] block: remove extra condition in bdrv_can_write_zeroes_with_unmap All .bdrv_co_write_zeroes callbacks nowadays work perfectly even with backing store attached. If future new callbacks would be unable to do that - they have a chance to block this in bdrv_get_info(). Signed-off-by: Denis V. Lunev Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Fam Zheng Message-id: 1468503209-19498-6-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody Signed-off-by: Jeff Cody --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index d2dac3dce9..30d64e6ca5 100644 --- a/block.c +++ b/block.c @@ -2837,7 +2837,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) { BlockDriverInfo bdi; - if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) { + if (!(bs->open_flags & BDRV_O_UNMAP)) { return false; } From b7d5062c9cf53f21a555843775392dc03364bbff Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:27 +0300 Subject: [PATCH 06/14] mirror: optimize dirty bitmap filling in mirror_run a bit There is no need to scan allocation tables if we have mark_all_dirty flag set. Just mark it all dirty. Signed-off-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Fam Zheng Message-id: 1468503209-19498-7-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody Signed-off-by: Jeff Cody --- block/mirror.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 0e74666d01..b7b1ded30f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -533,11 +533,15 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) BlockDriverState *base = s->base; BlockDriverState *bs = blk_bs(s->common.blk); BlockDriverState *target_bs = blk_bs(s->target); - bool mark_all_dirty = base == NULL && !bdrv_has_zero_init(target_bs); int ret, n; end = s->bdev_length / BDRV_SECTOR_SIZE; + if (base == NULL && !bdrv_has_zero_init(target_bs)) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); + return 0; + } + /* First part, loop on the sectors and initialize the dirty bitmap. */ for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. */ @@ -556,7 +560,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } assert(n > 0); - if (ret == 1 || mark_all_dirty) { + if (ret == 1) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); } sector_num += n; From c7c2769c0e5769eaad9d968fe2161505b657e02a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:28 +0300 Subject: [PATCH 07/14] mirror: efficiently zero out target With a bdrv_co_write_zeroes method on a target BDS and when this method is working as indicated by the bdrv_can_write_zeroes_with_unmap(), zeroes will not be placed into the wire. Thus the target could be very efficiently zeroed out. This should be done with the largest chunk possible. Signed-off-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Fam Zheng Message-id: 1468503209-19498-8-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody CC: Eric Blake Signed-off-by: Jeff Cody --- block/mirror.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index b7b1ded30f..0262b03989 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -538,8 +538,32 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) end = s->bdev_length / BDRV_SECTOR_SIZE; if (base == NULL && !bdrv_has_zero_init(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); - return 0; + if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); + return 0; + } + + for (sector_num = 0; sector_num < end; ) { + int nb_sectors = MIN(end - sector_num, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS); + + mirror_throttle(s); + + if (block_job_is_cancelled(&s->common)) { + return 0; + } + + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1); + mirror_wait_for_io(s); + continue; + } + + mirror_do_zero_or_discard(s, sector_num, nb_sectors, false); + sector_num += nb_sectors; + } + + mirror_drain(s); } /* First part, loop on the sectors and initialize the dirty bitmap. */ From 4b5004d9fc5b7d8e4447dc81c2f26477c2d590f7 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 14 Jul 2016 16:33:29 +0300 Subject: [PATCH 08/14] mirror: improve performance of mirroring of empty disk We should not take into account zero blocks for delay calculations. They are not read and thus IO throttling is not required. In the other case VM migration with 16 Tb QCOW2 disk with 4 Gb of data takes days. Signed-off-by: Denis V. 
Lunev Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Fam Zheng Message-id: 1468503209-19498-9-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Kevin Wolf CC: Max Reitz CC: Jeff Cody CC: Eric Blake Signed-off-by: Jeff Cody --- block/mirror.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 0262b03989..f78186d47b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -323,6 +323,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int nb_chunks = 1; int64_t end = s->bdev_length / BDRV_SECTOR_SIZE; int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); sector_num = hbitmap_iter_next(&s->hbi); if (sector_num < 0) { @@ -373,7 +374,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { int ret; - int io_sectors; + int io_sectors, io_sectors_acct; BlockDriverState *file; enum MirrorMethod { MIRROR_METHOD_COPY, @@ -410,12 +411,17 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) switch (mirror_method) { case MIRROR_METHOD_COPY: io_sectors = mirror_do_read(s, sector_num, io_sectors); + io_sectors_acct = io_sectors; break; case MIRROR_METHOD_ZERO: - mirror_do_zero_or_discard(s, sector_num, io_sectors, false); - break; case MIRROR_METHOD_DISCARD: - mirror_do_zero_or_discard(s, sector_num, io_sectors, true); + mirror_do_zero_or_discard(s, sector_num, io_sectors, + mirror_method == MIRROR_METHOD_DISCARD); + if (write_zeroes_ok) { + io_sectors_acct = 0; + } else { + io_sectors_acct = io_sectors; + } break; default: abort(); @@ -424,7 +430,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) sector_num += io_sectors; nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk); if (s->common.speed) { - delay_ns = 
ratelimit_calculate_delay(&s->limit, io_sectors); + delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct); } } return delay_ns; From cf56a3c632d039d00e29dfe8676321d6d349190c Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 22 Jun 2016 15:35:27 +0300 Subject: [PATCH 09/14] mirror: fix request throttling in drive-mirror There are 2 deficiencies here: - mirror_iteration could start several requests inside. Thus we could simply have more in_flight requests than MAX_IN_FLIGHT. - keeping this in mind throttling in mirror_run which is checking s->in_flight == MAX_IN_FLIGHT is wrong. The patch adds the check and throttling into mirror_iteration and fixes the check in mirror_run() to be sure. Signed-off-by: Denis V. Lunev Reviewed-by: Max Reitz Message-id: 1466598927-5990-1-git-send-email-den@openvz.org CC: Jeff Cody CC: Kevin Wolf CC: Max Reitz Signed-off-by: Jeff Cody (cherry picked from commit e648dc95c28fbca12e67be26a1fc4b9a0676c3fe) --- block/mirror.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index f78186d47b..836a5d0194 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -407,6 +407,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } } + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + mirror_wait_for_io(s); + } + mirror_clip_sectors(s, sector_num, &io_sectors); switch (mirror_method) { case MIRROR_METHOD_COPY: @@ -695,7 +700,7 @@ static void coroutine_fn mirror_run(void *opaque) delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; if (delta < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); mirror_wait_for_io(s); From d5cf4079ca713cccd16302159fc06f8540c33a57 Mon 
Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 19 Jul 2016 22:27:29 +0530 Subject: [PATCH 10/14] block/gluster: rename [server, volname, image] -> [host, volume, path] A future patch will add support for multiple gluster servers. Existing terminology is a bit unusual in relation to what names are used by other networked devices, and doesn't map very well to the terminology we expect to use for multiple servers. Therefore, rename the following options: 'server' -> 'host' 'image' -> 'path' 'volname' -> 'volume' Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Eric Blake Reviewed-by: Jeff Cody Message-id: 1468947453-5433-2-git-send-email-prasanna.kalever@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 406c1e6357..26478b0f8d 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -29,10 +29,10 @@ typedef struct BDRVGlusterState { } BDRVGlusterState; typedef struct GlusterConf { - char *server; + char *host; int port; - char *volname; - char *image; + char *volume; + char *path; char *transport; int debug_level; } GlusterConf; @@ -40,9 +40,9 @@ typedef struct GlusterConf { static void qemu_gluster_gconf_free(GlusterConf *gconf) { if (gconf) { - g_free(gconf->server); - g_free(gconf->volname); - g_free(gconf->image); + g_free(gconf->host); + g_free(gconf->volume); + g_free(gconf->path); g_free(gconf->transport); g_free(gconf); } @@ -62,19 +62,19 @@ static int parse_volume_options(GlusterConf *gconf, char *path) if (*p == '\0') { return -EINVAL; } - gconf->volname = g_strndup(q, p - q); + gconf->volume = g_strndup(q, p - q); - /* image */ + /* path */ p += strspn(p, "/"); if (*p == '\0') { return -EINVAL; } - gconf->image = g_strdup(p); + gconf->path = g_strdup(p); return 0; } /* - * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] 
+ * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...] * * 'gluster' is the protocol. * @@ -83,10 +83,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * tcp, unix and rdma. If a transport type isn't specified, then tcp * type is assumed. * - * 'server' specifies the server where the volume file specification for + * 'host' specifies the host where the volume file specification for * the given volume resides. This can be either hostname, ipv4 address * or ipv6 address. ipv6 address needs to be within square brackets [ ]. - * If transport type is 'unix', then 'server' field should not be specified. + * If transport type is 'unix', then 'host' field should not be specified. * The 'socket' field needs to be populated with the path to unix domain * socket. * @@ -95,9 +95,9 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * default port. If the transport type is unix, then 'port' should not be * specified. * - * 'volname' is the name of the gluster volume which contains the VM image. + * 'volume' is the name of the gluster volume which contains the VM image. * - * 'image' is the path to the actual VM image that resides on gluster volume. + * 'path' is the path to the actual VM image that resides on gluster volume. 
* * Examples: * @@ -106,7 +106,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img - * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket * file=gluster+rdma://1.2.3.4:24007/testvol/a.img */ @@ -157,9 +157,9 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) ret = -EINVAL; goto out; } - gconf->server = g_strdup(qp->p[0].value); + gconf->host = g_strdup(qp->p[0].value); } else { - gconf->server = g_strdup(uri->server ? uri->server : "localhost"); + gconf->host = g_strdup(uri->server ? uri->server : "localhost"); gconf->port = uri->port; } @@ -180,18 +180,18 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, ret = qemu_gluster_parseuri(gconf, filename); if (ret < 0) { - error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/" - "volname/image[?socket=...]"); + error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/" + "volume/path[?socket=...]"); errno = -ret; goto out; } - glfs = glfs_new(gconf->volname); + glfs = glfs_new(gconf->volume); if (!glfs) { goto out; } - ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->host, gconf->port); if (ret < 0) { goto out; @@ -205,9 +205,9 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, ret = glfs_init(glfs); if (ret) { error_setg_errno(errp, errno, - "Gluster connection failed for server=%s port=%d " - "volume=%s image=%s transport=%s", gconf->server, - gconf->port, gconf->volname, gconf->image, + "Gluster connection failed for host=%s port=%d " + "volume=%s path=%s transport=%s", gconf->host, + gconf->port, 
gconf->volume, gconf->path, gconf->transport); /* glfs_init sometimes doesn't set errno although docs suggest that */ @@ -373,7 +373,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, qemu_gluster_parse_flags(bdrv_flags, &open_flags); - s->fd = glfs_open(s->glfs, gconf->image, open_flags); + s->fd = glfs_open(s->glfs, gconf->path, open_flags); if (!s->fd) { ret = -errno; } @@ -439,7 +439,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, } #endif - reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags); + reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags); if (reop_s->fd == NULL) { /* reops->glfs will be cleaned up in _abort */ ret = -errno; @@ -587,7 +587,7 @@ static int qemu_gluster_create(const char *filename, goto out; } - fd = glfs_creat(glfs, gconf->image, + fd = glfs_creat(glfs, gconf->path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); if (!fd) { ret = -errno; From f70c50c81746ecd352617f2a15eca5cb03cf6219 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 19 Jul 2016 22:27:30 +0530 Subject: [PATCH 11/14] block/gluster: code cleanup unified coding styles of multiline function arguments and other error functions moved random declarations of structures and other list variables Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Eric Blake Reviewed-by: Jeff Cody Message-id: 1468947453-5433-3-git-send-email-prasanna.kalever@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 143 +++++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 68 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 26478b0f8d..4fce2ee406 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -13,6 +13,12 @@ #include "qapi/error.h" #include "qemu/uri.h" +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 + + typedef struct GlusterAIOCB { int64_t size; int ret; @@ -28,6 +34,11 
@@ typedef struct BDRVGlusterState { int debug_level; } BDRVGlusterState; +typedef struct BDRVGlusterReopenState { + struct glfs *glfs; + struct glfs_fd *fd; +} BDRVGlusterReopenState; + typedef struct GlusterConf { char *host; int port; @@ -37,6 +48,49 @@ typedef struct GlusterConf { int debug_level; } GlusterConf; + +static QemuOptsList qemu_gluster_create_opts = { + .name = "qemu-gluster-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + } +}; + +static QemuOptsList runtime_opts = { + .name = "gluster", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = GLUSTER_OPT_FILENAME, + .type = QEMU_OPT_STRING, + .help = "URL to the gluster image", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + }, +}; + + static void qemu_gluster_gconf_free(GlusterConf *gconf) { if (gconf) { @@ -181,7 +235,7 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, ret = qemu_gluster_parseuri(gconf, filename); if (ret < 0) { error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/" - "volume/path[?socket=...]"); + "volume/path[?socket=...]"); errno = -ret; goto out; } @@ -255,30 +309,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_bh_schedule(acb->bh); } -#define GLUSTER_OPT_FILENAME "filename" -#define GLUSTER_OPT_DEBUG "debug" -#define GLUSTER_DEBUG_DEFAULT 4 -#define GLUSTER_DEBUG_MAX 9 - -/* TODO Convert to fine grained options */ -static QemuOptsList runtime_opts = { - .name = "gluster", - 
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = GLUSTER_OPT_FILENAME, - .type = QEMU_OPT_STRING, - .help = "URL to the gluster image", - }, - { - .name = GLUSTER_OPT_DEBUG, - .type = QEMU_OPT_NUMBER, - .help = "Gluster log level, valid range is 0-9", - }, - { /* end of list */ } - }, -}; - static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) { assert(open_flags != NULL); @@ -395,12 +425,6 @@ out: return ret; } -typedef struct BDRVGlusterReopenState { - struct glfs *glfs; - struct glfs_fd *fd; -} BDRVGlusterReopenState; - - static int qemu_gluster_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { @@ -501,7 +525,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) #ifdef CONFIG_GLUSTERFS_ZEROFILL static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int size, BdrvRequestFlags flags) + int64_t offset, + int size, + BdrvRequestFlags flags) { int ret; GlusterAIOCB acb; @@ -527,7 +553,7 @@ static inline bool gluster_supports_zerofill(void) } static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, - int64_t size) + int64_t size) { return glfs_zerofill(fd, offset, size); } @@ -539,7 +565,7 @@ static inline bool gluster_supports_zerofill(void) } static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, - int64_t size) + int64_t size) { return 0; } @@ -576,19 +602,17 @@ static int qemu_gluster_create(const char *filename, tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); if (!tmp || !strcmp(tmp, "off")) { prealloc = 0; - } else if (!strcmp(tmp, "full") && - gluster_supports_zerofill()) { + } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) { prealloc = 1; } else { error_setg(errp, "Invalid preallocation mode: '%s'" - " or GlusterFS doesn't support zerofill API", - tmp); + " or GlusterFS doesn't support zerofill API", tmp); ret = -EINVAL; goto out; } fd = glfs_creat(glfs, gconf->path, - O_WRONLY | 
O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); if (!fd) { ret = -errno; } else { @@ -614,7 +638,8 @@ out: } static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int write) { int ret; GlusterAIOCB acb; @@ -629,10 +654,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, - gluster_finish_aiocb, &acb); + gluster_finish_aiocb, &acb); } else { ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, - gluster_finish_aiocb, &acb); + gluster_finish_aiocb, &acb); } if (ret < 0) { @@ -657,13 +682,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) } static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) { return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0); } static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) { return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } @@ -725,7 +754,8 @@ error: #ifdef CONFIG_GLUSTERFS_DISCARD static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, + int nb_sectors) { int ret; GlusterAIOCB acb; @@ -934,29 +964,6 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status( } -static QemuOptsList qemu_gluster_create_opts = { - .name = "qemu-gluster-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { - .name = 
BLOCK_OPT_PREALLOC, - .type = QEMU_OPT_STRING, - .help = "Preallocation mode (allowed values: off, full)" - }, - { - .name = GLUSTER_OPT_DEBUG, - .type = QEMU_OPT_NUMBER, - .help = "Gluster log level, valid range is 0-9", - }, - { /* end of list */ } - } -}; - static BlockDriver bdrv_gluster = { .format_name = "gluster", .protocol_name = "gluster", From 0552ff24656c26b2a0d135966fb0feaa90d7f9bc Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 19 Jul 2016 22:27:31 +0530 Subject: [PATCH 12/14] block/gluster: deprecate rdma support gluster volfile server fetch happens through unix and/or tcp, it doesn't support volfile fetch over rdma. The rdma code may actually mislead, so to make sure things do not break, for now we fallback to tcp when requested for rdma, with a warning. If you are wondering how this worked all these days, its the gluster libgfapi code which handles anything other than unix transport as socket/tcp, sad but true. Also gluster doesn't support ipv6 addresses, removing the ipv6 related comments/docs section [Jeff: Minor grammatical fixes in comments and commit message, per review comments] Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Markus Armbruster Reviewed-by: Eric Blake Reviewed-by: Jeff Cody Message-id: 1468947453-5433-4-git-send-email-prasanna.kalever@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 4fce2ee406..a042ccb075 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -12,6 +12,7 @@ #include "block/block_int.h" #include "qapi/error.h" #include "qemu/uri.h" +#include "qemu/error-report.h" #define GLUSTER_OPT_FILENAME "filename" #define GLUSTER_OPT_DEBUG "debug" @@ -134,12 +135,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * * 'transport' specifies the transport type used to connect to gluster * management daemon (glusterd). 
Valid transport types are - * tcp, unix and rdma. If a transport type isn't specified, then tcp - * type is assumed. + * tcp or unix. If a transport type isn't specified, then tcp type is assumed. * * 'host' specifies the host where the volume file specification for - * the given volume resides. This can be either hostname, ipv4 address - * or ipv6 address. ipv6 address needs to be within square brackets [ ]. + * the given volume resides. This can be either hostname or ipv4 address. * If transport type is 'unix', then 'host' field should not be specified. * The 'socket' field needs to be populated with the path to unix domain * socket. @@ -158,11 +157,8 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * file=gluster://1.2.3.4/testvol/a.img * file=gluster+tcp://1.2.3.4/testvol/a.img * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img - * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img - * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket - * file=gluster+rdma://1.2.3.4:24007/testvol/a.img */ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) { @@ -185,7 +181,9 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) gconf->transport = g_strdup("unix"); is_unix = true; } else if (!strcmp(uri->scheme, "gluster+rdma")) { - gconf->transport = g_strdup("rdma"); + gconf->transport = g_strdup("tcp"); + error_report("Warning: rdma feature is not supported, falling " + "back to tcp"); } else { ret = -EINVAL; goto out; @@ -1048,6 +1046,12 @@ static BlockDriver bdrv_gluster_unix = { .create_opts = &qemu_gluster_create_opts, }; +/* rdma is deprecated (actually never supported for volfile fetch). + * Let's maintain it for the protocol compatibility, to make sure things + * won't break immediately. 
For now, gluster+rdma will fall back to gluster+tcp + * protocol with a warning. + * TODO: remove gluster+rdma interface support + */ static BlockDriver bdrv_gluster_rdma = { .format_name = "gluster", .protocol_name = "gluster+rdma", From 7edac2ddebbc11000ce71244e13b2c9462bf7ad5 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 19 Jul 2016 22:27:32 +0530 Subject: [PATCH 13/14] block/gluster: using new qapi schema this patch adds 'GlusterServer' related schema in qapi/block-core.json [Jeff: minor fix-ups of comments and formatting, per patch reviews] Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Markus Armbruster Reviewed-by: Eric Blake Message-id: 1468947453-5433-5-git-send-email-prasanna.kalever@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 109 +++++++++++++++++++++++-------------------- qapi/block-core.json | 68 +++++++++++++++++++++++++-- 2 files changed, 122 insertions(+), 55 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index a042ccb075..80e68319ce 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -16,6 +16,7 @@ #define GLUSTER_OPT_FILENAME "filename" #define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEFAULT_PORT 24007 #define GLUSTER_DEBUG_DEFAULT 4 #define GLUSTER_DEBUG_MAX 9 @@ -40,15 +41,6 @@ typedef struct BDRVGlusterReopenState { struct glfs_fd *fd; } BDRVGlusterReopenState; -typedef struct GlusterConf { - char *host; - int port; - char *volume; - char *path; - char *transport; - int debug_level; -} GlusterConf; - static QemuOptsList qemu_gluster_create_opts = { .name = "qemu-gluster-create-opts", @@ -92,18 +84,7 @@ static QemuOptsList runtime_opts = { }; -static void qemu_gluster_gconf_free(GlusterConf *gconf) -{ - if (gconf) { - g_free(gconf->host); - g_free(gconf->volume); - g_free(gconf->path); - g_free(gconf->transport); - g_free(gconf); - } -} - -static int parse_volume_options(GlusterConf *gconf, char *path) +static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path) { char *p, *q; 
@@ -160,8 +141,10 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket */ -static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf, + const char *filename) { + GlusterServer *gsconf; URI *uri; QueryParams *qp = NULL; bool is_unix = false; @@ -172,16 +155,18 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) return -EINVAL; } + gconf->server = gsconf = g_new0(GlusterServer, 1); + /* transport */ if (!uri->scheme || !strcmp(uri->scheme, "gluster")) { - gconf->transport = g_strdup("tcp"); + gsconf->type = GLUSTER_TRANSPORT_TCP; } else if (!strcmp(uri->scheme, "gluster+tcp")) { - gconf->transport = g_strdup("tcp"); + gsconf->type = GLUSTER_TRANSPORT_TCP; } else if (!strcmp(uri->scheme, "gluster+unix")) { - gconf->transport = g_strdup("unix"); + gsconf->type = GLUSTER_TRANSPORT_UNIX; is_unix = true; } else if (!strcmp(uri->scheme, "gluster+rdma")) { - gconf->transport = g_strdup("tcp"); + gsconf->type = GLUSTER_TRANSPORT_TCP; error_report("Warning: rdma feature is not supported, falling " "back to tcp"); } else { @@ -209,10 +194,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) ret = -EINVAL; goto out; } - gconf->host = g_strdup(qp->p[0].value); + gsconf->u.q_unix.path = g_strdup(qp->p[0].value); } else { - gconf->host = g_strdup(uri->server ? uri->server : "localhost"); - gconf->port = uri->port; + gsconf->u.tcp.host = g_strdup(uri->server ? 
uri->server : "localhost"); + if (uri->port) { + gsconf->u.tcp.port = g_strdup_printf("%d", uri->port); + } else { + gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT); + } } out: @@ -223,17 +212,18 @@ out: return ret; } -static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, - Error **errp) +static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, + const char *filename, Error **errp) { struct glfs *glfs = NULL; int ret; int old_errno; - ret = qemu_gluster_parseuri(gconf, filename); + ret = qemu_gluster_parse_uri(gconf, filename); if (ret < 0) { - error_setg(errp, "Usage: file=gluster[+transport]://[host[:port]]/" - "volume/path[?socket=...]"); + error_setg(errp, "Invalid URI"); + error_append_hint(errp, "Usage: file=gluster[+transport]://" + "[host[:port]]/volume/path[?socket=...]\n"); errno = -ret; goto out; } @@ -243,8 +233,16 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, goto out; } - ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->host, - gconf->port); + if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) { + ret = glfs_set_volfile_server(glfs, + GlusterTransport_lookup[gconf->server->type], + gconf->server->u.q_unix.path, 0); + } else { + ret = glfs_set_volfile_server(glfs, + GlusterTransport_lookup[gconf->server->type], + gconf->server->u.tcp.host, + atoi(gconf->server->u.tcp.port)); + } if (ret < 0) { goto out; } @@ -256,15 +254,22 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, ret = glfs_init(glfs); if (ret) { - error_setg_errno(errp, errno, - "Gluster connection failed for host=%s port=%d " - "volume=%s path=%s transport=%s", gconf->host, - gconf->port, gconf->volume, gconf->path, - gconf->transport); + if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) { + error_setg(errp, + "Gluster connection for volume %s, path %s failed on " + "socket %s ", gconf->volume, gconf->path, + gconf->server->u.q_unix.path); + } else { + 
error_setg(errp, + "Gluster connection for volume %s, path %s failed on " + "host %s and port %s ", gconf->volume, gconf->path, + gconf->server->u.tcp.host, gconf->server->u.tcp.port); + } /* glfs_init sometimes doesn't set errno although docs suggest that */ - if (errno == 0) + if (errno == 0) { errno = EINVAL; + } goto out; } @@ -352,7 +357,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, BDRVGlusterState *s = bs->opaque; int open_flags = 0; int ret = 0; - GlusterConf *gconf = g_new0(GlusterConf, 1); + BlockdevOptionsGluster *gconf = NULL; QemuOpts *opts; Error *local_err = NULL; const char *filename; @@ -375,7 +380,9 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, s->debug_level = GLUSTER_DEBUG_MAX; } + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; + gconf->has_debug_level = true; s->glfs = qemu_gluster_init(gconf, filename, errp); if (!s->glfs) { ret = -errno; @@ -410,7 +417,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, out: qemu_opts_del(opts); - qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); if (!ret) { return ret; } @@ -429,7 +436,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, int ret = 0; BDRVGlusterState *s; BDRVGlusterReopenState *reop_s; - GlusterConf *gconf = NULL; + BlockdevOptionsGluster *gconf; int open_flags = 0; assert(state != NULL); @@ -442,9 +449,9 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, qemu_gluster_parse_flags(state->flags, &open_flags); - gconf = g_new0(GlusterConf, 1); - + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; + gconf->has_debug_level = true; reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); if (reop_s->glfs == NULL) { ret = -errno; @@ -470,7 +477,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, exit: /* state->opaque will be freed in either the _abort or _commit */ - 
qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); return ret; } @@ -572,14 +579,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, static int qemu_gluster_create(const char *filename, QemuOpts *opts, Error **errp) { + BlockdevOptionsGluster *gconf; struct glfs *glfs; struct glfs_fd *fd; int ret = 0; int prealloc = 0; int64_t total_size = 0; char *tmp = NULL; - GlusterConf *gconf = g_new0(GlusterConf, 1); + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, GLUSTER_DEBUG_DEFAULT); if (gconf->debug_level < 0) { @@ -587,6 +595,7 @@ static int qemu_gluster_create(const char *filename, } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { gconf->debug_level = GLUSTER_DEBUG_MAX; } + gconf->has_debug_level = true; glfs = qemu_gluster_init(gconf, filename, errp); if (!glfs) { @@ -628,7 +637,7 @@ static int qemu_gluster_create(const char *filename, } out: g_free(tmp); - qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); if (glfs) { glfs_fini(glfs); } diff --git a/qapi/block-core.json b/qapi/block-core.json index 3444a9bc3e..e30e496bd3 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1666,13 +1666,14 @@ # @host_device, @host_cdrom: Since 2.1 # # Since: 2.0 +# @gluster: Since 2.7 ## { 'enum': 'BlockdevDriver', 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', - 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', - 'http', 'https', 'luks', 'null-aio', 'null-co', 'parallels', - 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx', - 'vmdk', 'vpc', 'vvfat' ] } + 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', + 'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co', + 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', + 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } ## # @BlockdevOptionsFile @@ -2064,6 +2065,63 @@ '*rewrite-corrupted': 'bool', '*read-pattern': 
'QuorumReadPattern' } } +## +# @GlusterTransport +# +# An enumeration of Gluster transport types +# +# @tcp: TCP - Transmission Control Protocol +# +# @unix: UNIX - Unix domain socket +# +# Since: 2.7 +## +{ 'enum': 'GlusterTransport', + 'data': [ 'unix', 'tcp' ] } + + +## +# @GlusterServer +# +# Captures the address of a socket +# +# Details for connecting to a gluster server +# +# @type: Transport type used for gluster connection +# +# @unix: socket file +# +# @tcp: host address and port number +# +# Since: 2.7 +## +{ 'union': 'GlusterServer', + 'base': { 'type': 'GlusterTransport' }, + 'discriminator': 'type', + 'data': { 'unix': 'UnixSocketAddress', + 'tcp': 'InetSocketAddress' } } + +## +# @BlockdevOptionsGluster +# +# Driver specific block device options for Gluster +# +# @volume: name of gluster volume where VM image resides +# +# @path: absolute path to image file in gluster volume +# +# @server: gluster server description +# +# @debug-level: #optional libgfapi log level (default '4' which is Error) +# +# Since: 2.7 +## +{ 'struct': 'BlockdevOptionsGluster', + 'data': { 'volume': 'str', + 'path': 'str', + 'server': 'GlusterServer', + '*debug_level': 'int' } } + ## # @BlockdevOptions # @@ -2111,7 +2169,7 @@ 'file': 'BlockdevOptionsFile', 'ftp': 'BlockdevOptionsFile', 'ftps': 'BlockdevOptionsFile', -# TODO gluster: Wait for structured options + 'gluster': 'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', 'http': 'BlockdevOptionsFile', From 6c7189bb29de9fa2202f613f3c6caf028f96f261 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Tue, 19 Jul 2016 22:27:33 +0530 Subject: [PATCH 14/14] block/gluster: add support for multiple gluster servers This patch adds a way to specify multiple volfile servers to the gluster block backend of QEMU with tcp|rdma transport types and their port numbers. 
Problem: Currently, a VM image on a gluster volume is specified like this: file=gluster[+tcp]://host[:port]/testvol/a.img Say we have three hosts in a trusted pool with a replica 3 volume in action. When the host mentioned in the command above goes down for some reason, the other two hosts are still available. But there's currently no way to tell QEMU about them. Solution: A new way of specifying a VM image on a gluster volume with volfile servers: (We still support the old syntax to maintain backward compatibility) Basic command line syntax looks like: Pattern I: -drive driver=gluster, volume=testvol,path=/path/a.raw,[debug=N,] server.0.type=tcp, server.0.host=1.2.3.4, server.0.port=24007, server.1.type=unix, server.1.socket=/path/socketfile Pattern II: 'json:{"driver":"qcow2","file":{"driver":"gluster", "volume":"testvol","path":"/path/a.qcow2",["debug":N,] "server":[{hostinfo_1}, ...{hostinfo_N}]}}' driver => 'gluster' (protocol name) volume => name of gluster volume where our VM image resides path => absolute path of image in gluster volume [debug] => libgfapi loglevel [(0 - 9) default 4 -> Error] {hostinfo} => {{type:"tcp",host:"1.2.3.4"[,port:"24007"]}, {type:"unix",socket:"/path/sockfile"}} type => transport type used to connect to gluster management daemon, it can be tcp|unix host => host address (hostname/ipv4/ipv6 addresses/socket path) port => port number on which glusterd is listening. socket => path to socket file Examples: 1. -drive driver=qcow2,file.driver=gluster, file.volume=testvol,file.path=/path/a.qcow2,file.debug=9, file.server.0.type=tcp, file.server.0.host=1.2.3.4, file.server.0.port=24007, file.server.1.type=unix, file.server.1.socket=/var/run/glusterd.socket 2. 
'json:{"driver":"qcow2","file":{"driver":"gluster","volume":"testvol", "path":"/path/a.qcow2","debug":9,"server": [{"type":"tcp","host":"1.2.3.4","port":"24007"}, {"type":"unix","socket":"/var/run/glusterd.socket"} ]}}' This patch gives a mechanism to provide all the server addresses, which are in replica set, so in case host1 is down VM can still boot from any of the active hosts. This is equivalent to the backup-volfile-servers option supported by mount.glusterfs (FUSE way of mounting gluster volume) credits: sincere thanks to all the supporters Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Markus Armbruster Message-id: 1468947453-5433-6-git-send-email-prasanna.kalever@redhat.com Signed-off-by: Jeff Cody --- block/gluster.c | 397 ++++++++++++++++++++++++++++++++++++++----- qapi/block-core.json | 2 +- 2 files changed, 358 insertions(+), 41 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 80e68319ce..296bd9929e 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -11,15 +11,27 @@ #include #include "block/block_int.h" #include "qapi/error.h" +#include "qapi/qmp/qerror.h" #include "qemu/uri.h" #include "qemu/error-report.h" #define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_VOLUME "volume" +#define GLUSTER_OPT_PATH "path" +#define GLUSTER_OPT_TYPE "type" +#define GLUSTER_OPT_SERVER_PATTERN "server." 
+#define GLUSTER_OPT_HOST "host" +#define GLUSTER_OPT_PORT "port" +#define GLUSTER_OPT_TO "to" +#define GLUSTER_OPT_IPV4 "ipv4" +#define GLUSTER_OPT_IPV6 "ipv6" +#define GLUSTER_OPT_SOCKET "socket" #define GLUSTER_OPT_DEBUG "debug" #define GLUSTER_DEFAULT_PORT 24007 #define GLUSTER_DEBUG_DEFAULT 4 #define GLUSTER_DEBUG_MAX 9 +#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n" typedef struct GlusterAIOCB { int64_t size; @@ -83,6 +95,92 @@ static QemuOptsList runtime_opts = { }, }; +static QemuOptsList runtime_json_opts = { + .name = "gluster_json", + .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head), + .desc = { + { + .name = GLUSTER_OPT_VOLUME, + .type = QEMU_OPT_STRING, + .help = "name of gluster volume where VM image resides", + }, + { + .name = GLUSTER_OPT_PATH, + .type = QEMU_OPT_STRING, + .help = "absolute path to image file in gluster volume", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_type_opts = { + .name = "gluster_type", + .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "tcp|unix", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_unix_opts = { + .name = "gluster_unix", + .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head), + .desc = { + { + .name = GLUSTER_OPT_SOCKET, + .type = QEMU_OPT_STRING, + .help = "socket file path)", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_tcp_opts = { + .name = "gluster_tcp", + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "tcp|unix", + }, + { + .name = GLUSTER_OPT_HOST, + .type = QEMU_OPT_STRING, + .help = "host address (hostname/ipv4/ipv6 addresses)", + }, + { + .name = GLUSTER_OPT_PORT, + .type = QEMU_OPT_NUMBER, + .help = "port 
number on which glusterd is listening (default 24007)", + }, + { + .name = "to", + .type = QEMU_OPT_NUMBER, + .help = "max port number, not supported by gluster", + }, + { + .name = "ipv4", + .type = QEMU_OPT_BOOL, + .help = "ipv4 bool value, not supported by gluster", + }, + { + .name = "ipv6", + .type = QEMU_OPT_BOOL, + .help = "ipv6 bool value, not supported by gluster", + }, + { /* end of list */ } + }, +}; static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path) { @@ -155,7 +253,8 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf, return -EINVAL; } - gconf->server = gsconf = g_new0(GlusterServer, 1); + gconf->server = g_new0(GlusterServerList, 1); + gconf->server->value = gsconf = g_new0(GlusterServer, 1); /* transport */ if (!uri->scheme || !strcmp(uri->scheme, "gluster")) { @@ -212,39 +311,34 @@ out: return ret; } -static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, - const char *filename, Error **errp) +static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, + Error **errp) { - struct glfs *glfs = NULL; + struct glfs *glfs; int ret; int old_errno; - - ret = qemu_gluster_parse_uri(gconf, filename); - if (ret < 0) { - error_setg(errp, "Invalid URI"); - error_append_hint(errp, "Usage: file=gluster[+transport]://" - "[host[:port]]/volume/path[?socket=...]\n"); - errno = -ret; - goto out; - } + GlusterServerList *server; glfs = glfs_new(gconf->volume); if (!glfs) { goto out; } - if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) { - ret = glfs_set_volfile_server(glfs, - GlusterTransport_lookup[gconf->server->type], - gconf->server->u.q_unix.path, 0); - } else { - ret = glfs_set_volfile_server(glfs, - GlusterTransport_lookup[gconf->server->type], - gconf->server->u.tcp.host, - atoi(gconf->server->u.tcp.port)); - } - if (ret < 0) { - goto out; + for (server = gconf->server; server; server = server->next) { + if (server->value->type == GLUSTER_TRANSPORT_UNIX) { + ret = 
glfs_set_volfile_server(glfs, + GlusterTransport_lookup[server->value->type], + server->value->u.q_unix.path, 0); + } else { + ret = glfs_set_volfile_server(glfs, + GlusterTransport_lookup[server->value->type], + server->value->u.tcp.host, + atoi(server->value->u.tcp.port)); + } + + if (ret < 0) { + goto out; + } } ret = glfs_set_logging(glfs, "-", gconf->debug_level); @@ -254,18 +348,21 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, ret = glfs_init(glfs); if (ret) { - if (gconf->server->type == GLUSTER_TRANSPORT_UNIX) { - error_setg(errp, - "Gluster connection for volume %s, path %s failed on " - "socket %s ", gconf->volume, gconf->path, - gconf->server->u.q_unix.path); - } else { - error_setg(errp, - "Gluster connection for volume %s, path %s failed on " - "host %s and port %s ", gconf->volume, gconf->path, - gconf->server->u.tcp.host, gconf->server->u.tcp.port); + error_setg(errp, "Gluster connection for volume %s, path %s failed" + " to connect", gconf->volume, gconf->path); + for (server = gconf->server; server; server = server->next) { + if (server->value->type == GLUSTER_TRANSPORT_UNIX) { + error_append_hint(errp, "hint: failed on socket %s ", + server->value->u.q_unix.path); + } else { + error_append_hint(errp, "hint: failed on host %s and port %s ", + server->value->u.tcp.host, + server->value->u.tcp.port); + } } + error_append_hint(errp, "Please refer to gluster logs for more info\n"); + /* glfs_init sometimes doesn't set errno although docs suggest that */ if (errno == 0) { errno = EINVAL; @@ -284,6 +381,226 @@ out: return NULL; } +static int qapi_enum_parse(const char *opt) +{ + int i; + + if (!opt) { + return GLUSTER_TRANSPORT__MAX; + } + + for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) { + if (!strcmp(opt, GlusterTransport_lookup[i])) { + return i; + } + } + + return i; +} + +/* + * Convert the json formatted command line into qapi. 
+*/ +static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf, + QDict *options, Error **errp) +{ + QemuOpts *opts; + GlusterServer *gsconf; + GlusterServerList *curr = NULL; + QDict *backing_options = NULL; + Error *local_err = NULL; + char *str = NULL; + const char *ptr; + size_t num_servers; + int i; + + /* create opts info from runtime_json_opts list */ + opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + goto out; + } + + num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN); + if (num_servers < 1) { + error_setg(&local_err, QERR_MISSING_PARAMETER, "server"); + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME); + goto out; + } + gconf->volume = g_strdup(ptr); + + ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH); + goto out; + } + gconf->path = g_strdup(ptr); + qemu_opts_del(opts); + + for (i = 0; i < num_servers; i++) { + str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i); + qdict_extract_subqdict(options, &backing_options, str); + + /* create opts info from runtime_type_opts list */ + opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE); + gsconf = g_new0(GlusterServer, 1); + gsconf->type = qapi_enum_parse(ptr); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + + } + if (gsconf->type == GLUSTER_TRANSPORT__MAX) { + error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, + GLUSTER_OPT_TYPE, "tcp or unix"); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + qemu_opts_del(opts); + + if (gsconf->type == 
GLUSTER_TRANSPORT_TCP) { + /* create opts info from runtime_tcp_opts list */ + opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_HOST); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.tcp.host = g_strdup(ptr); + ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_PORT); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.tcp.port = g_strdup(ptr); + + /* defend for unsupported fields in InetSocketAddress, + * i.e. @ipv4, @ipv6 and @to + */ + ptr = qemu_opt_get(opts, GLUSTER_OPT_TO); + if (ptr) { + gsconf->u.tcp.has_to = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4); + if (ptr) { + gsconf->u.tcp.has_ipv4 = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6); + if (ptr) { + gsconf->u.tcp.has_ipv6 = true; + } + if (gsconf->u.tcp.has_to) { + error_setg(&local_err, "Parameter 'to' not supported"); + goto out; + } + if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) { + error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported"); + goto out; + } + qemu_opts_del(opts); + } else { + /* create opts info from runtime_unix_opts list */ + opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_SOCKET); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.q_unix.path = g_strdup(ptr); + qemu_opts_del(opts); + } + + if (gconf->server == NULL) { + gconf->server = g_new0(GlusterServerList, 1); + gconf->server->value = gsconf; + curr = 
gconf->server; + } else { + curr->next = g_new0(GlusterServerList, 1); + curr->next->value = gsconf; + curr = curr->next; + } + + qdict_del(backing_options, str); + g_free(str); + str = NULL; + } + + return 0; + +out: + error_propagate(errp, local_err); + qemu_opts_del(opts); + if (str) { + qdict_del(backing_options, str); + g_free(str); + } + errno = EINVAL; + return -errno; +} + +static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, + const char *filename, + QDict *options, Error **errp) +{ + int ret; + if (filename) { + ret = qemu_gluster_parse_uri(gconf, filename); + if (ret < 0) { + error_setg(errp, "invalid URI"); + error_append_hint(errp, "Usage: file=gluster[+transport]://" + "[host[:port]]/volume/path[?socket=...]\n"); + errno = -ret; + return NULL; + } + } else { + ret = qemu_gluster_parse_json(gconf, options, errp); + if (ret < 0) { + error_append_hint(errp, "Usage: " + "-drive driver=qcow2,file.driver=gluster," + "file.volume=testvol,file.path=/path/a.qcow2" + "[,file.debug=9],file.server.0.type=tcp," + "file.server.0.host=1.2.3.4," + "file.server.0.port=24007," + "file.server.1.transport=unix," + "file.server.1.socket=/var/run/glusterd.socket ..." 
+ "\n"); + errno = -ret; + return NULL; + } + + } + + return qemu_gluster_glfs_init(gconf, errp); +} + static void qemu_gluster_complete_aio(void *opaque) { GlusterAIOCB *acb = (GlusterAIOCB *)opaque; @@ -383,7 +700,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; gconf->has_debug_level = true; - s->glfs = qemu_gluster_init(gconf, filename, errp); + s->glfs = qemu_gluster_init(gconf, filename, options, errp); if (!s->glfs) { ret = -errno; goto out; @@ -452,7 +769,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; gconf->has_debug_level = true; - reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); + reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp); if (reop_s->glfs == NULL) { ret = -errno; goto exit; @@ -597,7 +914,7 @@ static int qemu_gluster_create(const char *filename, } gconf->has_debug_level = true; - glfs = qemu_gluster_init(gconf, filename, errp); + glfs = qemu_gluster_init(gconf, filename, NULL, errp); if (!glfs) { ret = -errno; goto out; @@ -975,7 +1292,7 @@ static BlockDriver bdrv_gluster = { .format_name = "gluster", .protocol_name = "gluster", .instance_size = sizeof(BDRVGlusterState), - .bdrv_needs_filename = true, + .bdrv_needs_filename = false, .bdrv_file_open = qemu_gluster_open, .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, .bdrv_reopen_commit = qemu_gluster_reopen_commit, @@ -1003,7 +1320,7 @@ static BlockDriver bdrv_gluster_tcp = { .format_name = "gluster", .protocol_name = "gluster+tcp", .instance_size = sizeof(BDRVGlusterState), - .bdrv_needs_filename = true, + .bdrv_needs_filename = false, .bdrv_file_open = qemu_gluster_open, .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, .bdrv_reopen_commit = qemu_gluster_reopen_commit, diff --git a/qapi/block-core.json b/qapi/block-core.json index 
e30e496bd3..f817c30e87 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2119,7 +2119,7 @@ { 'struct': 'BlockdevOptionsGluster', 'data': { 'volume': 'str', 'path': 'str', - 'server': 'GlusterServer', + 'server': ['GlusterServer'], '*debug_level': 'int' } } ##