2009-11-02 14:40:58 +01:00
|
|
|
/*
|
|
|
|
* QEMU live block migration
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2009
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Liran Schour <lirans@il.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
2012-01-13 17:44:23 +01:00
|
|
|
* Contributions after 2012-01-13 are licensed under the terms of the
|
|
|
|
* GNU GPL, version 2 or (at your option) any later version.
|
2009-11-02 14:40:58 +01:00
|
|
|
*/
|
|
|
|
|
2016-01-26 19:16:54 +01:00
|
|
|
#include "qemu/osdep.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 09:01:28 +01:00
|
|
|
#include "qapi/error.h"
|
2014-10-07 13:59:11 +02:00
|
|
|
#include "qemu/error-report.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 07:23:50 +02:00
|
|
|
#include "qemu/main-loop.h"
|
2016-03-20 18:16:19 +01:00
|
|
|
#include "qemu/cutils.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/queue.h"
|
2017-04-21 14:31:22 +02:00
|
|
|
#include "block.h"
|
2022-12-21 14:35:49 +01:00
|
|
|
#include "block/dirty-bitmap.h"
|
2017-04-21 14:31:22 +02:00
|
|
|
#include "migration/misc.h"
|
2017-04-24 20:07:27 +02:00
|
|
|
#include "migration.h"
|
2023-05-15 21:56:58 +02:00
|
|
|
#include "migration-stats.h"
|
2017-04-24 13:42:55 +02:00
|
|
|
#include "migration/register.h"
|
2017-04-20 18:52:18 +02:00
|
|
|
#include "qemu-file.h"
|
2017-04-17 19:02:59 +02:00
|
|
|
#include "migration/vmstate.h"
|
2015-03-02 12:36:47 +01:00
|
|
|
#include "sysemu/block-backend.h"
|
2020-10-20 09:32:56 +02:00
|
|
|
#include "trace.h"
|
2023-03-01 21:18:45 +01:00
|
|
|
#include "options.h"
|
2009-11-02 14:40:58 +01:00
|
|
|
|
2022-07-21 13:52:07 +02:00
|
|
|
/* Size in bytes of one migrated block (1 MiB). */
#define BLK_MIG_BLOCK_SIZE           (1ULL << 20)
/* The same block size expressed as a number of sectors. */
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)

/*
 * Flag bits carried in the stream.  blk_send() ORs them into the 64-bit
 * word that also holds the byte offset of the block.
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01  /* record describes a block */
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04
#define BLK_MIG_FLAG_ZERO_BLOCK         0x08  /* all-zero block, no payload */
|
2009-11-02 14:40:58 +01:00
|
|
|
|
block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonably give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated(). But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset. Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().
For ease of review, bdrv_is_allocated_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 14:44:57 +02:00
|
|
|
/*
 * Number of bytes (65536 sectors) handed to bdrv_is_allocated() as the
 * search window for each allocation probe.
 */
#define MAX_IS_ALLOCATED_SEARCH (65536 * BDRV_SECTOR_SIZE)

/* NOTE(review): presumably caps on buffered / in-flight reads -- confirm
 * against the users of these limits. */
#define MAX_IO_BUFFERS 512
#define MAX_PARALLEL_IO 16
|
2015-11-20 10:37:13 +01:00
|
|
|
|
2009-11-30 18:21:19 +01:00
|
|
|
typedef struct BlkMigDevState {
|
2013-02-22 17:36:24 +01:00
|
|
|
/* Written during setup phase. Can be read without a lock. */
|
2016-05-27 19:50:37 +02:00
|
|
|
BlockBackend *blk;
|
|
|
|
char *blk_name;
|
2009-11-30 18:21:19 +01:00
|
|
|
int shared_base;
|
|
|
|
int64_t total_sectors;
|
2009-11-30 18:21:20 +01:00
|
|
|
QSIMPLEQ_ENTRY(BlkMigDevState) entry;
|
2016-02-14 18:17:04 +01:00
|
|
|
Error *blocker;
|
2013-02-22 17:36:24 +01:00
|
|
|
|
|
|
|
/* Only used by migration thread. Does not need a lock. */
|
|
|
|
int bulk_completed;
|
|
|
|
int64_t cur_sector;
|
|
|
|
int64_t cur_dirty;
|
|
|
|
|
2016-02-14 18:17:04 +01:00
|
|
|
/* Data in the aio_bitmap is protected by block migration lock.
|
|
|
|
* Allocation and free happen during setup and cleanup respectively.
|
|
|
|
*/
|
2010-11-08 20:02:56 +01:00
|
|
|
unsigned long *aio_bitmap;
|
2016-02-14 18:17:04 +01:00
|
|
|
|
|
|
|
/* Protected by block migration lock. */
|
2013-02-22 17:36:24 +01:00
|
|
|
int64_t completed_sectors;
|
2016-02-14 18:17:04 +01:00
|
|
|
|
2023-12-05 19:20:03 +01:00
|
|
|
/* During migration this is protected by bdrv_dirty_bitmap_lock().
|
2016-02-14 18:17:04 +01:00
|
|
|
* Allocation and free happen during setup and cleanup respectively.
|
|
|
|
*/
|
2013-11-13 11:29:43 +01:00
|
|
|
BdrvDirtyBitmap *dirty_bitmap;
|
2009-11-30 18:21:19 +01:00
|
|
|
} BlkMigDevState;
|
|
|
|
|
2009-11-02 14:40:58 +01:00
|
|
|
typedef struct BlkMigBlock {
|
2013-02-22 17:36:24 +01:00
|
|
|
/* Only used by migration thread. */
|
2009-11-02 14:40:58 +01:00
|
|
|
uint8_t *buf;
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
int64_t sector;
|
2010-11-08 20:02:56 +01:00
|
|
|
int nr_sectors;
|
2009-11-02 14:40:58 +01:00
|
|
|
QEMUIOVector qiov;
|
2014-10-07 13:59:14 +02:00
|
|
|
BlockAIOCB *aiocb;
|
2013-02-22 17:36:24 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
/* Protected by block migration lock. */
|
2009-11-02 14:40:58 +01:00
|
|
|
int ret;
|
2009-11-30 18:21:20 +01:00
|
|
|
QSIMPLEQ_ENTRY(BlkMigBlock) entry;
|
2009-11-02 14:40:58 +01:00
|
|
|
} BlkMigBlock;
|
|
|
|
|
|
|
|
typedef struct BlkMigState {
|
2018-12-06 11:58:10 +01:00
|
|
|
QSIMPLEQ_HEAD(, BlkMigDevState) bmds_list;
|
2013-02-22 17:36:24 +01:00
|
|
|
int64_t total_sector_sum;
|
2013-07-18 09:48:50 +02:00
|
|
|
bool zero_blocks;
|
2013-02-22 17:36:24 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
/* Protected by lock. */
|
2018-12-06 11:58:10 +01:00
|
|
|
QSIMPLEQ_HEAD(, BlkMigBlock) blk_list;
|
2009-11-02 14:40:58 +01:00
|
|
|
int submitted;
|
|
|
|
int read_done;
|
2013-02-22 17:36:24 +01:00
|
|
|
|
|
|
|
/* Only used by migration thread. Does not need a lock. */
|
2009-11-02 14:40:58 +01:00
|
|
|
int transferred;
|
2009-12-01 15:20:17 +01:00
|
|
|
int prev_progress;
|
2010-01-26 09:31:45 +01:00
|
|
|
int bulk_completed;
|
2013-02-22 17:36:25 +01:00
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Lock must be taken _inside_ the BQL. */
|
2013-02-22 17:36:25 +01:00
|
|
|
QemuMutex lock;
|
2009-11-02 14:40:58 +01:00
|
|
|
} BlkMigState;
|
|
|
|
|
2009-11-30 18:21:20 +01:00
|
|
|
static BlkMigState block_mig_state;
|
2009-11-02 14:40:58 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
static void blk_mig_lock(void)
|
|
|
|
{
|
|
|
|
qemu_mutex_lock(&block_mig_state.lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void blk_mig_unlock(void)
|
|
|
|
{
|
|
|
|
qemu_mutex_unlock(&block_mig_state.lock);
|
|
|
|
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Must run outside of the BQL during the bulk phase,
|
2013-02-22 17:36:27 +01:00
|
|
|
* or the VM will stall.
|
|
|
|
*/
|
|
|
|
|
2009-11-30 18:21:21 +01:00
|
|
|
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
|
|
|
|
{
|
|
|
|
int len;
|
2013-07-18 09:48:50 +02:00
|
|
|
uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
|
|
|
|
|
|
|
|
if (block_mig_state.zero_blocks &&
|
2020-02-18 12:02:09 +01:00
|
|
|
buffer_is_zero(blk->buf, BLK_MIG_BLOCK_SIZE)) {
|
2013-07-18 09:48:50 +02:00
|
|
|
flags |= BLK_MIG_FLAG_ZERO_BLOCK;
|
|
|
|
}
|
2009-11-30 18:21:21 +01:00
|
|
|
|
|
|
|
/* sector number and flags */
|
|
|
|
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
|
2013-07-18 09:48:50 +02:00
|
|
|
| flags);
|
2009-11-30 18:21:21 +01:00
|
|
|
|
|
|
|
/* device name */
|
2016-05-27 19:50:37 +02:00
|
|
|
len = strlen(blk->bmds->blk_name);
|
2009-11-30 18:21:21 +01:00
|
|
|
qemu_put_byte(f, len);
|
2016-05-27 19:50:37 +02:00
|
|
|
qemu_put_buffer(f, (uint8_t *) blk->bmds->blk_name, len);
|
2009-11-30 18:21:21 +01:00
|
|
|
|
2013-07-18 09:48:50 +02:00
|
|
|
/* if a block is zero we need to flush here since the network
|
|
|
|
* bandwidth is now a lot higher than the storage device bandwidth.
|
|
|
|
* thus if we queue zero blocks we slow down the migration */
|
|
|
|
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
|
|
|
|
qemu_fflush(f);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-02-18 12:02:09 +01:00
|
|
|
qemu_put_buffer(f, blk->buf, BLK_MIG_BLOCK_SIZE);
|
2009-11-30 18:21:21 +01:00
|
|
|
}
|
|
|
|
|
2009-11-30 18:21:21 +01:00
|
|
|
int blk_mig_active(void)
|
|
|
|
{
|
|
|
|
return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
|
|
|
|
}
|
|
|
|
|
2017-09-26 12:33:16 +02:00
|
|
|
int blk_mig_bulk_active(void)
|
|
|
|
{
|
|
|
|
return blk_mig_active() && !block_mig_state.bulk_completed;
|
|
|
|
}
|
|
|
|
|
2009-11-30 18:21:21 +01:00
|
|
|
uint64_t blk_mig_bytes_transferred(void)
|
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
uint64_t sum = 0;
|
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_lock();
|
2009-11-30 18:21:21 +01:00
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
|
|
|
sum += bmds->completed_sectors;
|
|
|
|
}
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_unlock();
|
2009-11-30 18:21:21 +01:00
|
|
|
return sum << BDRV_SECTOR_BITS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return the number of bytes still to be migrated: the total size of all
 * devices minus what has been completed already. */
uint64_t blk_mig_bytes_remaining(void)
{
    uint64_t total = blk_mig_bytes_total();
    uint64_t done = blk_mig_bytes_transferred();

    return total - done;
}
|
|
|
|
|
|
|
|
uint64_t blk_mig_bytes_total(void)
|
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
uint64_t sum = 0;
|
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
|
|
|
sum += bmds->total_sectors;
|
|
|
|
}
|
|
|
|
return sum << BDRV_SECTOR_BITS;
|
|
|
|
}
|
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
|
|
|
|
/* Called with migration lock held. */
|
|
|
|
|
2010-11-08 20:02:56 +01:00
|
|
|
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
|
|
|
|
{
|
|
|
|
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
|
|
|
|
|
2023-04-07 17:33:00 +02:00
|
|
|
if (sector < bmds->total_sectors) {
|
2010-11-08 20:02:56 +01:00
|
|
|
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
|
|
|
|
(1UL << (chunk % (sizeof(unsigned long) * 8))));
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
/* Called with migration lock held. */
|
|
|
|
|
2010-11-08 20:02:56 +01:00
|
|
|
static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
|
|
|
|
int nb_sectors, int set)
|
|
|
|
{
|
|
|
|
int64_t start, end;
|
|
|
|
unsigned long val, idx, bit;
|
|
|
|
|
|
|
|
start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
|
|
|
|
end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
|
|
|
|
|
|
|
|
for (; start <= end; start++) {
|
|
|
|
idx = start / (sizeof(unsigned long) * 8);
|
|
|
|
bit = start % (sizeof(unsigned long) * 8);
|
|
|
|
val = bmds->aio_bitmap[idx];
|
|
|
|
if (set) {
|
2010-11-12 19:07:50 +01:00
|
|
|
val |= 1UL << bit;
|
2010-11-08 20:02:56 +01:00
|
|
|
} else {
|
2010-11-12 19:07:50 +01:00
|
|
|
val &= ~(1UL << bit);
|
2010-11-08 20:02:56 +01:00
|
|
|
}
|
|
|
|
bmds->aio_bitmap[idx] = val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Allocate the zero-initialized aio_bitmap of @bmds.
 *
 * The bitmap holds one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors of the
 * device.  bmds_aio_inflight() and bmds_set_aio_inflight() access it in
 * units of 'unsigned long', so the allocation is rounded up to a whole
 * number of such words; sizing it in bytes would let the access to the
 * last word run past the end of the buffer whenever the byte count is not
 * a multiple of sizeof(unsigned long).
 */
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    const int64_t chunk_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
    const int64_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t nr_chunks, nr_words;

    /* one bit per (possibly partial) chunk ... */
    nr_chunks = (bmds->total_sectors + chunk_sectors - 1) / chunk_sectors;
    /* ... stored in whole words */
    nr_words = (nr_chunks + bits_per_word - 1) / bits_per_word;

    bmds->aio_bitmap = g_new0(unsigned long, nr_words);
}
|
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
/* Never hold migration lock when yielding to the main loop! */
|
|
|
|
|
2009-11-02 14:40:58 +01:00
|
|
|
static void blk_mig_read_cb(void *opaque, int ret)
|
|
|
|
{
|
|
|
|
BlkMigBlock *blk = opaque;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_lock();
|
2009-11-02 14:40:58 +01:00
|
|
|
blk->ret = ret;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2009-11-30 18:21:20 +01:00
|
|
|
QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
|
2010-11-08 20:02:56 +01:00
|
|
|
bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2009-11-30 18:21:20 +01:00
|
|
|
block_mig_state.submitted--;
|
|
|
|
block_mig_state.read_done++;
|
|
|
|
assert(block_mig_state.submitted >= 0);
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_unlock();
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
|
|
|
|
2013-02-22 17:36:27 +01:00
|
|
|
/* Called with no lock taken. */
|
|
|
|
|
2011-12-05 17:06:56 +01:00
|
|
|
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
|
2009-11-30 18:21:19 +01:00
|
|
|
{
|
2009-11-30 18:21:20 +01:00
|
|
|
int64_t total_sectors = bmds->total_sectors;
|
|
|
|
int64_t cur_sector = bmds->cur_sector;
|
2016-05-27 19:50:37 +02:00
|
|
|
BlockBackend *bb = bmds->blk;
|
2009-11-02 14:40:58 +01:00
|
|
|
BlkMigBlock *blk;
|
2009-11-30 18:21:21 +01:00
|
|
|
int nr_sectors;
|
block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonbly give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated(). But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset. Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().
For ease of review, bdrv_is_allocated_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 14:44:57 +02:00
|
|
|
int64_t count;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2009-11-30 18:21:20 +01:00
|
|
|
if (bmds->shared_base) {
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_lock();
|
block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonbly give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated(). But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset. Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().
For ease of review, bdrv_is_allocated_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 14:44:57 +02:00
|
|
|
/* Skip unallocated sectors; intentionally treats failure or
|
|
|
|
* partial sector as an allocated sector */
|
2009-11-30 18:21:20 +01:00
|
|
|
while (cur_sector < total_sectors &&
|
block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonbly give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated(). But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset. Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().
For ease of review, bdrv_is_allocated_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 14:44:57 +02:00
|
|
|
!bdrv_is_allocated(blk_bs(bb), cur_sector * BDRV_SECTOR_SIZE,
|
|
|
|
MAX_IS_ALLOCATED_SEARCH, &count)) {
|
|
|
|
if (count < BDRV_SECTOR_SIZE) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
cur_sector += count >> BDRV_SECTOR_BITS;
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_unlock();
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
2009-11-30 18:21:19 +01:00
|
|
|
|
|
|
|
if (cur_sector >= total_sectors) {
|
2009-11-30 18:21:21 +01:00
|
|
|
bmds->cur_sector = bmds->completed_sectors = total_sectors;
|
2009-11-02 14:40:58 +01:00
|
|
|
return 1;
|
|
|
|
}
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2009-11-30 18:21:21 +01:00
|
|
|
bmds->completed_sectors = cur_sector;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2009-11-30 18:21:20 +01:00
|
|
|
cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
|
|
|
|
|
2009-11-30 18:21:19 +01:00
|
|
|
/* we are going to transfer a full block even if it is not allocated */
|
|
|
|
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
|
2009-11-02 14:40:58 +01:00
|
|
|
|
2009-11-30 18:21:19 +01:00
|
|
|
if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
|
2009-11-30 18:21:20 +01:00
|
|
|
nr_sectors = total_sectors - cur_sector;
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
2009-11-30 18:21:19 +01:00
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 10:31:08 +02:00
|
|
|
blk = g_new(BlkMigBlock, 1);
|
2020-02-18 12:02:09 +01:00
|
|
|
blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE);
|
2009-11-30 18:21:21 +01:00
|
|
|
blk->bmds = bmds;
|
|
|
|
blk->sector = cur_sector;
|
2010-11-08 20:02:56 +01:00
|
|
|
blk->nr_sectors = nr_sectors;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2019-02-18 15:09:22 +01:00
|
|
|
qemu_iovec_init_buf(&blk->qiov, blk->buf, nr_sectors * BDRV_SECTOR_SIZE);
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_lock();
|
2013-02-22 17:36:23 +01:00
|
|
|
block_mig_state.submitted++;
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_unlock();
|
2013-02-22 17:36:23 +01:00
|
|
|
|
2023-12-05 19:20:03 +01:00
|
|
|
/*
|
|
|
|
* The migration thread does not have an AioContext. Lock the BQL so that
|
|
|
|
* I/O runs in the main loop AioContext (see
|
|
|
|
* qemu_get_current_aio_context()).
|
2016-02-14 18:17:04 +01:00
|
|
|
*/
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_lock();
|
2017-09-25 16:55:20 +02:00
|
|
|
bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE,
|
|
|
|
nr_sectors * BDRV_SECTOR_SIZE);
|
2018-03-08 12:18:25 +01:00
|
|
|
blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov,
|
|
|
|
0, blk_mig_read_cb, blk);
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_unlock();
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2013-02-22 17:36:27 +01:00
|
|
|
bmds->cur_sector = cur_sector + nr_sectors;
|
2009-11-30 18:21:21 +01:00
|
|
|
return (bmds->cur_sector >= total_sectors);
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2013-02-22 17:36:27 +01:00
|
|
|
|
2014-04-16 03:34:30 +02:00
|
|
|
static int set_dirty_tracking(void)
|
2009-11-02 14:40:58 +01:00
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
2014-04-16 03:34:30 +02:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
2016-05-27 19:50:37 +02:00
|
|
|
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
|
2020-02-18 12:02:09 +01:00
|
|
|
BLK_MIG_BLOCK_SIZE,
|
|
|
|
NULL, NULL);
|
2014-04-16 03:34:30 +02:00
|
|
|
if (!bmds->dirty_bitmap) {
|
|
|
|
ret = -errno;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
2009-11-30 18:21:20 +01:00
|
|
|
|
2014-04-16 03:34:30 +02:00
|
|
|
fail:
|
2009-11-30 18:21:20 +01:00
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
2014-04-16 03:34:30 +02:00
|
|
|
if (bmds->dirty_bitmap) {
|
2019-09-16 16:19:09 +02:00
|
|
|
bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
|
2014-04-16 03:34:30 +02:00
|
|
|
}
|
2013-11-13 11:29:43 +01:00
|
|
|
}
|
2014-04-16 03:34:30 +02:00
|
|
|
return ret;
|
2013-11-13 11:29:43 +01:00
|
|
|
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2016-02-14 18:17:04 +01:00
|
|
|
|
2013-11-13 11:29:43 +01:00
|
|
|
static void unset_dirty_tracking(void)
|
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
block-migration: Ensure we don't crash during migration cleanup
We can fail the blk_insert_bs() at init_blk_migration(), leaving the
BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
for the possibly missing elements when doing cleanup.
Fix the following crashes:
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
359 BlockDriverState *bs = bitmap->bs;
#0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
#1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
#2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
#0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
#1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
#2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Message-id: 20230731203338.27581-1-farosas@suse.de
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-07-31 22:33:38 +02:00
|
|
|
if (bmds->dirty_bitmap) {
|
|
|
|
bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
|
|
|
|
}
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-09 14:45:37 +01:00
|
|
|
static int init_blk_migration(QEMUFile *f)
|
2009-11-02 14:40:58 +01:00
|
|
|
{
|
2014-10-07 13:59:10 +02:00
|
|
|
BlockDriverState *bs;
|
2009-11-30 18:21:20 +01:00
|
|
|
BlkMigDevState *bmds;
|
2009-11-30 20:34:55 +01:00
|
|
|
int64_t sectors;
|
2016-05-20 18:49:07 +02:00
|
|
|
BdrvNextIterator it;
|
2016-05-27 19:50:37 +02:00
|
|
|
int i, num_bs = 0;
|
|
|
|
struct {
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
BlockDriverState *bs;
|
|
|
|
} *bmds_bs;
|
2017-02-09 14:45:37 +01:00
|
|
|
Error *local_err = NULL;
|
|
|
|
int ret;
|
2009-11-30 18:21:19 +01:00
|
|
|
|
2023-09-29 16:51:39 +02:00
|
|
|
GRAPH_RDLOCK_GUARD_MAINLOOP();
|
|
|
|
|
2014-10-07 13:59:10 +02:00
|
|
|
block_mig_state.submitted = 0;
|
|
|
|
block_mig_state.read_done = 0;
|
|
|
|
block_mig_state.transferred = 0;
|
|
|
|
block_mig_state.total_sector_sum = 0;
|
|
|
|
block_mig_state.prev_progress = -1;
|
|
|
|
block_mig_state.bulk_completed = 0;
|
|
|
|
block_mig_state.zero_blocks = migrate_zero_blocks();
|
|
|
|
|
2016-05-20 18:49:07 +02:00
|
|
|
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
2016-05-27 19:50:37 +02:00
|
|
|
num_bs++;
|
|
|
|
}
|
|
|
|
bmds_bs = g_malloc0(num_bs * sizeof(*bmds_bs));
|
|
|
|
|
|
|
|
for (i = 0, bs = bdrv_first(&it); bs; bs = bdrv_next(&it), i++) {
|
2014-10-07 13:59:10 +02:00
|
|
|
if (bdrv_is_read_only(bs)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-06-26 13:23:22 +02:00
|
|
|
sectors = bdrv_nb_sectors(bs);
|
2024-03-12 13:04:31 +01:00
|
|
|
if (sectors == 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (sectors < 0) {
|
2017-02-09 14:45:37 +01:00
|
|
|
ret = sectors;
|
2017-11-10 18:25:45 +01:00
|
|
|
bdrv_next_cleanup(&it);
|
2016-05-27 19:50:37 +02:00
|
|
|
goto out;
|
2010-04-09 16:22:13 +02:00
|
|
|
}
|
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 10:31:08 +02:00
|
|
|
bmds = g_new0(BlkMigDevState, 1);
|
2019-04-25 14:25:10 +02:00
|
|
|
bmds->blk = blk_new(qemu_get_aio_context(),
|
|
|
|
BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
|
2016-05-27 19:50:37 +02:00
|
|
|
bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
|
2010-04-09 16:22:13 +02:00
|
|
|
bmds->bulk_completed = 0;
|
|
|
|
bmds->total_sectors = sectors;
|
|
|
|
bmds->completed_sectors = 0;
|
2023-03-02 00:49:47 +01:00
|
|
|
bmds->shared_base = migrate_block_incremental();
|
2016-05-27 19:50:37 +02:00
|
|
|
|
|
|
|
assert(i < num_bs);
|
|
|
|
bmds_bs[i].bmds = bmds;
|
|
|
|
bmds_bs[i].bs = bs;
|
2010-04-09 16:22:13 +02:00
|
|
|
|
|
|
|
block_mig_state.total_sector_sum += sectors;
|
|
|
|
|
|
|
|
if (bmds->shared_base) {
|
2020-10-20 09:32:56 +02:00
|
|
|
trace_migration_block_init_shared(bdrv_get_device_name(bs));
|
2010-04-09 16:22:13 +02:00
|
|
|
} else {
|
2020-10-20 09:32:56 +02:00
|
|
|
trace_migration_block_init_full(bdrv_get_device_name(bs));
|
2010-04-09 16:22:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
|
|
|
|
}
|
2016-05-27 19:50:37 +02:00
|
|
|
|
|
|
|
/* Can only insert new BDSes now because doing so while iterating block
|
|
|
|
* devices may end up in a deadlock (iterating the new BDSes, too). */
|
|
|
|
for (i = 0; i < num_bs; i++) {
|
2023-09-21 14:13:07 +02:00
|
|
|
bmds = bmds_bs[i].bmds;
|
|
|
|
bs = bmds_bs[i].bs;
|
2016-05-27 19:50:37 +02:00
|
|
|
|
|
|
|
if (bmds) {
|
2017-02-09 14:45:37 +01:00
|
|
|
ret = blk_insert_bs(bmds->blk, bs, &local_err);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
goto out;
|
|
|
|
}
|
2016-05-27 19:50:37 +02:00
|
|
|
|
|
|
|
alloc_aio_bitmap(bmds);
|
|
|
|
error_setg(&bmds->blocker, "block device is in use by migration");
|
|
|
|
bdrv_op_block_all(bs, bmds->blocker);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-09 14:45:37 +01:00
|
|
|
ret = 0;
|
2016-05-27 19:50:37 +02:00
|
|
|
out:
|
|
|
|
g_free(bmds_bs);
|
2017-02-09 14:45:37 +01:00
|
|
|
return ret;
|
2010-04-09 16:22:13 +02:00
|
|
|
}
|
|
|
|
|
2013-02-22 17:36:27 +01:00
|
|
|
/* Called with no lock taken. */
|
|
|
|
|
2011-12-05 17:06:56 +01:00
|
|
|
/*
 * Advance the bulk phase by one step.
 *
 * Walks the device list and calls mig_save_device_bulk() for the first
 * device whose bulk_completed flag is still clear; devices behind it on the
 * list are not visited in this call.  Afterwards a progress record is
 * written to @f if the computed percentage differs from the previously
 * reported one.
 *
 * Returns 1 if a device with bulk_completed == 0 was found, 0 if all
 * devices on the list already have the flag set.
 */
static int blk_mig_save_bulked_block(QEMUFile *f)
{
    BlkMigDevState *dev;
    int64_t sectors_done = 0;
    int found_pending = 0;
    int percent;

    QSIMPLEQ_FOREACH(dev, &block_mig_state.bmds_list, entry) {
        int in_bulk = (dev->bulk_completed == 0);

        if (in_bulk && mig_save_device_bulk(f, dev) == 1) {
            /* completed bulk section for this device */
            dev->bulk_completed = 1;
        }

        /*
         * Read the counter only after the bulk call so that any update
         * made by it is part of the sum.
         */
        sectors_done += dev->completed_sectors;

        if (in_bulk) {
            /* Only one pending device is serviced per call. */
            found_pending = 1;
            break;
        }
    }

    /* With nothing to transfer at all, report the job as complete. */
    if (block_mig_state.total_sector_sum != 0) {
        percent = sectors_done * 100 / block_mig_state.total_sector_sum;
    } else {
        percent = 100;
    }

    if (percent != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = percent;
        /* The percentage travels in the bits above the flag field. */
        qemu_put_be64(f, (percent << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        trace_migration_block_progression(percent);
    }

    return found_pending;
}
|
|
|
|
|
2010-01-26 13:04:11 +01:00
|
|
|
static void blk_mig_reset_dirty_cursor(void)
|
2009-11-02 14:40:58 +01:00
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
2010-01-26 13:04:11 +01:00
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
|
|
|
bmds->cur_dirty = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2013-02-22 17:36:27 +01:00
|
|
|
|
2011-12-05 17:06:56 +01:00
|
|
|
/*
 * Look for the next dirty chunk of @bmds, starting at bmds->cur_dirty, and
 * transfer it.  Called with the BQL taken.
 *
 * Clean chunks are skipped in steps of BDRV_SECTORS_PER_DIRTY_CHUNK.  The
 * first dirty chunk found has its dirty bits cleared and is then read:
 *  - @is_async != 0: submitted with blk_aio_preadv(), blk_mig_read_cb being
 *    the completion callback; the block is accounted as submitted and
 *    marked in flight.
 *  - @is_async == 0: read with blk_pread() and handed to blk_send()
 *    immediately, after which the buffer is freed.
 * At most one chunk is transferred per call; bmds->cur_dirty is updated as
 * the scan advances.
 *
 * Returns 1 if cur_dirty has reached total_sectors, 0 if it has not, or the
 * negative value returned by blk_pread() when a synchronous read fails.
 */
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector = bmds->cur_dirty;
    BlkMigBlock *blk;
    int nr_sectors;

    while (sector < bmds->total_sectors) {
        int inflight;

        /*
         * If bmds_aio_inflight() reports this sector, drain the
         * BlockBackend before looking at the dirty bitmap.  The migration
         * lock is only held for the query itself.
         */
        blk_mig_lock();
        inflight = bmds_aio_inflight(bmds, sector);
        blk_mig_unlock();
        if (inflight) {
            blk_drain(bmds->blk);
        }

        bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
        if (!bdrv_dirty_bitmap_get_locked(bmds->dirty_bitmap,
                                          sector * BDRV_SECTOR_SIZE)) {
            /* Clean chunk: step over it and keep scanning. */
            bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
            sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
            bmds->cur_dirty = sector;
            continue;
        }

        /* The last chunk of the device may be shorter than a full one. */
        if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
            nr_sectors = total_sectors - sector;
        } else {
            nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
        }

        /* Clear the dirty bits while the bitmap lock is still held. */
        bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap,
                                       sector * BDRV_SECTOR_SIZE,
                                       nr_sectors * BDRV_SECTOR_SIZE);
        bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);

        blk = g_new(BlkMigBlock, 1);
        blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE);
        blk->bmds = bmds;
        blk->sector = sector;
        blk->nr_sectors = nr_sectors;

        if (is_async) {
            qemu_iovec_init_buf(&blk->qiov, blk->buf,
                                nr_sectors * BDRV_SECTOR_SIZE);

            blk->aiocb = blk_aio_preadv(bmds->blk,
                                        sector * BDRV_SECTOR_SIZE,
                                        &blk->qiov, 0, blk_mig_read_cb,
                                        blk);

            blk_mig_lock();
            block_mig_state.submitted++;
            bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
            blk_mig_unlock();
        } else {
            int ret = blk_pread(bmds->blk, sector * BDRV_SECTOR_SIZE,
                                nr_sectors * BDRV_SECTOR_SIZE, blk->buf, 0);

            if (ret < 0) {
                /* Nothing was sent; drop the block and report the error. */
                trace_migration_block_save_device_dirty(sector);
                g_free(blk->buf);
                g_free(blk);
                return ret;
            }
            blk_send(f, blk);

            g_free(blk->buf);
            g_free(blk);
        }

        sector += nr_sectors;
        bmds->cur_dirty = sector;
        /* One chunk per call. */
        break;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken.
|
2013-02-22 17:36:27 +01:00
|
|
|
*
|
|
|
|
* return value:
|
2012-08-29 21:37:14 +02:00
|
|
|
* 0: too much data for max_downtime
|
|
|
|
* 1: few enough data for max_downtime
|
|
|
|
*/
|
2011-12-05 17:06:56 +01:00
|
|
|
/*
 * Run mig_save_device_dirty() over the device list, stopping at the first
 * device for which it returns 0 or a negative value.  @is_async is passed
 * through unchanged.
 *
 * Returns the value of the last mig_save_device_dirty() call, or 1 if the
 * device list is empty.
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *dev;
    int status = 1;

    QSIMPLEQ_FOREACH(dev, &block_mig_state.bmds_list, entry) {
        status = mig_save_device_dirty(f, dev, is_async);
        if (status <= 0) {
            return status;
        }
    }

    return status;
}
|
|
|
|
|
2013-02-22 17:36:27 +01:00
|
|
|
/* Called with no locks taken. */
|
|
|
|
|
2012-08-29 20:17:13 +02:00
|
|
|
/*
 * Send the blocks queued on block_mig_state.blk_list to @f, in queue order.
 * Called with no locks taken.
 *
 * Sending stops when the queue is empty, when migration_rate_exceeded()
 * reports true, or when the block at the head of the queue carries a
 * negative ret; in the last case that block is left on the queue.
 *
 * Returns 0, or the negative ret of the block that stopped the flush.
 */
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    trace_migration_block_flush_blks("Enter", block_mig_state.submitted,
                                     block_mig_state.read_done,
                                     block_mig_state.transferred);

    blk_mig_lock();
    for (;;) {
        blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list);
        if (blk == NULL || migration_rate_exceeded(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);

        /* The block is off the queue now; send it without the lock held. */
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    trace_migration_block_flush_blks("Exit", block_mig_state.submitted,
                                     block_mig_state.read_done,
                                     block_mig_state.transferred);
    return ret;
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2013-02-22 17:36:27 +01:00
|
|
|
|
2010-01-26 09:31:49 +01:00
|
|
|
static int64_t get_remaining_dirty(void)
|
|
|
|
{
|
|
|
|
BlkMigDevState *bmds;
|
|
|
|
int64_t dirty = 0;
|
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
2023-12-05 19:20:03 +01:00
|
|
|
bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
|
2015-04-18 01:50:02 +02:00
|
|
|
dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
|
2023-12-05 19:20:03 +01:00
|
|
|
bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
|
2010-01-26 09:31:49 +01:00
|
|
|
}
|
|
|
|
|
2017-09-25 16:55:18 +02:00
|
|
|
return dirty;
|
2010-01-26 09:31:49 +01:00
|
|
|
}
|
|
|
|
|
2013-02-22 17:36:27 +01:00
|
|
|
|
2017-05-22 17:17:49 +02:00
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2017-05-22 17:17:49 +02:00
|
|
|
static void block_migration_cleanup_bmds(void)
|
2009-11-30 18:21:21 +01:00
|
|
|
{
|
2009-11-30 18:21:21 +01:00
|
|
|
BlkMigDevState *bmds;
|
block-migration: Ensure we don't crash during migration cleanup
We can fail the blk_insert_bs() at init_blk_migration(), leaving the
BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
for the possibly missing elements when doing cleanup.
Fix the following crashes:
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
359 BlockDriverState *bs = bitmap->bs;
#0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
#1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
#2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
#0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
#1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
#2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Message-id: 20230731203338.27581-1-farosas@suse.de
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-07-31 22:33:38 +02:00
|
|
|
BlockDriverState *bs;
|
2009-11-30 18:21:21 +01:00
|
|
|
|
2013-11-13 11:29:43 +01:00
|
|
|
unset_dirty_tracking();
|
2011-01-26 15:12:31 +01:00
|
|
|
|
2009-11-30 18:21:21 +01:00
|
|
|
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
|
|
|
|
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
|
block-migration: Ensure we don't crash during migration cleanup
We can fail the blk_insert_bs() at init_blk_migration(), leaving the
BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
for the possibly missing elements when doing cleanup.
Fix the following crashes:
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
359 BlockDriverState *bs = bitmap->bs;
#0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
#1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
#2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
#0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
#1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
#2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Message-id: 20230731203338.27581-1-farosas@suse.de
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-07-31 22:33:38 +02:00
|
|
|
|
|
|
|
bs = blk_bs(bmds->blk);
|
|
|
|
if (bs) {
|
|
|
|
bdrv_op_unblock_all(bs, bmds->blocker);
|
|
|
|
}
|
2014-05-23 15:29:43 +02:00
|
|
|
error_free(bmds->blocker);
|
2016-05-27 19:50:37 +02:00
|
|
|
blk_unref(bmds->blk);
|
|
|
|
g_free(bmds->blk_name);
|
2011-08-21 05:09:37 +02:00
|
|
|
g_free(bmds->aio_bitmap);
|
|
|
|
g_free(bmds);
|
2009-11-30 18:21:21 +01:00
|
|
|
}
|
2017-05-22 17:17:49 +02:00
|
|
|
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2017-05-22 17:17:49 +02:00
|
|
|
static void block_migration_cleanup(void *opaque)
|
|
|
|
{
|
|
|
|
BlkMigBlock *blk;
|
|
|
|
|
|
|
|
bdrv_drain_all();
|
|
|
|
|
|
|
|
block_migration_cleanup_bmds();
|
2009-11-30 18:21:21 +01:00
|
|
|
|
2016-02-14 18:17:04 +01:00
|
|
|
blk_mig_lock();
|
2009-11-30 18:21:21 +01:00
|
|
|
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
|
|
|
|
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
|
2011-08-21 05:09:37 +02:00
|
|
|
g_free(blk->buf);
|
|
|
|
g_free(blk);
|
2009-11-30 18:21:21 +01:00
|
|
|
}
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_unlock();
|
2009-11-30 18:21:21 +01:00
|
|
|
}
|
|
|
|
|
2012-06-28 15:11:57 +02:00
|
|
|
/*
 * save_setup handler: emit the deprecation warning, build the per-device
 * migration state, start dirty tracking and terminate the section with an
 * EOS marker.  @opaque is not used.
 *
 * Returns a negative value from init_blk_migration(), a non-zero value from
 * set_dirty_tracking(), or otherwise the result of flush_blks().
 */
static int block_save_setup(QEMUFile *f, void *opaque)
{
    int status;

    trace_migration_block_save("setup", block_mig_state.submitted,
                               block_mig_state.transferred);

    warn_report("block migration is deprecated;"
                " use blockdev-mirror with NBD instead");

    status = init_blk_migration(f);
    if (status < 0) {
        return status;
    }

    /* start track dirty blocks */
    status = set_dirty_tracking();
    if (status != 0) {
        return status;
    }

    /*
     * A flush failure is still reported to the caller, but only after the
     * cursor reset and the EOS marker below.
     */
    status = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return status;
}
|
|
|
|
|
2012-06-28 15:31:37 +02:00
|
|
|
/*
 * save_live_iterate handler.  @opaque is not used.
 *
 * Flushes already-read blocks, then keeps submitting work (bulk phase
 * first, dirty blocks afterwards) for as long as the rate and buffer limits
 * permit, flushes again and writes an EOS marker.
 *
 * Returns a non-zero value from flush_blks() or a negative value from the
 * dirty-block pass on failure; otherwise 1 if this call added any bytes to
 * @f and 0 if it did not.
 */
static int block_save_iterate(QEMUFile *f, void *opaque)
{
    const uint64_t bytes_before = qemu_file_transferred(f);
    uint64_t bytes_sent;
    int ret;

    trace_migration_block_save("iterate", block_mig_state.submitted,
                               block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    blk_mig_lock();
    for (;;) {
        /* The limits are evaluated with the migration lock held. */
        if (block_mig_state.read_done * BLK_MIG_BLOCK_SIZE >=
                migration_rate_get() ||
            block_mig_state.submitted >= MAX_PARALLEL_IO ||
            (block_mig_state.submitted + block_mig_state.read_done) >=
                MAX_IO_BUFFERS) {
            break;
        }
        blk_mig_unlock();

        if (block_mig_state.bulk_completed != 0) {
            /*
             * blk_mig_save_dirty_block() is always called with the BQL
             * taken, for simplicity; block_save_complete also calls it.
             */
            bql_lock();
            ret = blk_mig_save_dirty_block(f, 1);
            bql_unlock();
        } else {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        }

        if (ret < 0) {
            /* The migration lock is not held at this point. */
            return ret;
        }

        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    bytes_sent = qemu_file_transferred(f) - bytes_before;
    return (bytes_sent > 0);
}
|
|
|
|
|
2024-01-02 16:35:28 +01:00
|
|
|
/* Called with the BQL taken. */
|
2013-02-22 17:36:27 +01:00
|
|
|
|
2012-06-28 15:31:37 +02:00
|
|
|
/*
 * save_live_complete_precopy handler.  @opaque is not used.
 * Called with the BQL taken.
 *
 * Flushes queued blocks, then sends every remaining dirty chunk with
 * synchronous reads, reports 100% progress, writes the EOS marker and
 * releases the per-device state.
 *
 * Returns 0 on success, or the non-zero/negative value of the step that
 * failed.
 */
static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    trace_migration_block_save("complete", block_mig_state.submitted,
                               block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that save bulk is completed and
       all async read completed */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    /* Keep going until the dirty pass reports a non-zero result. */
    for (;;) {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
        if (ret != 0) {
            break;
        }
    }

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    trace_migration_block_save_complete();

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    /* Make sure that our BlockBackends are gone, so that the block driver
     * nodes can be inactivated. */
    block_migration_cleanup_bmds();

    return 0;
}
|
|
|
|
|
2023-02-08 14:48:02 +01:00
|
|
|
static void block_state_pending(void *opaque, uint64_t *must_precopy,
|
|
|
|
uint64_t *can_postcopy)
|
2012-09-21 11:18:18 +02:00
|
|
|
{
|
2013-02-12 10:37:15 +01:00
|
|
|
/* Estimate pending number of bytes to send */
|
2013-02-22 17:36:23 +01:00
|
|
|
uint64_t pending;
|
|
|
|
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_lock();
|
2016-02-14 18:17:04 +01:00
|
|
|
pending = get_remaining_dirty();
|
2024-01-02 16:35:25 +01:00
|
|
|
bql_unlock();
|
2016-02-14 18:17:04 +01:00
|
|
|
|
2013-02-22 17:36:25 +01:00
|
|
|
blk_mig_lock();
|
2020-02-18 12:02:09 +01:00
|
|
|
pending += block_mig_state.submitted * BLK_MIG_BLOCK_SIZE +
|
|
|
|
block_mig_state.read_done * BLK_MIG_BLOCK_SIZE;
|
2016-02-14 18:17:04 +01:00
|
|
|
blk_mig_unlock();
|
2013-02-12 10:37:15 +01:00
|
|
|
|
|
|
|
/* Report at least one block pending during bulk phase */
|
2022-10-03 05:15:56 +02:00
|
|
|
if (!pending && !block_mig_state.bulk_completed) {
|
|
|
|
pending = BLK_MIG_BLOCK_SIZE;
|
2013-02-12 10:37:15 +01:00
|
|
|
}
|
2012-09-21 11:18:18 +02:00
|
|
|
|
2022-10-03 02:00:03 +02:00
|
|
|
trace_migration_block_state_pending(pending);
|
2015-11-05 19:10:54 +01:00
|
|
|
/* We don't do postcopy */
|
2023-02-08 14:48:02 +01:00
|
|
|
*must_precopy += pending;
|
2012-09-21 11:18:18 +02:00
|
|
|
}
|
|
|
|
|
2009-11-02 14:40:58 +01:00
|
|
|
/*
 * load_state handler: consume block migration records from @f until an EOS
 * record is seen.  @opaque and @version_id are not used.
 *
 * Every record starts with a 64-bit word whose bits below BDRV_SECTOR_BITS
 * are flags and whose remaining bits are a sector number (or, for progress
 * records, a percentage):
 *  - BLK_MIG_FLAG_DEVICE_BLOCK: a device name follows, then (unless
 *    BLK_MIG_FLAG_ZERO_BLOCK is also set) BLK_MIG_BLOCK_SIZE bytes of data
 *    to be written at that sector.
 *  - BLK_MIG_FLAG_PROGRESS: print the percentage to stdout.
 *  - BLK_MIG_FLAG_EOS: end of section.
 *
 * Returns 0 on success, -EINVAL for malformed records, unknown devices or
 * out-of-range sectors, or the negative error from the block layer or from
 * the migration stream.
 */
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockBackend *blk, *blk_prev = NULL;
    Error *local_err = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;
    BlockDriverInfo bdi;
    int cluster_size = BLK_MIG_BLOCK_SIZE;

    do {
        addr = qemu_get_be64(f);

        flags = addr & (BDRV_SECTOR_SIZE - 1);
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /*
             * get device name; the length is a single byte, so the name
             * plus terminator always fits into device_name[256]
             */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            blk = blk_by_name(device_name);
            if (!blk) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            /* Per-device setup is redone only when the device changes. */
            if (blk != blk_prev) {
                blk_prev = blk;
                total_sectors = blk_nb_sectors(blk);
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }

                blk_activate(blk, &local_err);
                if (local_err) {
                    error_report_err(local_err);
                    return -EINVAL;
                }

                /*
                 * Write in units of the image's cluster size when it evenly
                 * divides the migration block size; otherwise use whole
                 * migration blocks.
                 */
                ret = bdrv_get_info(blk_bs(blk), &bdi);
                if (ret == 0 && bdi.cluster_size > 0 &&
                    bdi.cluster_size <= BLK_MIG_BLOCK_SIZE &&
                    BLK_MIG_BLOCK_SIZE % bdi.cluster_size == 0) {
                    cluster_size = bdi.cluster_size;
                } else {
                    cluster_size = BLK_MIG_BLOCK_SIZE;
                }
            }

            /*
             * The sector number comes from the stream.  Reject anything
             * outside the device, so that the length computed below can
             * never be zero or negative.
             */
            if (addr < 0 || addr >= total_sectors) {
                error_report("Sector %" PRId64 " is outside of block device %s",
                             addr, device_name);
                return -EINVAL;
            }

            /* The last chunk of the device may be shorter than a full one. */
            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
                ret = blk_pwrite_zeroes(blk, addr * BDRV_SECTOR_SIZE,
                                        nr_sectors * BDRV_SECTOR_SIZE,
                                        BDRV_REQ_MAY_UNMAP);
            } else {
                /* Bytes of this chunk that actually lie on the device. */
                int64_t chunk_bytes = nr_sectors * BDRV_SECTOR_SIZE;
                int64_t offset;

                /*
                 * A full BLK_MIG_BLOCK_SIZE payload is always consumed from
                 * the stream, but only the part that lies on the device is
                 * written, so a short last chunk does not run past the end.
                 */
                buf = g_malloc(BLK_MIG_BLOCK_SIZE);
                qemu_get_buffer(f, buf, BLK_MIG_BLOCK_SIZE);

                ret = 0;
                for (offset = 0; offset < chunk_bytes; offset += cluster_size) {
                    int64_t cur_addr = addr * BDRV_SECTOR_SIZE + offset;
                    uint8_t *cur_buf = buf + offset;
                    int64_t cur_len = chunk_bytes - offset;

                    if (cur_len > cluster_size) {
                        cur_len = cluster_size;
                    }

                    if ((!block_mig_state.zero_blocks ||
                        cluster_size < BLK_MIG_BLOCK_SIZE) &&
                        buffer_is_zero(cur_buf, cur_len)) {
                        ret = blk_pwrite_zeroes(blk, cur_addr,
                                                cur_len,
                                                BDRV_REQ_MAY_UNMAP);
                    } else {
                        ret = blk_pwrite(blk, cur_addr, cur_len, cur_buf,
                                         0);
                    }
                    if (ret < 0) {
                        break;
                    }
                }
                g_free(buf);
            }

            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            /* For progress records the upper bits hold the percentage. */
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: 0x%x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}
|
|
|
|
|
2012-06-27 10:59:15 +02:00
|
|
|
/* is_active handler: forwards the result of migrate_block(); @opaque unused. */
static bool block_is_active(void *opaque)
{
    bool enabled = migrate_block();

    return enabled;
}
|
|
|
|
|
2014-07-07 21:09:30 +02:00
|
|
|
static SaveVMHandlers savevm_block_handlers = {
|
2017-06-28 11:52:24 +02:00
|
|
|
.save_setup = block_save_setup,
|
2012-06-28 15:31:37 +02:00
|
|
|
.save_live_iterate = block_save_iterate,
|
2015-11-05 19:10:41 +01:00
|
|
|
.save_live_complete_precopy = block_save_complete,
|
2022-10-03 02:00:03 +02:00
|
|
|
.state_pending_exact = block_state_pending,
|
|
|
|
.state_pending_estimate = block_state_pending,
|
2012-06-26 18:46:10 +02:00
|
|
|
.load_state = block_load,
|
2017-06-28 11:52:25 +02:00
|
|
|
.save_cleanup = block_migration_cleanup,
|
2012-06-27 10:59:15 +02:00
|
|
|
.is_active = block_is_active,
|
2012-06-26 18:46:10 +02:00
|
|
|
};
|
|
|
|
|
2009-11-02 14:40:58 +01:00
|
|
|
void blk_mig_init(void)
|
2009-11-30 18:21:19 +01:00
|
|
|
{
|
2009-11-30 18:21:20 +01:00
|
|
|
QSIMPLEQ_INIT(&block_mig_state.bmds_list);
|
|
|
|
QSIMPLEQ_INIT(&block_mig_state.blk_list);
|
2013-02-22 17:36:25 +01:00
|
|
|
qemu_mutex_init(&block_mig_state.lock);
|
2009-11-30 18:21:20 +01:00
|
|
|
|
2019-08-22 13:54:33 +02:00
|
|
|
register_savevm_live("block", 0, 1, &savevm_block_handlers,
|
2012-06-26 18:46:10 +02:00
|
|
|
&block_mig_state);
|
2009-11-02 14:40:58 +01:00
|
|
|
}
|