Migration pull 2021-02-08
v2 Dropped vmstate: Fix memory leak in vmstate_handle_alloc Broke on Power Added migration: only check page size match if RAM postcopy is enabled -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAmAhIE4ACgkQBRYzHrxb /ecPuA/+Pgo++1ZSseJUgbLePwyTVc0jahdcvYEDmLUn8UM6ikBcBXBgUKHdkFW3 bjSSVgB/xxvXSiafBK4xFNrCqSgqMSr3DJcHmvWgv2wVARcYf6Z26Da53LZq1Qru 0tvRyb40Od1f9zb8Zj7e2Y3pjQ9ybLLbjfNhgnOBbQivqWkjZI31oV2KUCWY2+eV T1BEwr6mgYepqhmeB6OvQZtaQVC5toirS6NajNF4nt0vZEIGIvK6/A9erCVU8Tze 5ch1J0MUqgc3q6ZSE/I9BHEy6MaL0X8G6H+ezjxdoRQtbt1iM/YqZJCSrXkAxiLC ROohryb6qVk26+UYuana79faLwrw359WlkwNEE6SEIRSENu+6p7bgN3LZuCILCO7 xJEkeTgy6r40IGCkDC9aWa8pyLHpNX9gyLpGBHdIRD6zEOWaKNtzh7E2uo/T0ann BpcfgQOsYN25hIHiiXnxozUREbx71VDfMq7GqGB6eC3u2+a3U6jpSJb1nNq5NB89 FJYLZy5Rbuy7OStMwfMsxRs7E63XvGgnwrN8FczU/pumCPX4lDYIpnocqinUmP8p XubRQQVaVDSKIq1mvzw7iR/1NsP9vfYvnrAIv941f38NBmDKqdPuMOXR/qB/Kp2Y jB7b1L5/JcXbWsQmK7fda9jmPzFwSO2cTeTiUonk9RfuuDEws0A= =4tbe -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20210208a' into staging Migration pull 2021-02-08 v2 Dropped vmstate: Fix memory leak in vmstate_handle_alloc Broke on Power Added migration: only check page size match if RAM postcopy is enabled # gpg: Signature made Mon 08 Feb 2021 11:28:14 GMT # gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7 # gpg: Good signature from "Dr. 
David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full] # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20210208a: (27 commits) migration: only check page size match if RAM postcopy is enabled migration: introduce snapshot-{save, load, delete} QMP commands iotests: fix loading of common.config from tests/ subdir iotests: add support for capturing and matching QMP events migration: introduce a delete_snapshot wrapper migration: wire up support for snapshot device selection migration: control whether snapshots are ovewritten block: rename and alter bdrv_all_find_snapshot semantics block: allow specifying name of block device for vmstate storage block: add ability to specify list of blockdevs during snapshot migration: stop returning errno from load_snapshot() migration: Make save_snapshot() return bool, not 0/-1 block: push error reporting into bdrv_all_*_snapshot functions migration: Display the migration blockers migration: Add blocker information migration: Fix a few absurdly defective error messages migration: Fix cache_init()'s "Failed to allocate" error messages migration: Clean up signed vs. unsigned XBZRLE cache-size migration: Fix migrate-set-parameters argument validation migration: introduce 'userfaultfd-wrlat.py' script ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
2436651b26
@ -900,10 +900,11 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
|
||||
|
||||
ImageEntry *image_entry, *next_ie;
|
||||
SnapshotEntry *snapshot_entry;
|
||||
Error *err = NULL;
|
||||
|
||||
bs = bdrv_all_find_vmstate_bs();
|
||||
bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, &err);
|
||||
if (!bs) {
|
||||
monitor_printf(mon, "No available block device supports snapshots\n");
|
||||
error_report_err(err);
|
||||
return;
|
||||
}
|
||||
aio_context = bdrv_get_aio_context(bs);
|
||||
@ -953,7 +954,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
|
||||
total = 0;
|
||||
for (i = 0; i < nb_sns; i++) {
|
||||
SnapshotEntry *next_sn;
|
||||
if (bdrv_all_find_snapshot(sn_tab[i].name, &bs1) == 0) {
|
||||
if (bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL) == 1) {
|
||||
global_snapshots[total] = i;
|
||||
total++;
|
||||
QTAILQ_FOREACH(image_entry, &image_list, next) {
|
||||
|
266
block/snapshot.c
266
block/snapshot.c
@ -447,6 +447,41 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int bdrv_all_get_snapshot_devices(bool has_devices, strList *devices,
|
||||
GList **all_bdrvs,
|
||||
Error **errp)
|
||||
{
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
|
||||
if (has_devices) {
|
||||
if (!devices) {
|
||||
error_setg(errp, "At least one device is required for snapshot");
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (devices) {
|
||||
BlockDriverState *bs = bdrv_find_node(devices->value);
|
||||
if (!bs) {
|
||||
error_setg(errp, "No block device node '%s'", devices->value);
|
||||
return -1;
|
||||
}
|
||||
bdrvs = g_list_append(bdrvs, bs);
|
||||
devices = devices->next;
|
||||
}
|
||||
} else {
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
bdrvs = g_list_append(bdrvs, bs);
|
||||
}
|
||||
}
|
||||
|
||||
*all_bdrvs = g_steal_pointer(&bdrvs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
|
||||
{
|
||||
if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
|
||||
@ -462,44 +497,59 @@ static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
|
||||
* These functions will properly handle dataplane (take aio_context_acquire
|
||||
* when appropriate for appropriate block drivers) */
|
||||
|
||||
bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
|
||||
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
bool ok = true;
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
bool ok = true;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
if (devices || bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ok = bdrv_can_snapshot(bs);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
if (!ok) {
|
||||
bdrv_next_cleanup(&it);
|
||||
goto fail;
|
||||
}
|
||||
error_setg(errp, "Device '%s' is writable but does not support "
|
||||
"snapshots", bdrv_get_device_or_node_name(bs));
|
||||
return false;
|
||||
}
|
||||
|
||||
fail:
|
||||
*first_bad_bs = bs;
|
||||
return ok;
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
int bdrv_all_delete_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
int ret = 0;
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
QEMUSnapshotInfo sn1, *snapshot = &sn1;
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
QEMUSnapshotInfo sn1, *snapshot = &sn1;
|
||||
int ret = 0;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
if (bdrv_all_snapshots_includes_bs(bs) &&
|
||||
if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
|
||||
bdrv_snapshot_find(bs, snapshot, name) >= 0)
|
||||
{
|
||||
ret = bdrv_snapshot_delete(bs, snapshot->id_str,
|
||||
@ -507,118 +557,180 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
if (ret < 0) {
|
||||
bdrv_next_cleanup(&it);
|
||||
goto fail;
|
||||
}
|
||||
error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
|
||||
name, bdrv_get_device_or_node_name(bs));
|
||||
return -1;
|
||||
}
|
||||
|
||||
fail:
|
||||
*first_bad_bs = bs;
|
||||
return ret;
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
int bdrv_all_goto_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
int ret = 0;
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
int ret = 0;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
if (devices || bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ret = bdrv_snapshot_goto(bs, name, errp);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
if (ret < 0) {
|
||||
bdrv_next_cleanup(&it);
|
||||
goto fail;
|
||||
}
|
||||
error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
|
||||
name, bdrv_get_device_or_node_name(bs));
|
||||
return -1;
|
||||
}
|
||||
|
||||
fail:
|
||||
*first_bad_bs = bs;
|
||||
return ret;
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
|
||||
int bdrv_all_has_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
QEMUSnapshotInfo sn;
|
||||
int err = 0;
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
QEMUSnapshotInfo sn;
|
||||
int ret = 0;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
err = bdrv_snapshot_find(bs, &sn, name);
|
||||
if (devices || bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ret = bdrv_snapshot_find(bs, &sn, name);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
if (err < 0) {
|
||||
bdrv_next_cleanup(&it);
|
||||
goto fail;
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT) {
|
||||
return 0;
|
||||
} else {
|
||||
error_setg_errno(errp, errno,
|
||||
"Could not check snapshot '%s' on '%s'",
|
||||
name, bdrv_get_device_or_node_name(bs));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
*first_bad_bs = bs;
|
||||
return err;
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
|
||||
BlockDriverState *vm_state_bs,
|
||||
uint64_t vm_state_size,
|
||||
BlockDriverState **first_bad_bs)
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
int err = 0;
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
int ret = 0;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
if (bs == vm_state_bs) {
|
||||
sn->vm_state_size = vm_state_size;
|
||||
err = bdrv_snapshot_create(bs, sn);
|
||||
} else if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ret = bdrv_snapshot_create(bs, sn);
|
||||
} else if (devices || bdrv_all_snapshots_includes_bs(bs)) {
|
||||
sn->vm_state_size = 0;
|
||||
err = bdrv_snapshot_create(bs, sn);
|
||||
ret = bdrv_snapshot_create(bs, sn);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
if (err < 0) {
|
||||
bdrv_next_cleanup(&it);
|
||||
goto fail;
|
||||
}
|
||||
if (ret < 0) {
|
||||
error_setg(errp, "Could not create snapshot '%s' on '%s'",
|
||||
sn->name, bdrv_get_device_or_node_name(bs));
|
||||
return -1;
|
||||
}
|
||||
|
||||
fail:
|
||||
*first_bad_bs = bs;
|
||||
return err;
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
BlockDriverState *bdrv_all_find_vmstate_bs(void)
|
||||
{
|
||||
BlockDriverState *bs;
|
||||
BdrvNextIterator it;
|
||||
|
||||
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
||||
BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
g_autoptr(GList) bdrvs = NULL;
|
||||
GList *iterbdrvs;
|
||||
|
||||
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iterbdrvs = bdrvs;
|
||||
while (iterbdrvs) {
|
||||
BlockDriverState *bs = iterbdrvs->data;
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
bool found;
|
||||
bool found = false;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
found = bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs);
|
||||
found = (devices || bdrv_all_snapshots_includes_bs(bs)) &&
|
||||
bdrv_can_snapshot(bs);
|
||||
aio_context_release(ctx);
|
||||
|
||||
if (vmstate_bs) {
|
||||
if (g_str_equal(vmstate_bs,
|
||||
bdrv_get_node_name(bs))) {
|
||||
if (found) {
|
||||
bdrv_next_cleanup(&it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return bs;
|
||||
} else {
|
||||
error_setg(errp,
|
||||
"vmstate block device '%s' does not support snapshots",
|
||||
vmstate_bs);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
} else if (found) {
|
||||
return bs;
|
||||
}
|
||||
|
||||
iterbdrvs = iterbdrvs->next;
|
||||
}
|
||||
|
||||
if (vmstate_bs) {
|
||||
error_setg(errp,
|
||||
"vmstate block device '%s' does not exist", vmstate_bs);
|
||||
} else {
|
||||
error_setg(errp,
|
||||
"no block device can store vmstate for snapshot");
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
@ -2173,6 +2173,16 @@ static int spapr_pci_pre_save(void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int spapr_pci_post_save(void *opaque)
|
||||
{
|
||||
SpaprPhbState *sphb = opaque;
|
||||
|
||||
g_free(sphb->msi_devs);
|
||||
sphb->msi_devs = NULL;
|
||||
sphb->msi_devs_num = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int spapr_pci_post_load(void *opaque, int version_id)
|
||||
{
|
||||
SpaprPhbState *sphb = opaque;
|
||||
@ -2205,6 +2215,7 @@ static const VMStateDescription vmstate_spapr_pci = {
|
||||
.version_id = 2,
|
||||
.minimum_version_id = 2,
|
||||
.pre_save = spapr_pci_pre_save,
|
||||
.post_save = spapr_pci_post_save,
|
||||
.post_load = spapr_pci_post_load,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL),
|
||||
|
@ -25,7 +25,7 @@
|
||||
#ifndef SNAPSHOT_H
|
||||
#define SNAPSHOT_H
|
||||
|
||||
|
||||
#include "qapi/qapi-builtin-types.h"
|
||||
|
||||
#define SNAPSHOT_OPT_BASE "snapshot."
|
||||
#define SNAPSHOT_OPT_ID "snapshot.id"
|
||||
@ -77,17 +77,26 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
|
||||
* These functions will properly handle dataplane (take aio_context_acquire
|
||||
* when appropriate for appropriate block drivers) */
|
||||
|
||||
bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
|
||||
int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs,
|
||||
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
int bdrv_all_delete_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
int bdrv_all_goto_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
int bdrv_all_has_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs);
|
||||
int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
|
||||
BlockDriverState *vm_state_bs,
|
||||
uint64_t vm_state_size,
|
||||
BlockDriverState **first_bad_bs);
|
||||
bool has_devices,
|
||||
strList *devices,
|
||||
Error **errp);
|
||||
|
||||
BlockDriverState *bdrv_all_find_vmstate_bs(void);
|
||||
BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
|
||||
#endif
|
||||
|
@ -149,6 +149,14 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM is a persistent kind memory */
|
||||
#define RAM_PMEM (1 << 5)
|
||||
|
||||
|
||||
/*
|
||||
* UFFDIO_WRITEPROTECT is used on this RAMBlock to
|
||||
* support 'write-tracking' migration type.
|
||||
* Implies ram_state->ram_wt_enabled.
|
||||
*/
|
||||
#define RAM_UF_WRITEPROTECT (1 << 6)
|
||||
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
|
@ -15,7 +15,50 @@
|
||||
#ifndef QEMU_MIGRATION_SNAPSHOT_H
|
||||
#define QEMU_MIGRATION_SNAPSHOT_H
|
||||
|
||||
int save_snapshot(const char *name, Error **errp);
|
||||
int load_snapshot(const char *name, Error **errp);
|
||||
#include "qapi/qapi-builtin-types.h"
|
||||
|
||||
/**
|
||||
* save_snapshot: Save an internal snapshot.
|
||||
* @name: name of internal snapshot
|
||||
* @overwrite: replace existing snapshot with @name
|
||||
* @vmstate: blockdev node name to store VM state in
|
||||
* @has_devices: whether to use explicit device list
|
||||
* @devices: explicit device list to snapshot
|
||||
* @errp: pointer to error object
|
||||
* On success, return %true.
|
||||
* On failure, store an error through @errp and return %false.
|
||||
*/
|
||||
bool save_snapshot(const char *name, bool overwrite,
|
||||
const char *vmstate,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* load_snapshot: Load an internal snapshot.
|
||||
* @name: name of internal snapshot
|
||||
* @vmstate: blockdev node name to load VM state from
|
||||
* @has_devices: whether to use explicit device list
|
||||
* @devices: explicit device list to snapshot
|
||||
* @errp: pointer to error object
|
||||
* On success, return %true.
|
||||
* On failure, store an error through @errp and return %false.
|
||||
*/
|
||||
bool load_snapshot(const char *name,
|
||||
const char *vmstate,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* delete_snapshot: Delete a snapshot.
|
||||
* @name: name of internal snapshot to delete
|
||||
* @has_devices: whether to use explicit device list
|
||||
* @devices: explicit device list to snapshot
|
||||
* @errp: pointer to error object
|
||||
* On success, return %true.
|
||||
* On failure, store an error through @errp and return %false.
|
||||
*/
|
||||
bool delete_snapshot(const char *name,
|
||||
bool has_devices, strList *devices,
|
||||
Error **errp);
|
||||
|
||||
#endif
|
||||
|
35
include/qemu/userfaultfd.h
Normal file
35
include/qemu/userfaultfd.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Linux UFFD-WP support
|
||||
*
|
||||
* Copyright Virtuozzo GmbH, 2020
|
||||
*
|
||||
* Authors:
|
||||
* Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef USERFAULTFD_H
|
||||
#define USERFAULTFD_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "exec/hwaddr.h"
|
||||
#include <linux/userfaultfd.h>
|
||||
|
||||
int uffd_query_features(uint64_t *features);
|
||||
int uffd_create_fd(uint64_t features, bool non_blocking);
|
||||
void uffd_close_fd(int uffd_fd);
|
||||
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
|
||||
uint64_t mode, uint64_t *ioctls);
|
||||
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length);
|
||||
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
|
||||
bool wp, bool dont_wake);
|
||||
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
|
||||
uint64_t length, bool dont_wake);
|
||||
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake);
|
||||
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length);
|
||||
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count);
|
||||
bool uffd_poll_events(int uffd_fd, int tmo);
|
||||
|
||||
#endif /* USERFAULTFD_H */
|
@ -58,6 +58,7 @@
|
||||
#include "qemu/queue.h"
|
||||
#include "multifd.h"
|
||||
#include "qemu/yank.h"
|
||||
#include "sysemu/cpus.h"
|
||||
|
||||
#ifdef CONFIG_VFIO
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
@ -134,6 +135,38 @@ enum mig_rp_message_type {
|
||||
MIG_RP_MSG_MAX
|
||||
};
|
||||
|
||||
/* Migration capabilities set */
|
||||
struct MigrateCapsSet {
|
||||
int size; /* Capability set size */
|
||||
MigrationCapability caps[]; /* Variadic array of capabilities */
|
||||
};
|
||||
typedef struct MigrateCapsSet MigrateCapsSet;
|
||||
|
||||
/* Define and initialize MigrateCapsSet */
|
||||
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \
|
||||
MigrateCapsSet _name = { \
|
||||
.size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
|
||||
.caps = { __VA_ARGS__ } \
|
||||
}
|
||||
|
||||
/* Background-snapshot compatibility check list */
|
||||
static const
|
||||
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
|
||||
MIGRATION_CAPABILITY_POSTCOPY_RAM,
|
||||
MIGRATION_CAPABILITY_DIRTY_BITMAPS,
|
||||
MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
|
||||
MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
|
||||
MIGRATION_CAPABILITY_RETURN_PATH,
|
||||
MIGRATION_CAPABILITY_MULTIFD,
|
||||
MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
|
||||
MIGRATION_CAPABILITY_AUTO_CONVERGE,
|
||||
MIGRATION_CAPABILITY_RELEASE_RAM,
|
||||
MIGRATION_CAPABILITY_RDMA_PIN_ALL,
|
||||
MIGRATION_CAPABILITY_COMPRESS,
|
||||
MIGRATION_CAPABILITY_XBZRLE,
|
||||
MIGRATION_CAPABILITY_X_COLO,
|
||||
MIGRATION_CAPABILITY_VALIDATE_UUID);
|
||||
|
||||
/* When we add fault tolerance, we could have several
|
||||
migrations at once. For now we don't need to add
|
||||
dynamic creation of migration */
|
||||
@ -141,6 +174,8 @@ enum mig_rp_message_type {
|
||||
static MigrationState *current_migration;
|
||||
static MigrationIncomingState *current_incoming;
|
||||
|
||||
static GSList *migration_blockers;
|
||||
|
||||
static bool migration_object_check(MigrationState *ms, Error **errp);
|
||||
static int migration_maybe_pause(MigrationState *s,
|
||||
int *current_active_state,
|
||||
@ -1041,6 +1076,27 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
|
||||
info->blocked = migration_is_blocked(NULL);
|
||||
info->has_blocked_reasons = info->blocked;
|
||||
info->blocked_reasons = NULL;
|
||||
if (info->blocked) {
|
||||
GSList *cur_blocker = migration_blockers;
|
||||
|
||||
/*
|
||||
* There are two types of reasons a migration might be blocked;
|
||||
* a) devices marked in VMState as non-migratable, and
|
||||
* b) Explicit migration blockers
|
||||
* We need to add both of them here.
|
||||
*/
|
||||
qemu_savevm_non_migratable_list(&info->blocked_reasons);
|
||||
|
||||
while (cur_blocker) {
|
||||
QAPI_LIST_PREPEND(info->blocked_reasons,
|
||||
g_strdup(error_get_pretty(cur_blocker->data)));
|
||||
cur_blocker = g_slist_next(cur_blocker);
|
||||
}
|
||||
}
|
||||
|
||||
switch (s->state) {
|
||||
case MIGRATION_STATUS_NONE:
|
||||
/* no migration has happened ever */
|
||||
@ -1089,6 +1145,31 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
info->status = s->state;
|
||||
}
|
||||
|
||||
/*
 * Level of write-tracking support, ordered so that a larger value means
 * more support (callers compare with '<').
 */
typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,     /* not produced by the query below */
    WT_SUPPORT_ABSENT,          /* ram_write_tracking_available() failed */
    WT_SUPPORT_AVAILABLE,       /* available, but not compatible */
    WT_SUPPORT_COMPATIBLE       /* available and compatible */
} WriteTrackingSupport;

/*
 * Report how far write tracking is usable right now.
 *
 * Returns: WT_SUPPORT_ABSENT if the kernel lacks the required UFFD
 * features, WT_SUPPORT_AVAILABLE if they exist but the current memory
 * configuration is not compatible with them, WT_SUPPORT_COMPATIBLE
 * otherwise.
 */
static WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* The kernel check comes first; compatibility is only probed if it passes. */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }

    return ram_write_tracking_compatible() ? WT_SUPPORT_COMPATIBLE
                                           : WT_SUPPORT_AVAILABLE;
}
|
||||
|
||||
/**
|
||||
* @migration_caps_check - check capability validity
|
||||
*
|
||||
@ -1150,6 +1231,39 @@ static bool migrate_caps_check(bool *cap_list,
|
||||
}
|
||||
}
|
||||
|
||||
if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
|
||||
WriteTrackingSupport wt_support;
|
||||
int idx;
|
||||
/*
|
||||
* Check if 'background-snapshot' capability is supported by
|
||||
* host kernel and compatible with guest memory configuration.
|
||||
*/
|
||||
wt_support = migrate_query_write_tracking();
|
||||
if (wt_support < WT_SUPPORT_AVAILABLE) {
|
||||
error_setg(errp, "Background-snapshot is not supported by host kernel");
|
||||
return false;
|
||||
}
|
||||
if (wt_support < WT_SUPPORT_COMPATIBLE) {
|
||||
error_setg(errp, "Background-snapshot is not compatible "
|
||||
"with guest memory configuration");
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there are any migration capabilities
|
||||
* incompatible with 'background-snapshot'.
|
||||
*/
|
||||
for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
|
||||
int incomp_cap = check_caps_background_snapshot.caps[idx];
|
||||
if (cap_list[incomp_cap]) {
|
||||
error_setg(errp,
|
||||
"Background-snapshot is not compatible with %s",
|
||||
MigrationCapability_str(incomp_cap));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1226,21 +1340,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
||||
if (params->has_compress_level &&
|
||||
(params->compress_level > 9)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
|
||||
"is invalid, it should be in the range of 0 to 9");
|
||||
"a value between 0 and 9");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params->has_compress_threads && (params->compress_threads < 1)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"compress_threads",
|
||||
"is invalid, it should be in the range of 1 to 255");
|
||||
"a value between 1 and 255");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params->has_decompress_threads && (params->decompress_threads < 1)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"decompress_threads",
|
||||
"is invalid, it should be in the range of 1 to 255");
|
||||
"a value between 1 and 255");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1293,21 +1407,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
||||
if (params->has_multifd_channels && (params->multifd_channels < 1)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"multifd_channels",
|
||||
"is invalid, it should be in the range of 1 to 255");
|
||||
"a value between 1 and 255");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params->has_multifd_zlib_level &&
|
||||
(params->multifd_zlib_level > 9)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
|
||||
"is invalid, it should be in the range of 0 to 9");
|
||||
"a value between 0 and 9");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params->has_multifd_zstd_level &&
|
||||
(params->multifd_zstd_level > 20)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
|
||||
"is invalid, it should be in the range of 0 to 20");
|
||||
"a value between 0 and 20");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1316,8 +1430,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
||||
!is_power_of_2(params->xbzrle_cache_size))) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"xbzrle_cache_size",
|
||||
"is invalid, it should be bigger than target page size"
|
||||
" and a power of 2");
|
||||
"a power of two no less than the target page size");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1334,21 +1447,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
||||
params->announce_initial > 100000) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"announce_initial",
|
||||
"is invalid, it must be less than 100000 ms");
|
||||
"a value between 0 and 100000");
|
||||
return false;
|
||||
}
|
||||
if (params->has_announce_max &&
|
||||
params->announce_max > 100000) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"announce_max",
|
||||
"is invalid, it must be less than 100000 ms");
|
||||
"a value between 0 and 100000");
|
||||
return false;
|
||||
}
|
||||
if (params->has_announce_rounds &&
|
||||
params->announce_rounds > 1000) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"announce_rounds",
|
||||
"is invalid, it must be in the range of 0 to 1000");
|
||||
"a value between 0 and 1000");
|
||||
return false;
|
||||
}
|
||||
if (params->has_announce_step &&
|
||||
@ -1356,7 +1469,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
||||
params->announce_step > 10000)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"announce_step",
|
||||
"is invalid, it must be in the range of 1 to 10000 ms");
|
||||
"a value between 0 and 10000");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1909,6 +2022,7 @@ void migrate_init(MigrationState *s)
|
||||
* locks.
|
||||
*/
|
||||
s->cleanup_bh = 0;
|
||||
s->vm_start_bh = 0;
|
||||
s->to_dst_file = NULL;
|
||||
s->state = MIGRATION_STATUS_NONE;
|
||||
s->rp_state.from_dst_file = NULL;
|
||||
@ -1934,8 +2048,6 @@ void migrate_init(MigrationState *s)
|
||||
s->threshold_size = 0;
|
||||
}
|
||||
|
||||
static GSList *migration_blockers;
|
||||
|
||||
int migrate_add_blocker(Error *reason, Error **errp)
|
||||
{
|
||||
if (only_migratable) {
|
||||
@ -2216,7 +2328,7 @@ void qmp_migrate_set_cache_size(int64_t value, Error **errp)
|
||||
qmp_migrate_set_parameters(&p, errp);
|
||||
}
|
||||
|
||||
int64_t qmp_query_migrate_cache_size(Error **errp)
|
||||
uint64_t qmp_query_migrate_cache_size(Error **errp)
|
||||
{
|
||||
return migrate_xbzrle_cache_size();
|
||||
}
|
||||
@ -2446,7 +2558,7 @@ int migrate_use_xbzrle(void)
|
||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
|
||||
}
|
||||
|
||||
int64_t migrate_xbzrle_cache_size(void)
|
||||
uint64_t migrate_xbzrle_cache_size(void)
|
||||
{
|
||||
MigrationState *s;
|
||||
|
||||
@ -2491,6 +2603,15 @@ bool migrate_use_block_incremental(void)
|
||||
return s->parameters.block_incremental;
|
||||
}
|
||||
|
||||
bool migrate_background_snapshot(void)
|
||||
{
|
||||
MigrationState *s;
|
||||
|
||||
s = migrate_get_current();
|
||||
|
||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
|
||||
}
|
||||
|
||||
/* migration thread support */
|
||||
/*
|
||||
* Something bad happened to the RP stream, mark an error
|
||||
@ -3117,6 +3238,50 @@ fail:
|
||||
MIGRATION_STATUS_FAILED);
|
||||
}
|
||||
|
||||
/**
|
||||
* bg_migration_completion: Used by bg_migration_thread when after all the
|
||||
* RAM has been saved. The caller 'breaks' the loop when this returns.
|
||||
*
|
||||
* @s: Current migration state
|
||||
*/
|
||||
static void bg_migration_completion(MigrationState *s)
|
||||
{
|
||||
int current_active_state = s->state;
|
||||
|
||||
/*
|
||||
* Stop tracking RAM writes - un-protect memory, un-register UFFD
|
||||
* memory ranges, flush kernel wait queues and wake up threads
|
||||
* waiting for write fault to be resolved.
|
||||
*/
|
||||
ram_write_tracking_stop();
|
||||
|
||||
if (s->state == MIGRATION_STATUS_ACTIVE) {
|
||||
/*
|
||||
* By this moment we have RAM content saved into the migration stream.
|
||||
* The next step is to flush the non-RAM content (device state)
|
||||
* right after the ram content. The device state has been stored into
|
||||
* the temporary buffer before RAM saving started.
|
||||
*/
|
||||
qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
|
||||
qemu_fflush(s->to_dst_file);
|
||||
} else if (s->state == MIGRATION_STATUS_CANCELLING) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (qemu_file_get_error(s->to_dst_file)) {
|
||||
trace_migration_completion_file_err();
|
||||
goto fail;
|
||||
}
|
||||
|
||||
migrate_set_state(&s->state, current_active_state,
|
||||
MIGRATION_STATUS_COMPLETED);
|
||||
return;
|
||||
|
||||
fail:
|
||||
migrate_set_state(&s->state, current_active_state,
|
||||
MIGRATION_STATUS_FAILED);
|
||||
}
|
||||
|
||||
bool migrate_colo_enabled(void)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
@ -3457,6 +3622,47 @@ static void migration_iteration_finish(MigrationState *s)
|
||||
qemu_mutex_unlock_iothread();
|
||||
}
|
||||
|
||||
/*
 * Final step of the background snapshot thread, run with the iothread
 * lock held: account for a completed migration, complain about an
 * unexpected ending state, then schedule the migration fd cleanup.
 */
static void bg_migration_iteration_finish(MigrationState *s)
{
    int ending_state;

    qemu_mutex_lock_iothread();

    ending_state = s->state;
    if (ending_state == MIGRATION_STATUS_COMPLETED) {
        migration_calculate_complete(s);
    } else if (ending_state != MIGRATION_STATUS_ACTIVE &&
               ending_state != MIGRATION_STATUS_FAILED &&
               ending_state != MIGRATION_STATUS_CANCELLED &&
               ending_state != MIGRATION_STATUS_CANCELLING) {
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
|
||||
|
||||
/*
|
||||
* Return true if continue to the next iteration directly, false
|
||||
* otherwise.
|
||||
*/
|
||||
static MigIterateState bg_migration_iteration_run(MigrationState *s)
|
||||
{
|
||||
int res;
|
||||
|
||||
res = qemu_savevm_state_iterate(s->to_dst_file, false);
|
||||
if (res > 0) {
|
||||
bg_migration_completion(s);
|
||||
return MIG_ITERATE_BREAK;
|
||||
}
|
||||
|
||||
return MIG_ITERATE_RESUME;
|
||||
}
|
||||
|
||||
void migration_make_urgent_request(void)
|
||||
{
|
||||
qemu_sem_post(&migrate_get_current()->rate_limit_sem);
|
||||
@ -3604,6 +3810,165 @@ static void *migration_thread(void *opaque)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void bg_migration_vm_start_bh(void *opaque)
|
||||
{
|
||||
MigrationState *s = opaque;
|
||||
|
||||
qemu_bh_delete(s->vm_start_bh);
|
||||
s->vm_start_bh = NULL;
|
||||
|
||||
vm_start();
|
||||
s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Background snapshot thread, based on live migration code.
|
||||
* This is an alternative implementation of live migration mechanism
|
||||
* introduced specifically to support background snapshots.
|
||||
*
|
||||
* It takes advantage of userfault_fd write protection mechanism introduced
|
||||
* in v5.7 kernel. Compared to existing dirty page logging migration much
|
||||
* lesser stream traffic is produced resulting in smaller snapshot images,
|
||||
* simply cause of no page duplicates can get into the stream.
|
||||
*
|
||||
* Another key point is that generated vmstate stream reflects machine state
|
||||
* 'frozen' at the beginning of snapshot creation compared to dirty page logging
|
||||
* mechanism, which effectively results in that saved snapshot is the state of VM
|
||||
* at the end of the process.
|
||||
*/
|
||||
static void *bg_migration_thread(void *opaque)
|
||||
{
|
||||
MigrationState *s = opaque;
|
||||
int64_t setup_start;
|
||||
MigThrError thr_error;
|
||||
QEMUFile *fb;
|
||||
bool early_fail = true;
|
||||
|
||||
rcu_register_thread();
|
||||
object_ref(OBJECT(s));
|
||||
|
||||
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
|
||||
|
||||
setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||
/*
|
||||
* We want to save vmstate for the moment when migration has been
|
||||
* initiated but also we want to save RAM content while VM is running.
|
||||
* The RAM content should appear first in the vmstate. So, we first
|
||||
* stash the non-RAM part of the vmstate to the temporary buffer,
|
||||
* then write RAM part of the vmstate to the migration stream
|
||||
* with vCPUs running and, finally, write stashed non-RAM part of
|
||||
* the vmstate from the buffer to the migration stream.
|
||||
*/
|
||||
s->bioc = qio_channel_buffer_new(128 * 1024);
|
||||
qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
|
||||
fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
|
||||
object_unref(OBJECT(s->bioc));
|
||||
|
||||
update_iteration_initial_status(s);
|
||||
|
||||
qemu_savevm_state_header(s->to_dst_file);
|
||||
qemu_savevm_state_setup(s->to_dst_file);
|
||||
|
||||
if (qemu_savevm_state_guest_unplug_pending()) {
|
||||
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
|
||||
MIGRATION_STATUS_WAIT_UNPLUG);
|
||||
|
||||
while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
|
||||
qemu_savevm_state_guest_unplug_pending()) {
|
||||
qemu_sem_timedwait(&s->wait_unplug_sem, 250);
|
||||
}
|
||||
|
||||
migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
|
||||
MIGRATION_STATUS_ACTIVE);
|
||||
} else {
|
||||
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
|
||||
MIGRATION_STATUS_ACTIVE);
|
||||
}
|
||||
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
|
||||
|
||||
trace_migration_thread_setup_complete();
|
||||
s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||
|
||||
qemu_mutex_lock_iothread();
|
||||
|
||||
/*
|
||||
* If VM is currently in suspended state, then, to make a valid runstate
|
||||
* transition in vm_stop_force_state() we need to wakeup it up.
|
||||
*/
|
||||
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
|
||||
s->vm_was_running = runstate_is_running();
|
||||
|
||||
if (global_state_store()) {
|
||||
goto fail;
|
||||
}
|
||||
/* Forcibly stop VM before saving state of vCPUs and devices */
|
||||
if (vm_stop_force_state(RUN_STATE_PAUSED)) {
|
||||
goto fail;
|
||||
}
|
||||
/*
|
||||
* Put vCPUs in sync with shadow context structures, then
|
||||
* save their state to channel-buffer along with devices.
|
||||
*/
|
||||
cpu_synchronize_all_states();
|
||||
if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
|
||||
goto fail;
|
||||
}
|
||||
/* Now initialize UFFD context and start tracking RAM writes */
|
||||
if (ram_write_tracking_start()) {
|
||||
goto fail;
|
||||
}
|
||||
early_fail = false;
|
||||
|
||||
/*
|
||||
* Start VM from BH handler to avoid write-fault lock here.
|
||||
* UFFD-WP protection for the whole RAM is already enabled so
|
||||
* calling VM state change notifiers from vm_start() would initiate
|
||||
* writes to virtio VQs memory which is in write-protected region.
|
||||
*/
|
||||
s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
|
||||
qemu_bh_schedule(s->vm_start_bh);
|
||||
|
||||
qemu_mutex_unlock_iothread();
|
||||
|
||||
while (migration_is_active(s)) {
|
||||
MigIterateState iter_state = bg_migration_iteration_run(s);
|
||||
if (iter_state == MIG_ITERATE_SKIP) {
|
||||
continue;
|
||||
} else if (iter_state == MIG_ITERATE_BREAK) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to detect any kind of failures, and see whether we
|
||||
* should stop the migration now.
|
||||
*/
|
||||
thr_error = migration_detect_error(s);
|
||||
if (thr_error == MIG_THR_ERR_FATAL) {
|
||||
/* Stop migration */
|
||||
break;
|
||||
}
|
||||
|
||||
migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
|
||||
}
|
||||
|
||||
trace_migration_thread_after_loop();
|
||||
|
||||
fail:
|
||||
if (early_fail) {
|
||||
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
|
||||
MIGRATION_STATUS_FAILED);
|
||||
qemu_mutex_unlock_iothread();
|
||||
}
|
||||
|
||||
bg_migration_iteration_finish(s);
|
||||
|
||||
qemu_fclose(fb);
|
||||
object_unref(OBJECT(s));
|
||||
rcu_unregister_thread();
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void migrate_fd_connect(MigrationState *s, Error *error_in)
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
@ -3667,8 +4032,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
|
||||
migrate_fd_cleanup(s);
|
||||
return;
|
||||
}
|
||||
qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
|
||||
QEMU_THREAD_JOINABLE);
|
||||
|
||||
if (migrate_background_snapshot()) {
|
||||
qemu_thread_create(&s->thread, "bg_snapshot",
|
||||
bg_migration_thread, s, QEMU_THREAD_JOINABLE);
|
||||
} else {
|
||||
qemu_thread_create(&s->thread, "live_migration",
|
||||
migration_thread, s, QEMU_THREAD_JOINABLE);
|
||||
}
|
||||
s->migration_thread_running = true;
|
||||
}
|
||||
|
||||
@ -3784,6 +4155,8 @@ static Property migration_properties[] = {
|
||||
DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
|
||||
DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
|
||||
DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
|
||||
DEFINE_PROP_MIG_CAP("x-background-snapshot",
|
||||
MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
|
||||
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/coroutine_int.h"
|
||||
#include "io/channel.h"
|
||||
#include "io/channel-buffer.h"
|
||||
#include "net/announce.h"
|
||||
#include "qom/object.h"
|
||||
|
||||
@ -147,8 +148,10 @@ struct MigrationState {
|
||||
|
||||
/*< public >*/
|
||||
QemuThread thread;
|
||||
QEMUBH *vm_start_bh;
|
||||
QEMUBH *cleanup_bh;
|
||||
QEMUFile *to_dst_file;
|
||||
QIOChannelBuffer *bioc;
|
||||
/*
|
||||
* Protects to_dst_file pointer. We need to make sure we won't
|
||||
* yield or hang during the critical section, since this lock will
|
||||
@ -324,7 +327,7 @@ int migrate_multifd_zlib_level(void);
|
||||
int migrate_multifd_zstd_level(void);
|
||||
|
||||
int migrate_use_xbzrle(void);
|
||||
int64_t migrate_xbzrle_cache_size(void);
|
||||
uint64_t migrate_xbzrle_cache_size(void);
|
||||
bool migrate_colo_enabled(void);
|
||||
|
||||
bool migrate_use_block(void);
|
||||
@ -341,6 +344,7 @@ int migrate_compress_wait_thread(void);
|
||||
int migrate_decompress_threads(void);
|
||||
bool migrate_use_events(void);
|
||||
bool migrate_postcopy_blocktime(void);
|
||||
bool migrate_background_snapshot(void);
|
||||
|
||||
/* Sending on the return path - generic and then for each message type */
|
||||
void migrate_send_rp_shut(MigrationIncomingState *mis,
|
||||
|
@ -38,7 +38,7 @@ struct PageCache {
|
||||
size_t num_items;
|
||||
};
|
||||
|
||||
PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
|
||||
PageCache *cache_init(uint64_t new_size, size_t page_size, Error **errp)
|
||||
{
|
||||
int64_t i;
|
||||
size_t num_pages = new_size / page_size;
|
||||
@ -60,8 +60,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
|
||||
/* We prefer not to abort if there is no memory */
|
||||
cache = g_try_malloc(sizeof(*cache));
|
||||
if (!cache) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
|
||||
"Failed to allocate cache");
|
||||
error_setg(errp, "Failed to allocate cache");
|
||||
return NULL;
|
||||
}
|
||||
cache->page_size = page_size;
|
||||
@ -74,8 +73,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
|
||||
cache->page_cache = g_try_malloc((cache->max_num_items) *
|
||||
sizeof(*cache->page_cache));
|
||||
if (!cache->page_cache) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
|
||||
"Failed to allocate page cache");
|
||||
error_setg(errp, "Failed to allocate page cache");
|
||||
g_free(cache);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ typedef struct PageCache PageCache;
|
||||
* @page_size: cache page size
|
||||
* @errp: set *errp if the check failed, with reason
|
||||
*/
|
||||
PageCache *cache_init(int64_t cache_size, size_t page_size, Error **errp);
|
||||
PageCache *cache_init(uint64_t cache_size, size_t page_size, Error **errp);
|
||||
/**
|
||||
* cache_fini: free all cache resources
|
||||
* @cache pointer to the PageCache struct
|
||||
|
@ -595,7 +595,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
|
||||
{
|
||||
if (size < IO_BUF_SIZE) {
|
||||
size_t res;
|
||||
uint8_t *src;
|
||||
uint8_t *src = NULL;
|
||||
|
||||
res = qemu_peek_buffer(f, &src, size, 0);
|
||||
|
||||
|
295
migration/ram.c
295
migration/ram.c
@ -56,6 +56,11 @@
|
||||
#include "savevm.h"
|
||||
#include "qemu/iov.h"
|
||||
#include "multifd.h"
|
||||
#include "sysemu/runstate.h"
|
||||
|
||||
#if defined(__linux__)
|
||||
#include "qemu/userfaultfd.h"
|
||||
#endif /* defined(__linux__) */
|
||||
|
||||
/***********************************************************/
|
||||
/* ram save/restore */
|
||||
@ -126,7 +131,7 @@ static void XBZRLE_cache_unlock(void)
|
||||
* @new_size: new cache size
|
||||
* @errp: set *errp if the check failed, with reason
|
||||
*/
|
||||
int xbzrle_cache_resize(int64_t new_size, Error **errp)
|
||||
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
|
||||
{
|
||||
PageCache *new_cache;
|
||||
int64_t ret = 0;
|
||||
@ -298,6 +303,8 @@ struct RAMSrcPageRequest {
|
||||
struct RAMState {
|
||||
/* QEMUFile used for this migration */
|
||||
QEMUFile *f;
|
||||
/* UFFD file descriptor, used in 'write-tracking' migration */
|
||||
int uffdio_fd;
|
||||
/* Last block that we have visited searching for dirty pages */
|
||||
RAMBlock *last_seen_block;
|
||||
/* Last block from where we have sent data */
|
||||
@ -1434,6 +1441,269 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
|
||||
return block;
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
/**
|
||||
* poll_fault_page: try to get next UFFD write fault page and, if pending fault
|
||||
* is found, return RAM block pointer and page offset
|
||||
*
|
||||
* Returns pointer to the RAMBlock containing faulting page,
|
||||
* NULL if no write faults are pending
|
||||
*
|
||||
* @rs: current RAM state
|
||||
* @offset: page offset from the beginning of the block
|
||||
*/
|
||||
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
|
||||
{
|
||||
struct uffd_msg uffd_msg;
|
||||
void *page_address;
|
||||
RAMBlock *bs;
|
||||
int res;
|
||||
|
||||
if (!migrate_background_snapshot()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
|
||||
if (res <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
|
||||
bs = qemu_ram_block_from_host(page_address, false, offset);
|
||||
assert(bs && (bs->flags & RAM_UF_WRITEPROTECT) != 0);
|
||||
return bs;
|
||||
}
|
||||
|
||||
/**
|
||||
* ram_save_release_protection: release UFFD write protection after
|
||||
* a range of pages has been saved
|
||||
*
|
||||
* @rs: current RAM state
|
||||
* @pss: page-search-status structure
|
||||
* @start_page: index of the first page in the range relative to pss->block
|
||||
*
|
||||
* Returns 0 on success, negative value in case of an error
|
||||
*/
|
||||
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
|
||||
unsigned long start_page)
|
||||
{
|
||||
int res = 0;
|
||||
|
||||
/* Check if page is from UFFD-managed region. */
|
||||
if (pss->block->flags & RAM_UF_WRITEPROTECT) {
|
||||
void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
|
||||
uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;
|
||||
|
||||
/* Flush async buffers before un-protect. */
|
||||
qemu_fflush(rs->f);
|
||||
/* Un-protect memory range. */
|
||||
res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
|
||||
false, false);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* ram_write_tracking_available: check if kernel supports required UFFD features
|
||||
*
|
||||
* Returns true if supports, false otherwise
|
||||
*/
|
||||
bool ram_write_tracking_available(void)
|
||||
{
|
||||
uint64_t uffd_features;
|
||||
int res;
|
||||
|
||||
res = uffd_query_features(&uffd_features);
|
||||
return (res == 0 &&
|
||||
(uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
|
||||
}
|
||||
|
||||
/* ram_write_tracking_compatible: check if guest configuration is
|
||||
* compatible with 'write-tracking'
|
||||
*
|
||||
* Returns true if compatible, false otherwise
|
||||
*/
|
||||
bool ram_write_tracking_compatible(void)
|
||||
{
|
||||
const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
|
||||
int uffd_fd;
|
||||
RAMBlock *bs;
|
||||
bool ret = false;
|
||||
|
||||
/* Open UFFD file descriptor */
|
||||
uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
|
||||
if (uffd_fd < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
RCU_READ_LOCK_GUARD();
|
||||
|
||||
RAMBLOCK_FOREACH_NOT_IGNORED(bs) {
|
||||
uint64_t uffd_ioctls;
|
||||
|
||||
/* Nothing to do with read-only and MMIO-writable regions */
|
||||
if (bs->mr->readonly || bs->mr->rom_device) {
|
||||
continue;
|
||||
}
|
||||
/* Try to register block memory via UFFD-IO to track writes */
|
||||
if (uffd_register_memory(uffd_fd, bs->host, bs->max_length,
|
||||
UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
|
||||
goto out;
|
||||
}
|
||||
if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = true;
|
||||
|
||||
out:
|
||||
uffd_close_fd(uffd_fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* ram_write_tracking_start: start UFFD-WP memory tracking
|
||||
*
|
||||
* Returns 0 for success or negative value in case of error
|
||||
*/
|
||||
int ram_write_tracking_start(void)
|
||||
{
|
||||
int uffd_fd;
|
||||
RAMState *rs = ram_state;
|
||||
RAMBlock *bs;
|
||||
|
||||
/* Open UFFD file descriptor */
|
||||
uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
|
||||
if (uffd_fd < 0) {
|
||||
return uffd_fd;
|
||||
}
|
||||
rs->uffdio_fd = uffd_fd;
|
||||
|
||||
RCU_READ_LOCK_GUARD();
|
||||
|
||||
RAMBLOCK_FOREACH_NOT_IGNORED(bs) {
|
||||
/* Nothing to do with read-only and MMIO-writable regions */
|
||||
if (bs->mr->readonly || bs->mr->rom_device) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Register block memory with UFFD to track writes */
|
||||
if (uffd_register_memory(rs->uffdio_fd, bs->host,
|
||||
bs->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
|
||||
goto fail;
|
||||
}
|
||||
/* Apply UFFD write protection to the block memory range */
|
||||
if (uffd_change_protection(rs->uffdio_fd, bs->host,
|
||||
bs->max_length, true, false)) {
|
||||
goto fail;
|
||||
}
|
||||
bs->flags |= RAM_UF_WRITEPROTECT;
|
||||
memory_region_ref(bs->mr);
|
||||
|
||||
trace_ram_write_tracking_ramblock_start(bs->idstr, bs->page_size,
|
||||
bs->host, bs->max_length);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
error_report("ram_write_tracking_start() failed: restoring initial memory state");
|
||||
|
||||
RAMBLOCK_FOREACH_NOT_IGNORED(bs) {
|
||||
if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) {
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* In case some memory block failed to be write-protected
|
||||
* remove protection and unregister all succeeded RAM blocks
|
||||
*/
|
||||
uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false);
|
||||
uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length);
|
||||
/* Cleanup flags and remove reference */
|
||||
bs->flags &= ~RAM_UF_WRITEPROTECT;
|
||||
memory_region_unref(bs->mr);
|
||||
}
|
||||
|
||||
uffd_close_fd(uffd_fd);
|
||||
rs->uffdio_fd = -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
|
||||
*/
|
||||
void ram_write_tracking_stop(void)
|
||||
{
|
||||
RAMState *rs = ram_state;
|
||||
RAMBlock *bs;
|
||||
|
||||
RCU_READ_LOCK_GUARD();
|
||||
|
||||
RAMBLOCK_FOREACH_NOT_IGNORED(bs) {
|
||||
if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) {
|
||||
continue;
|
||||
}
|
||||
/* Remove protection and unregister all affected RAM blocks */
|
||||
uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false);
|
||||
uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length);
|
||||
|
||||
trace_ram_write_tracking_ramblock_stop(bs->idstr, bs->page_size,
|
||||
bs->host, bs->max_length);
|
||||
|
||||
/* Cleanup flags and remove reference */
|
||||
bs->flags &= ~RAM_UF_WRITEPROTECT;
|
||||
memory_region_unref(bs->mr);
|
||||
}
|
||||
|
||||
/* Finally close UFFD file descriptor */
|
||||
uffd_close_fd(rs->uffdio_fd);
|
||||
rs->uffdio_fd = -1;
|
||||
}
|
||||
|
||||
#else
|
||||
/* No target OS support, stubs just fail or ignore */
|
||||
|
||||
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
|
||||
{
|
||||
(void) rs;
|
||||
(void) offset;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
|
||||
unsigned long start_page)
|
||||
{
|
||||
(void) rs;
|
||||
(void) pss;
|
||||
(void) start_page;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool ram_write_tracking_available(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ram_write_tracking_compatible(void)
|
||||
{
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
int ram_write_tracking_start(void)
|
||||
{
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
void ram_write_tracking_stop(void)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
#endif /* defined(__linux__) */
|
||||
|
||||
/**
|
||||
* get_queued_page: unqueue a page from the postcopy requests
|
||||
*
|
||||
@ -1473,6 +1743,14 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
|
||||
|
||||
} while (block && !dirty);
|
||||
|
||||
if (!block) {
|
||||
/*
|
||||
* Poll write faults too if background snapshot is enabled; that's
|
||||
* when vCPUs can be blocked on write-protected pages.
|
||||
*/
|
||||
block = poll_fault_page(rs, &offset);
|
||||
}
|
||||
|
||||
if (block) {
|
||||
/*
|
||||
* As soon as we start servicing pages out of order, then we have
|
||||
@ -1715,6 +1993,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
|
||||
int tmppages, pages = 0;
|
||||
size_t pagesize_bits =
|
||||
qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
|
||||
unsigned long start_page = pss->page;
|
||||
int res;
|
||||
|
||||
if (ramblock_is_ignored(pss->block)) {
|
||||
error_report("block %s should not be migrated !", pss->block->idstr);
|
||||
@ -1740,10 +2020,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
|
||||
} while ((pss->page & (pagesize_bits - 1)) &&
|
||||
offset_in_ramblock(pss->block,
|
||||
((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
|
||||
|
||||
/* The offset we leave with is the last one we looked at */
|
||||
pss->page--;
|
||||
return pages;
|
||||
|
||||
res = ram_save_release_protection(rs, pss, start_page);
|
||||
return (res < 0 ? res : pages);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1880,10 +2161,13 @@ static void ram_save_cleanup(void *opaque)
|
||||
RAMState **rsp = opaque;
|
||||
RAMBlock *block;
|
||||
|
||||
/* We don't use dirty log with background snapshots */
|
||||
if (!migrate_background_snapshot()) {
|
||||
/* caller holds the iothread lock or is in a bh, so there is
|
||||
* no writing race against the migration bitmap
|
||||
*/
|
||||
memory_global_dirty_log_stop();
|
||||
}
|
||||
|
||||
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
|
||||
g_free(block->clear_bmap);
|
||||
@ -2343,9 +2627,12 @@ static void ram_init_bitmaps(RAMState *rs)
|
||||
|
||||
WITH_RCU_READ_LOCK_GUARD() {
|
||||
ram_list_init_bitmaps();
|
||||
/* We don't use dirty log with background snapshots */
|
||||
if (!migrate_background_snapshot()) {
|
||||
memory_global_dirty_log_start();
|
||||
migration_bitmap_sync_precopy(rs);
|
||||
}
|
||||
}
|
||||
qemu_mutex_unlock_ramlist();
|
||||
qemu_mutex_unlock_iothread();
|
||||
}
|
||||
@ -3521,7 +3808,7 @@ static int ram_load_precopy(QEMUFile *f)
|
||||
}
|
||||
}
|
||||
/* For postcopy we need to check hugepage sizes match */
|
||||
if (postcopy_advised &&
|
||||
if (postcopy_advised && migrate_postcopy_ram() &&
|
||||
block->page_size != qemu_host_page_size) {
|
||||
uint64_t remote_page_size = qemu_get_be64(f);
|
||||
if (remote_page_size != block->page_size) {
|
||||
|
@ -47,7 +47,7 @@ bool ramblock_is_ignored(RAMBlock *block);
|
||||
INTERNAL_RAMBLOCK_FOREACH(block) \
|
||||
if (!qemu_ram_is_migratable(block)) {} else
|
||||
|
||||
int xbzrle_cache_resize(int64_t new_size, Error **errp);
|
||||
int xbzrle_cache_resize(uint64_t new_size, Error **errp);
|
||||
uint64_t ram_bytes_remaining(void);
|
||||
uint64_t ram_bytes_total(void);
|
||||
|
||||
@ -79,4 +79,10 @@ void colo_flush_ram_cache(void);
|
||||
void colo_release_ram_cache(void);
|
||||
void colo_incoming_start_dirty_log(void);
|
||||
|
||||
/* Background snapshot */
|
||||
bool ram_write_tracking_available(void);
|
||||
bool ram_write_tracking_compatible(void);
|
||||
int ram_write_tracking_start(void);
|
||||
void ram_write_tracking_stop(void);
|
||||
|
||||
#endif
|
||||
|
@ -43,6 +43,8 @@
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qapi-commands-migration.h"
|
||||
#include "qapi/qmp/json-writer.h"
|
||||
#include "qapi/clone-visitor.h"
|
||||
#include "qapi/qapi-builtin-visit.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "sysemu/cpus.h"
|
||||
@ -315,6 +317,16 @@ static int configuration_pre_save(void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int configuration_post_save(void *opaque)
|
||||
{
|
||||
SaveState *state = opaque;
|
||||
|
||||
g_free(state->capabilities);
|
||||
state->capabilities = NULL;
|
||||
state->caps_count = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int configuration_pre_load(void *opaque)
|
||||
{
|
||||
SaveState *state = opaque;
|
||||
@ -365,24 +377,36 @@ static int configuration_post_load(void *opaque, int version_id)
|
||||
{
|
||||
SaveState *state = opaque;
|
||||
const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
|
||||
int ret = 0;
|
||||
|
||||
if (strncmp(state->name, current_name, state->len) != 0) {
|
||||
error_report("Machine type received is '%.*s' and local is '%s'",
|
||||
(int) state->len, state->name, current_name);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (state->target_page_bits != qemu_target_page_bits()) {
|
||||
error_report("Received TARGET_PAGE_BITS is %d but local is %d",
|
||||
state->target_page_bits, qemu_target_page_bits());
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!configuration_validate_capabilities(state)) {
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return 0;
|
||||
out:
|
||||
g_free((void *)state->name);
|
||||
state->name = NULL;
|
||||
state->len = 0;
|
||||
g_free(state->capabilities);
|
||||
state->capabilities = NULL;
|
||||
state->caps_count = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_capability(QEMUFile *f, void *pv, size_t size,
|
||||
@ -516,6 +540,7 @@ static const VMStateDescription vmstate_configuration = {
|
||||
.pre_load = configuration_pre_load,
|
||||
.post_load = configuration_post_load,
|
||||
.pre_save = configuration_pre_save,
|
||||
.post_save = configuration_post_save,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_UINT32(len, SaveState),
|
||||
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
|
||||
@ -1131,6 +1156,19 @@ bool qemu_savevm_state_blocked(Error **errp)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Prepend to @reasons one "non-migratable device: <idstr>" string for
 * every registered savevm handler whose vmsd is marked unmigratable.
 *
 * @reasons: list to extend; the new strings are allocated here
 */
void qemu_savevm_non_migratable_list(strList **reasons)
{
    SaveStateEntry *handler;

    QTAILQ_FOREACH(handler, &savevm_state.handlers, entry) {
        char *why;

        /* Handlers without a vmsd, or with a migratable one, are fine. */
        if (!handler->vmsd || !handler->vmsd->unmigratable) {
            continue;
        }

        why = g_strdup_printf("non-migratable device: %s", handler->idstr);
        QAPI_LIST_PREPEND(*reasons, why);
    }
}
|
||||
|
||||
void qemu_savevm_state_header(QEMUFile *f)
|
||||
{
|
||||
trace_savevm_state_header();
|
||||
@ -1355,7 +1393,6 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
|
||||
bool in_postcopy,
|
||||
bool inactivate_disks)
|
||||
@ -2729,9 +2766,10 @@ int qemu_load_device_state(QEMUFile *f)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int save_snapshot(const char *name, Error **errp)
|
||||
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
|
||||
bool has_devices, strList *devices, Error **errp)
|
||||
{
|
||||
BlockDriverState *bs, *bs1;
|
||||
BlockDriverState *bs;
|
||||
QEMUSnapshotInfo sn1, *sn = &sn1;
|
||||
int ret = -1, ret2;
|
||||
QEMUFile *f;
|
||||
@ -2742,35 +2780,43 @@ int save_snapshot(const char *name, Error **errp)
|
||||
AioContext *aio_context;
|
||||
|
||||
if (migration_is_blocked(errp)) {
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!replay_can_snapshot()) {
|
||||
error_setg(errp, "Record/replay does not allow making snapshot "
|
||||
"right now. Try once more later.");
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!bdrv_all_can_snapshot(&bs)) {
|
||||
error_setg(errp, "Device '%s' is writable but does not support "
|
||||
"snapshots", bdrv_get_device_or_node_name(bs));
|
||||
return ret;
|
||||
if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Delete old snapshots of the same name */
|
||||
if (name) {
|
||||
ret = bdrv_all_delete_snapshot(name, &bs1, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Error while deleting snapshot on device "
|
||||
"'%s': ", bdrv_get_device_or_node_name(bs1));
|
||||
return ret;
|
||||
if (overwrite) {
|
||||
if (bdrv_all_delete_snapshot(name, has_devices,
|
||||
devices, errp) < 0) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
|
||||
if (ret2 < 0) {
|
||||
return false;
|
||||
}
|
||||
if (ret2 == 1) {
|
||||
error_setg(errp,
|
||||
"Snapshot '%s' already exists in one or more devices",
|
||||
name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bs = bdrv_all_find_vmstate_bs();
|
||||
bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
|
||||
if (bs == NULL) {
|
||||
error_setg(errp, "No block device can accept snapshots");
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
aio_context = bdrv_get_aio_context(bs);
|
||||
|
||||
@ -2779,7 +2825,7 @@ int save_snapshot(const char *name, Error **errp)
|
||||
ret = global_state_store();
|
||||
if (ret) {
|
||||
error_setg(errp, "Error saving global state");
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
vm_stop(RUN_STATE_SAVE_VM);
|
||||
|
||||
@ -2833,11 +2879,10 @@ int save_snapshot(const char *name, Error **errp)
|
||||
aio_context_release(aio_context);
|
||||
aio_context = NULL;
|
||||
|
||||
ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
|
||||
ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
|
||||
has_devices, devices, errp);
|
||||
if (ret < 0) {
|
||||
error_setg(errp, "Error while creating snapshot on '%s'",
|
||||
bdrv_get_device_or_node_name(bs));
|
||||
bdrv_all_delete_snapshot(sn->name, &bs, NULL);
|
||||
bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
|
||||
goto the_end;
|
||||
}
|
||||
|
||||
@ -2853,7 +2898,7 @@ int save_snapshot(const char *name, Error **errp)
|
||||
if (saved_vm_running) {
|
||||
vm_start();
|
||||
}
|
||||
return ret;
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
|
||||
@ -2938,33 +2983,32 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
|
||||
migration_incoming_state_destroy();
|
||||
}
|
||||
|
||||
int load_snapshot(const char *name, Error **errp)
|
||||
bool load_snapshot(const char *name, const char *vmstate,
|
||||
bool has_devices, strList *devices, Error **errp)
|
||||
{
|
||||
BlockDriverState *bs, *bs_vm_state;
|
||||
BlockDriverState *bs_vm_state;
|
||||
QEMUSnapshotInfo sn;
|
||||
QEMUFile *f;
|
||||
int ret;
|
||||
AioContext *aio_context;
|
||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||
|
||||
if (!bdrv_all_can_snapshot(&bs)) {
|
||||
error_setg(errp,
|
||||
"Device '%s' is writable but does not support snapshots",
|
||||
bdrv_get_device_or_node_name(bs));
|
||||
return -ENOTSUP;
|
||||
if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
|
||||
return false;
|
||||
}
|
||||
ret = bdrv_all_find_snapshot(name, &bs);
|
||||
ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
|
||||
if (ret < 0) {
|
||||
error_setg(errp,
|
||||
"Device '%s' does not have the requested snapshot '%s'",
|
||||
bdrv_get_device_or_node_name(bs), name);
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
if (ret == 0) {
|
||||
error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
|
||||
name);
|
||||
return false;
|
||||
}
|
||||
|
||||
bs_vm_state = bdrv_all_find_vmstate_bs();
|
||||
bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
|
||||
if (!bs_vm_state) {
|
||||
error_setg(errp, "No block device supports snapshots");
|
||||
return -ENOTSUP;
|
||||
return false;
|
||||
}
|
||||
aio_context = bdrv_get_aio_context(bs_vm_state);
|
||||
|
||||
@ -2973,11 +3017,11 @@ int load_snapshot(const char *name, Error **errp)
|
||||
ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
|
||||
aio_context_release(aio_context);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
return false;
|
||||
} else if (sn.vm_state_size == 0) {
|
||||
error_setg(errp, "This is a disk-only snapshot. Revert to it "
|
||||
" offline using qemu-img");
|
||||
return -EINVAL;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2989,10 +3033,8 @@ int load_snapshot(const char *name, Error **errp)
|
||||
/* Flush all IO requests so they don't interfere with the new state. */
|
||||
bdrv_drain_all_begin();
|
||||
|
||||
ret = bdrv_all_goto_snapshot(name, &bs, errp);
|
||||
ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
|
||||
if (ret < 0) {
|
||||
error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
|
||||
name, bdrv_get_device_or_node_name(bs));
|
||||
goto err_drain;
|
||||
}
|
||||
|
||||
@ -3000,7 +3042,6 @@ int load_snapshot(const char *name, Error **errp)
|
||||
f = qemu_fopen_bdrv(bs_vm_state, 0);
|
||||
if (!f) {
|
||||
error_setg(errp, "Could not open VM state file");
|
||||
ret = -EINVAL;
|
||||
goto err_drain;
|
||||
}
|
||||
|
||||
@ -3020,14 +3061,28 @@ int load_snapshot(const char *name, Error **errp)
|
||||
|
||||
if (ret < 0) {
|
||||
error_setg(errp, "Error %d while loading VM state", ret);
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return true;
|
||||
|
||||
err_drain:
|
||||
bdrv_drain_all_end();
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool delete_snapshot(const char *name, bool has_devices,
|
||||
strList *devices, Error **errp)
|
||||
{
|
||||
if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
|
||||
@ -3057,3 +3112,187 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
|
||||
|
||||
return !(vmsd && vmsd->unmigratable);
|
||||
}
|
||||
|
||||
typedef struct SnapshotJob {
|
||||
Job common;
|
||||
char *tag;
|
||||
char *vmstate;
|
||||
strList *devices;
|
||||
Coroutine *co;
|
||||
Error **errp;
|
||||
bool ret;
|
||||
} SnapshotJob;
|
||||
|
||||
static void qmp_snapshot_job_free(SnapshotJob *s)
|
||||
{
|
||||
g_free(s->tag);
|
||||
g_free(s->vmstate);
|
||||
qapi_free_strList(s->devices);
|
||||
}
|
||||
|
||||
|
||||
static void snapshot_load_job_bh(void *opaque)
|
||||
{
|
||||
Job *job = opaque;
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
int orig_vm_running;
|
||||
|
||||
job_progress_set_remaining(&s->common, 1);
|
||||
|
||||
orig_vm_running = runstate_is_running();
|
||||
vm_stop(RUN_STATE_RESTORE_VM);
|
||||
|
||||
s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
|
||||
if (s->ret && orig_vm_running) {
|
||||
vm_start();
|
||||
}
|
||||
|
||||
job_progress_update(&s->common, 1);
|
||||
|
||||
qmp_snapshot_job_free(s);
|
||||
aio_co_wake(s->co);
|
||||
}
|
||||
|
||||
static void snapshot_save_job_bh(void *opaque)
|
||||
{
|
||||
Job *job = opaque;
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
|
||||
job_progress_set_remaining(&s->common, 1);
|
||||
s->ret = save_snapshot(s->tag, false, s->vmstate,
|
||||
true, s->devices, s->errp);
|
||||
job_progress_update(&s->common, 1);
|
||||
|
||||
qmp_snapshot_job_free(s);
|
||||
aio_co_wake(s->co);
|
||||
}
|
||||
|
||||
static void snapshot_delete_job_bh(void *opaque)
|
||||
{
|
||||
Job *job = opaque;
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
|
||||
job_progress_set_remaining(&s->common, 1);
|
||||
s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
|
||||
job_progress_update(&s->common, 1);
|
||||
|
||||
qmp_snapshot_job_free(s);
|
||||
aio_co_wake(s->co);
|
||||
}
|
||||
|
||||
static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
|
||||
{
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
s->errp = errp;
|
||||
s->co = qemu_coroutine_self();
|
||||
aio_bh_schedule_oneshot(qemu_get_aio_context(),
|
||||
snapshot_save_job_bh, job);
|
||||
qemu_coroutine_yield();
|
||||
return s->ret ? 0 : -1;
|
||||
}
|
||||
|
||||
static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
|
||||
{
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
s->errp = errp;
|
||||
s->co = qemu_coroutine_self();
|
||||
aio_bh_schedule_oneshot(qemu_get_aio_context(),
|
||||
snapshot_load_job_bh, job);
|
||||
qemu_coroutine_yield();
|
||||
return s->ret ? 0 : -1;
|
||||
}
|
||||
|
||||
static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
|
||||
{
|
||||
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
||||
s->errp = errp;
|
||||
s->co = qemu_coroutine_self();
|
||||
aio_bh_schedule_oneshot(qemu_get_aio_context(),
|
||||
snapshot_delete_job_bh, job);
|
||||
qemu_coroutine_yield();
|
||||
return s->ret ? 0 : -1;
|
||||
}
|
||||
|
||||
|
||||
static const JobDriver snapshot_load_job_driver = {
|
||||
.instance_size = sizeof(SnapshotJob),
|
||||
.job_type = JOB_TYPE_SNAPSHOT_LOAD,
|
||||
.run = snapshot_load_job_run,
|
||||
};
|
||||
|
||||
static const JobDriver snapshot_save_job_driver = {
|
||||
.instance_size = sizeof(SnapshotJob),
|
||||
.job_type = JOB_TYPE_SNAPSHOT_SAVE,
|
||||
.run = snapshot_save_job_run,
|
||||
};
|
||||
|
||||
static const JobDriver snapshot_delete_job_driver = {
|
||||
.instance_size = sizeof(SnapshotJob),
|
||||
.job_type = JOB_TYPE_SNAPSHOT_DELETE,
|
||||
.run = snapshot_delete_job_run,
|
||||
};
|
||||
|
||||
|
||||
void qmp_snapshot_save(const char *job_id,
|
||||
const char *tag,
|
||||
const char *vmstate,
|
||||
strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
SnapshotJob *s;
|
||||
|
||||
s = job_create(job_id, &snapshot_save_job_driver, NULL,
|
||||
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
||||
NULL, NULL, errp);
|
||||
if (!s) {
|
||||
return;
|
||||
}
|
||||
|
||||
s->tag = g_strdup(tag);
|
||||
s->vmstate = g_strdup(vmstate);
|
||||
s->devices = QAPI_CLONE(strList, devices);
|
||||
|
||||
job_start(&s->common);
|
||||
}
|
||||
|
||||
void qmp_snapshot_load(const char *job_id,
|
||||
const char *tag,
|
||||
const char *vmstate,
|
||||
strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
SnapshotJob *s;
|
||||
|
||||
s = job_create(job_id, &snapshot_load_job_driver, NULL,
|
||||
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
||||
NULL, NULL, errp);
|
||||
if (!s) {
|
||||
return;
|
||||
}
|
||||
|
||||
s->tag = g_strdup(tag);
|
||||
s->vmstate = g_strdup(vmstate);
|
||||
s->devices = QAPI_CLONE(strList, devices);
|
||||
|
||||
job_start(&s->common);
|
||||
}
|
||||
|
||||
void qmp_snapshot_delete(const char *job_id,
|
||||
const char *tag,
|
||||
strList *devices,
|
||||
Error **errp)
|
||||
{
|
||||
SnapshotJob *s;
|
||||
|
||||
s = job_create(job_id, &snapshot_delete_job_driver, NULL,
|
||||
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
||||
NULL, NULL, errp);
|
||||
if (!s) {
|
||||
return;
|
||||
}
|
||||
|
||||
s->tag = g_strdup(tag);
|
||||
s->devices = QAPI_CLONE(strList, devices);
|
||||
|
||||
job_start(&s->common);
|
||||
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#define QEMU_VM_SECTION_FOOTER 0x7e
|
||||
|
||||
bool qemu_savevm_state_blocked(Error **errp);
|
||||
void qemu_savevm_non_migratable_list(strList **reasons);
|
||||
void qemu_savevm_state_setup(QEMUFile *f);
|
||||
bool qemu_savevm_state_guest_unplug_pending(void);
|
||||
int qemu_savevm_state_resume_prepare(MigrationState *s);
|
||||
@ -64,5 +65,7 @@ int qemu_loadvm_state(QEMUFile *f);
|
||||
void qemu_loadvm_state_cleanup(void);
|
||||
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
|
||||
int qemu_load_device_state(QEMUFile *f);
|
||||
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
|
||||
bool in_postcopy, bool inactivate_disks);
|
||||
|
||||
#endif
|
||||
|
@ -111,6 +111,8 @@ save_xbzrle_page_skipping(void) ""
|
||||
save_xbzrle_page_overflow(void) ""
|
||||
ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"
|
||||
ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
|
||||
ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
|
||||
ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
|
||||
|
||||
# multifd.c
|
||||
multifd_new_send_channel_async(uint8_t id) "channel %d"
|
||||
|
@ -224,6 +224,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
|
||||
|
||||
migration_global_dump(mon);
|
||||
|
||||
if (info->blocked) {
|
||||
strList *reasons = info->blocked_reasons;
|
||||
monitor_printf(mon, "Outgoing migration blocked:\n");
|
||||
while (reasons) {
|
||||
monitor_printf(mon, " %s\n", reasons->value);
|
||||
reasons = reasons->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (info->has_status) {
|
||||
monitor_printf(mon, "Migration status: %s",
|
||||
MigrationStatus_str(info->status));
|
||||
@ -1130,7 +1139,7 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict)
|
||||
|
||||
vm_stop(RUN_STATE_RESTORE_VM);
|
||||
|
||||
if (load_snapshot(name, &err) == 0 && saved_vm_running) {
|
||||
/*
 * load_snapshot() returns true on success (it used to return 0).
 * Resume the guest only if the snapshot was loaded successfully and
 * the VM was running before it was stopped for the restore.
 */
if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) {
|
||||
vm_start();
|
||||
}
|
||||
hmp_handle_error(mon, err);
|
||||
@ -1140,21 +1149,17 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
|
||||
{
|
||||
Error *err = NULL;
|
||||
|
||||
save_snapshot(qdict_get_try_str(qdict, "name"), &err);
|
||||
save_snapshot(qdict_get_try_str(qdict, "name"),
|
||||
true, NULL, false, NULL, &err);
|
||||
hmp_handle_error(mon, err);
|
||||
}
|
||||
|
||||
void hmp_delvm(Monitor *mon, const QDict *qdict)
|
||||
{
|
||||
BlockDriverState *bs;
|
||||
Error *err = NULL;
|
||||
const char *name = qdict_get_str(qdict, "name");
|
||||
|
||||
if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
|
||||
error_prepend(&err,
|
||||
"deleting snapshot on device '%s': ",
|
||||
bdrv_get_device_name(bs));
|
||||
}
|
||||
delete_snapshot(name, false, NULL, &err);
|
||||
hmp_handle_error(mon, err);
|
||||
}
|
||||
|
||||
@ -1294,11 +1299,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
switch (val) {
|
||||
case MIGRATION_PARAMETER_COMPRESS_LEVEL:
|
||||
p->has_compress_level = true;
|
||||
visit_type_int(v, param, &p->compress_level, &err);
|
||||
visit_type_uint8(v, param, &p->compress_level, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_COMPRESS_THREADS:
|
||||
p->has_compress_threads = true;
|
||||
visit_type_int(v, param, &p->compress_threads, &err);
|
||||
visit_type_uint8(v, param, &p->compress_threads, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD:
|
||||
p->has_compress_wait_thread = true;
|
||||
@ -1306,19 +1311,19 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
break;
|
||||
case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
|
||||
p->has_decompress_threads = true;
|
||||
visit_type_int(v, param, &p->decompress_threads, &err);
|
||||
visit_type_uint8(v, param, &p->decompress_threads, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
|
||||
p->has_throttle_trigger_threshold = true;
|
||||
visit_type_int(v, param, &p->throttle_trigger_threshold, &err);
|
||||
visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
|
||||
p->has_cpu_throttle_initial = true;
|
||||
visit_type_int(v, param, &p->cpu_throttle_initial, &err);
|
||||
visit_type_uint8(v, param, &p->cpu_throttle_initial, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT:
|
||||
p->has_cpu_throttle_increment = true;
|
||||
visit_type_int(v, param, &p->cpu_throttle_increment, &err);
|
||||
visit_type_uint8(v, param, &p->cpu_throttle_increment, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW:
|
||||
p->has_cpu_throttle_tailslow = true;
|
||||
@ -1326,7 +1331,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
break;
|
||||
case MIGRATION_PARAMETER_MAX_CPU_THROTTLE:
|
||||
p->has_max_cpu_throttle = true;
|
||||
visit_type_int(v, param, &p->max_cpu_throttle, &err);
|
||||
visit_type_uint8(v, param, &p->max_cpu_throttle, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_TLS_CREDS:
|
||||
p->has_tls_creds = true;
|
||||
@ -1362,11 +1367,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
break;
|
||||
case MIGRATION_PARAMETER_DOWNTIME_LIMIT:
|
||||
p->has_downtime_limit = true;
|
||||
visit_type_int(v, param, &p->downtime_limit, &err);
|
||||
visit_type_size(v, param, &p->downtime_limit, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY:
|
||||
p->has_x_checkpoint_delay = true;
|
||||
visit_type_int(v, param, &p->x_checkpoint_delay, &err);
|
||||
visit_type_uint32(v, param, &p->x_checkpoint_delay, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_BLOCK_INCREMENTAL:
|
||||
p->has_block_incremental = true;
|
||||
@ -1374,7 +1379,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
break;
|
||||
case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
|
||||
p->has_multifd_channels = true;
|
||||
visit_type_int(v, param, &p->multifd_channels, &err);
|
||||
visit_type_uint8(v, param, &p->multifd_channels, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_MULTIFD_COMPRESSION:
|
||||
p->has_multifd_compression = true;
|
||||
@ -1383,11 +1388,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
|
||||
break;
|
||||
case MIGRATION_PARAMETER_MULTIFD_ZLIB_LEVEL:
|
||||
p->has_multifd_zlib_level = true;
|
||||
visit_type_int(v, param, &p->multifd_zlib_level, &err);
|
||||
visit_type_uint8(v, param, &p->multifd_zlib_level, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL:
|
||||
p->has_multifd_zstd_level = true;
|
||||
visit_type_int(v, param, &p->multifd_zstd_level, &err);
|
||||
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
|
||||
break;
|
||||
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
|
||||
p->has_xbzrle_cache_size = true;
|
||||
|
@ -22,10 +22,17 @@
|
||||
#
|
||||
# @amend: image options amend job type, see "x-blockdev-amend" (since 5.1)
|
||||
#
|
||||
# @snapshot-load: snapshot load job type, see "snapshot-load" (since 6.0)
|
||||
#
|
||||
# @snapshot-save: snapshot save job type, see "snapshot-save" (since 6.0)
|
||||
#
|
||||
# @snapshot-delete: snapshot delete job type, see "snapshot-delete" (since 6.0)
|
||||
#
|
||||
# Since: 1.7
|
||||
##
|
||||
{ 'enum': 'JobType',
|
||||
'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend'] }
|
||||
'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend',
|
||||
'snapshot-load', 'snapshot-save', 'snapshot-delete'] }
|
||||
|
||||
##
|
||||
# @JobStatus:
|
||||
|
@ -78,7 +78,7 @@
|
||||
# Since: 1.2
|
||||
##
|
||||
{ 'struct': 'XBZRLECacheStats',
|
||||
'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int',
|
||||
'data': {'cache-size': 'size', 'bytes': 'int', 'pages': 'int',
|
||||
'cache-miss': 'int', 'cache-miss-rate': 'number',
|
||||
'encoding-rate': 'number', 'overflow': 'int' } }
|
||||
|
||||
@ -224,6 +224,10 @@
|
||||
# only returned if VFIO device is present, migration is supported by all
|
||||
# VFIO devices and status is 'active' or 'completed' (since 5.2)
|
||||
#
|
||||
# @blocked: True if outgoing migration is blocked (since 6.0)
|
||||
#
|
||||
# @blocked-reasons: A list of reasons an outgoing migration is blocked (since 6.0)
|
||||
#
|
||||
# Since: 0.14
|
||||
##
|
||||
{ 'struct': 'MigrationInfo',
|
||||
@ -237,6 +241,8 @@
|
||||
'*setup-time': 'int',
|
||||
'*cpu-throttle-percentage': 'int',
|
||||
'*error-desc': 'str',
|
||||
'blocked': 'bool',
|
||||
'*blocked-reasons': ['str'],
|
||||
'*postcopy-blocktime' : 'uint32',
|
||||
'*postcopy-vcpu-blocktime': ['uint32'],
|
||||
'*compression': 'CompressionStats',
|
||||
@ -442,6 +448,11 @@
|
||||
# @validate-uuid: Send the UUID of the source to allow the destination
|
||||
# to ensure it is the same. (since 4.2)
|
||||
#
|
||||
# @background-snapshot: If enabled, the migration stream will be a snapshot
|
||||
# of the VM exactly at the point when the migration
|
||||
# procedure starts. The VM RAM is saved while the VM is running.
|
||||
# (since 6.0)
|
||||
#
|
||||
# Since: 1.2
|
||||
##
|
||||
{ 'enum': 'MigrationCapability',
|
||||
@ -449,7 +460,7 @@
|
||||
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
|
||||
'block', 'return-path', 'pause-before-switchover', 'multifd',
|
||||
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
|
||||
'x-ignore-shared', 'validate-uuid' ] }
|
||||
'x-ignore-shared', 'validate-uuid', 'background-snapshot'] }
|
||||
|
||||
##
|
||||
# @MigrationCapabilityStatus:
|
||||
@ -885,28 +896,28 @@
|
||||
'*announce-max': 'size',
|
||||
'*announce-rounds': 'size',
|
||||
'*announce-step': 'size',
|
||||
'*compress-level': 'int',
|
||||
'*compress-threads': 'int',
|
||||
'*compress-level': 'uint8',
|
||||
'*compress-threads': 'uint8',
|
||||
'*compress-wait-thread': 'bool',
|
||||
'*decompress-threads': 'int',
|
||||
'*throttle-trigger-threshold': 'int',
|
||||
'*cpu-throttle-initial': 'int',
|
||||
'*cpu-throttle-increment': 'int',
|
||||
'*decompress-threads': 'uint8',
|
||||
'*throttle-trigger-threshold': 'uint8',
|
||||
'*cpu-throttle-initial': 'uint8',
|
||||
'*cpu-throttle-increment': 'uint8',
|
||||
'*cpu-throttle-tailslow': 'bool',
|
||||
'*tls-creds': 'StrOrNull',
|
||||
'*tls-hostname': 'StrOrNull',
|
||||
'*tls-authz': 'StrOrNull',
|
||||
'*max-bandwidth': 'int',
|
||||
'*downtime-limit': 'int',
|
||||
'*x-checkpoint-delay': 'int',
|
||||
'*max-bandwidth': 'size',
|
||||
'*downtime-limit': 'uint64',
|
||||
'*x-checkpoint-delay': 'uint32',
|
||||
'*block-incremental': 'bool',
|
||||
'*multifd-channels': 'int',
|
||||
'*multifd-channels': 'uint8',
|
||||
'*xbzrle-cache-size': 'size',
|
||||
'*max-postcopy-bandwidth': 'size',
|
||||
'*max-cpu-throttle': 'int',
|
||||
'*max-cpu-throttle': 'uint8',
|
||||
'*multifd-compression': 'MultiFDCompression',
|
||||
'*multifd-zlib-level': 'int',
|
||||
'*multifd-zstd-level': 'int',
|
||||
'*multifd-zlib-level': 'uint8',
|
||||
'*multifd-zstd-level': 'uint8',
|
||||
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
|
||||
|
||||
##
|
||||
@ -1093,7 +1104,7 @@
|
||||
'*max-bandwidth': 'size',
|
||||
'*downtime-limit': 'uint64',
|
||||
'*x-checkpoint-delay': 'uint32',
|
||||
'*block-incremental': 'bool' ,
|
||||
'*block-incremental': 'bool',
|
||||
'*multifd-channels': 'uint8',
|
||||
'*xbzrle-cache-size': 'size',
|
||||
'*max-postcopy-bandwidth': 'size',
|
||||
@ -1465,7 +1476,7 @@
|
||||
# <- { "return": 67108864 }
|
||||
#
|
||||
##
|
||||
{ 'command': 'query-migrate-cache-size', 'returns': 'int',
|
||||
{ 'command': 'query-migrate-cache-size', 'returns': 'size',
|
||||
'features': [ 'deprecated' ] }
|
||||
|
||||
##
|
||||
@ -1843,3 +1854,176 @@
|
||||
# Since: 5.2
|
||||
##
|
||||
{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
|
||||
|
||||
##
|
||||
# @snapshot-save:
|
||||
#
|
||||
# Save a VM snapshot
|
||||
#
|
||||
# @job-id: identifier for the newly created job
|
||||
# @tag: name of the snapshot to create
|
||||
# @vmstate: block device node name to save vmstate to
|
||||
# @devices: list of block device node names to save a snapshot to
|
||||
#
|
||||
# Applications should not assume that the snapshot save is complete
|
||||
# when this command returns. The job commands / events must be used
|
||||
# to determine completion and to fetch details of any errors that arise.
|
||||
#
|
||||
# Note that execution of the guest CPUs may be stopped during the
|
||||
# time it takes to save the snapshot. A future version of QEMU
|
||||
# may ensure CPUs are executing continuously.
|
||||
#
|
||||
# It is strongly recommended that @devices contain all writable
|
||||
# block device nodes if a consistent snapshot is required.
|
||||
#
|
||||
# If @tag already exists, an error will be reported
|
||||
#
|
||||
# Returns: nothing
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# -> { "execute": "snapshot-save",
|
||||
# "data": {
|
||||
# "job-id": "snapsave0",
|
||||
# "tag": "my-snap",
|
||||
# "vmstate": "disk0",
|
||||
# "devices": ["disk0", "disk1"]
|
||||
# }
|
||||
# }
|
||||
# <- { "return": { } }
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "created", "id": "snapsave0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "running", "id": "snapsave0"}}
|
||||
# <- {"event": "STOP"}
|
||||
# <- {"event": "RESUME"}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "waiting", "id": "snapsave0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "pending", "id": "snapsave0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "concluded", "id": "snapsave0"}}
|
||||
# -> {"execute": "query-jobs"}
|
||||
# <- {"return": [{"current-progress": 1,
|
||||
# "status": "concluded",
|
||||
# "total-progress": 1,
|
||||
# "type": "snapshot-save",
|
||||
# "id": "snapsave0"}]}
|
||||
#
|
||||
# Since: 6.0
|
||||
##
|
||||
{ 'command': 'snapshot-save',
|
||||
'data': { 'job-id': 'str',
|
||||
'tag': 'str',
|
||||
'vmstate': 'str',
|
||||
'devices': ['str'] } }
|
||||
|
||||
##
|
||||
# @snapshot-load:
|
||||
#
|
||||
# Load a VM snapshot
|
||||
#
|
||||
# @job-id: identifier for the newly created job
|
||||
# @tag: name of the snapshot to load.
|
||||
# @vmstate: block device node name to load vmstate from
|
||||
# @devices: list of block device node names to load a snapshot from
|
||||
#
|
||||
# Applications should not assume that the snapshot load is complete
|
||||
# when this command returns. The job commands / events must be used
|
||||
# to determine completion and to fetch details of any errors that arise.
|
||||
#
|
||||
# Note that execution of the guest CPUs will be stopped during the
|
||||
# time it takes to load the snapshot.
|
||||
#
|
||||
# It is strongly recommended that @devices contain all writable
|
||||
# block device nodes that can have changed since the original
|
||||
# @snapshot-save command execution.
|
||||
#
|
||||
# Returns: nothing
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# -> { "execute": "snapshot-load",
|
||||
# "data": {
|
||||
# "job-id": "snapload0",
|
||||
# "tag": "my-snap",
|
||||
# "vmstate": "disk0",
|
||||
# "devices": ["disk0", "disk1"]
|
||||
# }
|
||||
# }
|
||||
# <- { "return": { } }
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "created", "id": "snapload0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "running", "id": "snapload0"}}
|
||||
# <- {"event": "STOP"}
|
||||
# <- {"event": "RESUME"}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "waiting", "id": "snapload0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "pending", "id": "snapload0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "concluded", "id": "snapload0"}}
|
||||
# -> {"execute": "query-jobs"}
|
||||
# <- {"return": [{"current-progress": 1,
|
||||
# "status": "concluded",
|
||||
# "total-progress": 1,
|
||||
# "type": "snapshot-load",
|
||||
# "id": "snapload0"}]}
|
||||
#
|
||||
# Since: 6.0
|
||||
##
|
||||
{ 'command': 'snapshot-load',
|
||||
'data': { 'job-id': 'str',
|
||||
'tag': 'str',
|
||||
'vmstate': 'str',
|
||||
'devices': ['str'] } }
|
||||
|
||||
##
|
||||
# @snapshot-delete:
|
||||
#
|
||||
# Delete a VM snapshot
|
||||
#
|
||||
# @job-id: identifier for the newly created job
|
||||
# @tag: name of the snapshot to delete.
|
||||
# @devices: list of block device node names to delete a snapshot from
|
||||
#
|
||||
# Applications should not assume that the snapshot delete is complete
|
||||
# when this command returns. The job commands / events must be used
|
||||
# to determine completion and to fetch details of any errors that arise.
|
||||
#
|
||||
# Returns: nothing
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# -> { "execute": "snapshot-delete",
|
||||
# "data": {
|
||||
# "job-id": "snapdelete0",
|
||||
# "tag": "my-snap",
|
||||
# "devices": ["disk0", "disk1"]
|
||||
# }
|
||||
# }
|
||||
# <- { "return": { } }
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "created", "id": "snapdelete0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "running", "id": "snapdelete0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "waiting", "id": "snapdelete0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "pending", "id": "snapdelete0"}}
|
||||
# <- {"event": "JOB_STATUS_CHANGE",
|
||||
# "data": {"status": "concluded", "id": "snapdelete0"}}
|
||||
# -> {"execute": "query-jobs"}
|
||||
# <- {"return": [{"current-progress": 1,
|
||||
# "status": "concluded",
|
||||
# "total-progress": 1,
|
||||
# "type": "snapshot-delete",
|
||||
# "id": "snapdelete0"}]}
|
||||
#
|
||||
# Since: 6.0
|
||||
##
|
||||
{ 'command': 'snapshot-delete',
|
||||
'data': { 'job-id': 'str',
|
||||
'tag': 'str',
|
||||
'devices': ['str'] } }
|
||||
|
@ -143,12 +143,13 @@ static char *replay_find_nearest_snapshot(int64_t icount,
|
||||
QEMUSnapshotInfo *sn_tab;
|
||||
QEMUSnapshotInfo *nearest = NULL;
|
||||
char *ret = NULL;
|
||||
int rv;
|
||||
int nb_sns, i;
|
||||
AioContext *aio_context;
|
||||
|
||||
*snapshot_icount = -1;
|
||||
|
||||
bs = bdrv_all_find_vmstate_bs();
|
||||
bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, NULL);
|
||||
if (!bs) {
|
||||
goto fail;
|
||||
}
|
||||
@ -159,7 +160,10 @@ static char *replay_find_nearest_snapshot(int64_t icount,
|
||||
aio_context_release(aio_context);
|
||||
|
||||
for (i = 0; i < nb_sns; i++) {
|
||||
if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
|
||||
rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL);
|
||||
if (rv < 0)
|
||||
goto fail;
|
||||
if (rv == 1) {
|
||||
if (sn_tab[i].icount != -1ULL
|
||||
&& sn_tab[i].icount <= icount
|
||||
&& (!nearest || nearest->icount < sn_tab[i].icount)) {
|
||||
@ -192,7 +196,7 @@ static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
|
||||
if (icount < replay_get_current_icount()
|
||||
|| replay_get_current_icount() < snapshot_icount) {
|
||||
vm_stop(RUN_STATE_RESTORE_VM);
|
||||
load_snapshot(snapshot, errp);
|
||||
load_snapshot(snapshot, NULL, false, NULL, errp);
|
||||
}
|
||||
g_free(snapshot);
|
||||
}
|
||||
@ -323,7 +327,7 @@ void replay_gdb_attached(void)
|
||||
*/
|
||||
if (replay_mode == REPLAY_MODE_PLAY
|
||||
&& !replay_snapshot) {
|
||||
if (save_snapshot("start_debugging", NULL) != 0) {
|
||||
if (!save_snapshot("start_debugging", true, NULL, false, NULL, NULL)) {
|
||||
/* Can't create the snapshot. Continue conventional debugging. */
|
||||
}
|
||||
}
|
||||
|
@ -77,13 +77,14 @@ void replay_vmstate_init(void)
|
||||
|
||||
if (replay_snapshot) {
|
||||
if (replay_mode == REPLAY_MODE_RECORD) {
|
||||
if (save_snapshot(replay_snapshot, &err) != 0) {
|
||||
if (!save_snapshot(replay_snapshot,
|
||||
true, NULL, false, NULL, &err)) {
|
||||
error_report_err(err);
|
||||
error_report("Could not create snapshot for icount record");
|
||||
exit(1);
|
||||
}
|
||||
} else if (replay_mode == REPLAY_MODE_PLAY) {
|
||||
if (load_snapshot(replay_snapshot, &err) != 0) {
|
||||
if (!load_snapshot(replay_snapshot, NULL, false, NULL, &err)) {
|
||||
error_report_err(err);
|
||||
error_report("Could not load snapshot for icount replay");
|
||||
exit(1);
|
||||
|
122
scripts/userfaultfd-wrlat.py
Executable file
122
scripts/userfaultfd-wrlat.py
Executable file
@ -0,0 +1,122 @@
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# userfaultfd-wrlat Summarize userfaultfd write fault latencies.
|
||||
# Events are continuously accumulated for the
|
||||
# run, while latency distribution histogram is
|
||||
# dumped each 'interval' seconds.
|
||||
#
|
||||
# For Linux, uses BCC, eBPF.
|
||||
#
|
||||
# USAGE: userfaultfd-wrlat [interval [count]]
|
||||
#
|
||||
# Copyright Virtuozzo GmbH, 2020
|
||||
#
|
||||
# Authors:
|
||||
# Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
|
||||
#
|
||||
# This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
# later. See the COPYING file in the top-level directory.
|
||||
|
||||
from __future__ import print_function
|
||||
from bcc import BPF
|
||||
from ctypes import c_ushort, c_int, c_ulonglong
|
||||
from time import sleep
|
||||
from sys import argv
|
||||
|
||||
def usage():
|
||||
print("USAGE: %s [interval [count]]" % argv[0])
|
||||
exit()
|
||||
|
||||
# define BPF program
#
# The embedded C source is compiled by BCC. It keeps a per-thread start
# timestamp in the 'ev_start' hash and accumulates a log2 histogram of
# handle_userfault() durations (in milliseconds) in 'ev_delta_hist'.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/mm.h>

BPF_HASH(ev_start, u32, u64);
BPF_HISTOGRAM(ev_delta_hist, u64);

/* Trace UFFD page fault start event. */
static void do_event_start()
{
    /* Using "(u32)" to drop group ID which is upper 32 bits */
    u32 tid = (u32) bpf_get_current_pid_tgid();
    u64 ts = bpf_ktime_get_ns();

    ev_start.update(&tid, &ts);
}

/* Trace UFFD page fault end event. */
static void do_event_end()
{
    /* Using "(u32)" to drop group ID which is upper 32 bits */
    u32 tid = (u32) bpf_get_current_pid_tgid();
    u64 ts = bpf_ktime_get_ns();
    u64 *tsp;

    tsp = ev_start.lookup(&tid);
    if (tsp) {
        u64 delta = ts - (*tsp);
        /* Transform time delta to milliseconds */
        ev_delta_hist.increment(bpf_log2l(delta / 1000000));
        ev_start.delete(&tid);
    }
}

/* KPROBE for handle_userfault(). */
int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
        unsigned long reason)
{
    /* Trace only UFFD write faults. */
    if (reason & VM_UFFD_WP) {
        do_event_start();
    }
    return 0;
}

/* KRETPROBE for handle_userfault(). */
int retprobe_handle_userfault(struct pt_regs *ctx)
{
    do_event_end();
    return 0;
}
"""
|
||||
|
||||
# arguments
#
# argv[1] (optional): histogram dump interval in seconds, must be > 0
# argv[2] (optional): number of dumps before exiting; the default of -1
#                     (like any value <= 0) means "run until Ctrl-C"
interval = 10
count = -1
if len(argv) > 1:
    try:
        interval = int(argv[1])
        # Reject zero and negative intervals up front; a negative value
        # would otherwise only fail later inside sleep().
        if interval <= 0:
            raise ValueError("interval must be a positive integer")
        if len(argv) > 2:
            count = int(argv[2])
    except ValueError:  # also catches -h, --help
        usage()
|
||||
|
||||
# load BPF program
b = BPF(text=bpf_text)
# attach KPROBEs: entry and return probes on the same kernel function
b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")

# header
print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")

# output: dump the accumulated histogram every 'interval' seconds, at most
# 'count' times when count > 0, otherwise until interrupted
dumps_started = 0
interrupted = False
while True:
    if count > 0:
        dumps_started += 1
        if dumps_started > count:
            exit()
    try:
        sleep(interval)
    except KeyboardInterrupt:
        # still print one final histogram below before leaving
        interrupted = True

    print()
    b["ev_delta_hist"].print_log2_hist("msecs")
    if interrupted:
        exit()
|
@ -2545,7 +2545,7 @@ void qmp_x_exit_preconfig(Error **errp)
|
||||
|
||||
if (loadvm) {
|
||||
Error *local_err = NULL;
|
||||
if (load_snapshot(loadvm, &local_err) < 0) {
|
||||
if (!load_snapshot(loadvm, NULL, false, NULL, &local_err)) {
|
||||
error_report_err(local_err);
|
||||
autostart = 0;
|
||||
exit(1);
|
||||
|
@ -6,11 +6,11 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
|
||||
Testing:
|
||||
QEMU X.Y.Z monitor - type 'help' for more information
|
||||
(qemu) savevm snap0
|
||||
Error: No block device can accept snapshots
|
||||
Error: no block device can store vmstate for snapshot
|
||||
(qemu) info snapshots
|
||||
No available block device supports snapshots
|
||||
no block device can store vmstate for snapshot
|
||||
(qemu) loadvm snap0
|
||||
Error: No block device supports snapshots
|
||||
Error: no block device can store vmstate for snapshot
|
||||
(qemu) quit
|
||||
|
||||
|
||||
@ -22,7 +22,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
|
||||
(qemu) savevm snap0
|
||||
Error: Device 'none0' is writable but does not support snapshots
|
||||
(qemu) info snapshots
|
||||
No available block device supports snapshots
|
||||
no block device can store vmstate for snapshot
|
||||
(qemu) loadvm snap0
|
||||
Error: Device 'none0' is writable but does not support snapshots
|
||||
(qemu) quit
|
||||
@ -58,7 +58,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
|
||||
(qemu) savevm snap0
|
||||
Error: Device 'virtio0' is writable but does not support snapshots
|
||||
(qemu) info snapshots
|
||||
No available block device supports snapshots
|
||||
no block device can store vmstate for snapshot
|
||||
(qemu) loadvm snap0
|
||||
Error: Device 'virtio0' is writable but does not support snapshots
|
||||
(qemu) quit
|
||||
@ -83,7 +83,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
|
||||
(qemu) savevm snap0
|
||||
Error: Device 'file' is writable but does not support snapshots
|
||||
(qemu) info snapshots
|
||||
No available block device supports snapshots
|
||||
no block device can store vmstate for snapshot
|
||||
(qemu) loadvm snap0
|
||||
Error: Device 'file' is writable but does not support snapshots
|
||||
(qemu) quit
|
||||
|
@ -53,6 +53,15 @@ _in_fd=4
|
||||
# If $mismatch_only is set, only non-matching responses will
|
||||
# be echoed.
|
||||
#
|
||||
# If $capture_events is non-empty, then any QMP event names it lists
|
||||
# will not be echoed out, but instead collected in the $QEMU_EVENTS
|
||||
# variable. The _wait_event function can later be used to receive
|
||||
# the cached events.
|
||||
#
|
||||
# If $only_capture_events is set to anything but an empty string,
|
||||
# then an error will be raised if a QMP message is seen which is
|
||||
# not an event listed in $capture_events.
|
||||
#
|
||||
# If $success_or_failure is set, the meaning of the arguments is
|
||||
# changed as follows:
|
||||
# $2: A string to search for in the response; if found, this indicates
|
||||
@ -78,6 +87,31 @@ _timed_wait_for()
|
||||
QEMU_STATUS[$h]=0
|
||||
while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
|
||||
do
|
||||
if [ -n "$capture_events" ]; then
|
||||
capture=0
|
||||
local evname
|
||||
for evname in $capture_events
|
||||
do
|
||||
case ${resp} in
|
||||
*\"event\":\ \"${evname}\"* ) capture=1 ;;
|
||||
esac
|
||||
done
|
||||
if [ $capture = 1 ];
|
||||
then
|
||||
ev=$(echo "${resp}" | tr -d '\r' | tr % .)
|
||||
QEMU_EVENTS="${QEMU_EVENTS:+${QEMU_EVENTS}%}${ev}"
|
||||
if [ -n "$only_capture_events" ]; then
|
||||
return
|
||||
else
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [ -n "$only_capture_events" ]; then
|
||||
echo "Only expected $capture_events but got ${resp}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then
|
||||
echo "${resp}" | _filter_testdir | _filter_qemu \
|
||||
| _filter_qemu_io | _filter_qmp | _filter_hmp
|
||||
@ -172,12 +206,82 @@ _send_qemu_cmd()
|
||||
let count--;
|
||||
done
|
||||
if [ ${QEMU_STATUS[$h]} -ne 0 ] && [ -z "${qemu_error_no_exit}" ]; then
|
||||
echo "Timeout waiting for ${1} on handle ${h}"
|
||||
echo "Timeout waiting for command ${1} response on handle ${h}"
|
||||
exit 1 #Timeout means the test failed
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
# Check event cache for a named QMP event
#
# Input parameters:
# $1: Name of the QMP event to check for
#
# Checks whether the named QMP event was previously captured
# into $QEMU_EVENTS. The first cached event that matches is removed
# from the cache and echoed through the usual output filters, and the
# $matched variable is set to 1. If nothing matches, $matched is 0.
# All other cached events stay in $QEMU_EVENTS in their original order.
#
# The event name is compared literally (not as a regular expression).
#
# _wait_event is more suitable for test usage in most cases
_check_cached_events()
{
    local evname=${1}
    local match="\"event\": \"$evname\""
    local cached ev

    matched=0
    if [ -z "$QEMU_EVENTS" ]; then
        return
    fi

    cached=$QEMU_EVENTS
    QEMU_EVENTS=

    # Cached events are joined with '%'. A function-local IFS is restored
    # exactly on return, including the case where IFS was unset.
    local IFS="%"
    for ev in $cached
    do
        if [ "$matched" = 0 ] && [[ ${ev} == *"$match"* ]]; then
            echo "${ev}" | _filter_testdir | _filter_qemu \
                | _filter_qemu_io | _filter_qmp | _filter_hmp
            matched=1
        else
            # Not the event we want (or a later duplicate): keep it cached
            QEMU_EVENTS="${QEMU_EVENTS:+${QEMU_EVENTS}%}${ev}"
        fi
    done
}
|
||||
|
||||
# Wait for a named QMP event
#
# Input parameters:
# $1: QEMU handle to use
# $2: Name of the QMP event to wait for
#
# First looks for the named QMP event among those already captured
# into $QEMU_EVENTS. If it is not there, reads from the QMP channel of
# the given handle via _timed_wait_for and checks the cache again,
# repeating until the event shows up. When matched, the QMP event
# will be echoed. A non-zero ${QEMU_STATUS[$h]} after reading is
# reported as a timeout and fails the test.
_wait_event()
{
    local h=${1}
    local wanted=${2}

    while :
    do
        _check_cached_events $wanted
        if [ $matched = 1 ]; then
            return
        fi

        # only_capture_events makes any non-captured QMP message an error;
        # qemu_error_no_exit presumably leaves timeout handling to the
        # check below (its use in _timed_wait_for is not visible here)
        only_capture_events=1 qemu_error_no_exit=1 _timed_wait_for ${h}
        if [ ${QEMU_STATUS[$h]} -ne 0 ]; then
            echo "Timeout waiting for event ${wanted} on handle ${h}"
            exit 1 #Timeout means the test failed
        fi
    done
}
|
||||
|
||||
# Launch a QEMU process.
|
||||
#
|
||||
# Input parameters:
|
||||
|
@ -109,8 +109,14 @@ peek_file_raw()
|
||||
dd if="$1" bs=1 skip="$2" count="$3" status=none
|
||||
}
|
||||
|
||||
|
||||
if ! . ./common.config
|
||||
config=common.config
|
||||
test -f $config || config=../common.config
|
||||
if ! test -f $config
|
||||
then
|
||||
echo "$0: failed to find common.config"
|
||||
exit 1
|
||||
fi
|
||||
if ! . $config
|
||||
then
|
||||
echo "$0: failed to source common.config"
|
||||
exit 1
|
||||
|
@ -52,6 +52,7 @@ if have_system
|
||||
util_ss.add(files('crc-ccitt.c'))
|
||||
util_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus.c'), gio])
|
||||
util_ss.add(files('yank.c'))
|
||||
util_ss.add(when: 'CONFIG_LINUX', if_true: files('userfaultfd.c'))
|
||||
endif
|
||||
|
||||
if have_block
|
||||
|
@ -91,3 +91,12 @@ qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uin
|
||||
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
|
||||
qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
|
||||
qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
|
||||
|
||||
#userfaultfd.c
|
||||
uffd_query_features_nosys(int err) "errno: %i"
|
||||
uffd_query_features_api_failed(int err) "errno: %i"
|
||||
uffd_create_fd_nosys(int err) "errno: %i"
|
||||
uffd_create_fd_api_failed(int err) "errno: %i"
|
||||
uffd_create_fd_api_noioctl(uint64_t ioctl_req, uint64_t ioctl_supp) "ioctl_req: 0x%" PRIx64 " ioctl_supp: 0x%" PRIx64
|
||||
uffd_register_memory_failed(void *addr, uint64_t length, uint64_t mode, int err) "addr: %p length: %" PRIu64 " mode: 0x%" PRIx64 " errno: %i"
|
||||
uffd_unregister_memory_failed(void *addr, uint64_t length, int err) "addr: %p length: %" PRIu64 " errno: %i"
|
||||
|
345
util/userfaultfd.c
Normal file
345
util/userfaultfd.c
Normal file
@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Linux UFFD-WP support
|
||||
*
|
||||
* Copyright Virtuozzo GmbH, 2020
|
||||
*
|
||||
* Authors:
|
||||
* Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/bitops.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/userfaultfd.h"
|
||||
#include "trace.h"
|
||||
#include <poll.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
/**
|
||||
* uffd_query_features: query UFFD features
|
||||
*
|
||||
* Returns: 0 on success, negative value in case of an error
|
||||
*
|
||||
* @features: parameter to receive 'uffdio_api.features'
|
||||
*/
|
||||
int uffd_query_features(uint64_t *features)
|
||||
{
|
||||
int uffd_fd;
|
||||
struct uffdio_api api_struct = { 0 };
|
||||
int ret = -1;
|
||||
|
||||
uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC);
|
||||
if (uffd_fd < 0) {
|
||||
trace_uffd_query_features_nosys(errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
api_struct.api = UFFD_API;
|
||||
api_struct.features = 0;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
|
||||
trace_uffd_query_features_api_failed(errno);
|
||||
goto out;
|
||||
}
|
||||
*features = api_struct.features;
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
close(uffd_fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_create_fd: create UFFD file descriptor
|
||||
*
|
||||
* Returns non-negative file descriptor or negative value in case of an error
|
||||
*
|
||||
* @features: UFFD features to request
|
||||
* @non_blocking: create UFFD file descriptor for non-blocking operation
|
||||
*/
|
||||
int uffd_create_fd(uint64_t features, bool non_blocking)
|
||||
{
|
||||
int uffd_fd;
|
||||
int flags;
|
||||
struct uffdio_api api_struct = { 0 };
|
||||
uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
|
||||
|
||||
flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
|
||||
uffd_fd = syscall(__NR_userfaultfd, flags);
|
||||
if (uffd_fd < 0) {
|
||||
trace_uffd_create_fd_nosys(errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
api_struct.api = UFFD_API;
|
||||
api_struct.features = features;
|
||||
if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
|
||||
trace_uffd_create_fd_api_failed(errno);
|
||||
goto fail;
|
||||
}
|
||||
if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
|
||||
trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return uffd_fd;
|
||||
|
||||
fail:
|
||||
close(uffd_fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * Passing a negative descriptor is a programming error and trips an
 * assertion. The result of close() is not checked.
 *
 * @uffd_fd: UFFD file descriptor (must be non-negative)
 */
void uffd_close_fd(int uffd_fd)
{
    assert(uffd_fd >= 0);

    close(uffd_fd);
}
|
||||
|
||||
/**
|
||||
* uffd_register_memory: register memory range via UFFD-IO
|
||||
*
|
||||
* Returns 0 in case of success, negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @addr: base address of memory range
|
||||
* @length: length of memory range
|
||||
* @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
|
||||
* @ioctls: optional pointer to receive supported IOCTL mask
|
||||
*/
|
||||
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
|
||||
uint64_t mode, uint64_t *ioctls)
|
||||
{
|
||||
struct uffdio_register uffd_register;
|
||||
|
||||
uffd_register.range.start = (uintptr_t) addr;
|
||||
uffd_register.range.len = length;
|
||||
uffd_register.mode = mode;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) {
|
||||
trace_uffd_register_memory_failed(addr, length, mode, errno);
|
||||
return -1;
|
||||
}
|
||||
if (ioctls) {
|
||||
*ioctls = uffd_register.ioctls;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_unregister_memory: un-register memory range with UFFD-IO
|
||||
*
|
||||
* Returns 0 in case of success, negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @addr: base address of memory range
|
||||
* @length: length of memory range
|
||||
*/
|
||||
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
|
||||
{
|
||||
struct uffdio_range uffd_range;
|
||||
|
||||
uffd_range.start = (uintptr_t) addr;
|
||||
uffd_range.len = length;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) {
|
||||
trace_uffd_unregister_memory_failed(addr, length, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
|
||||
*
|
||||
* Returns 0 on success, negative value in case of error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @addr: base address of memory range
|
||||
* @length: length of memory range
|
||||
* @wp: write-protect/unprotect
|
||||
* @dont_wake: do not wake threads waiting on wr-protected page
|
||||
*/
|
||||
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
|
||||
bool wp, bool dont_wake)
|
||||
{
|
||||
struct uffdio_writeprotect uffd_writeprotect;
|
||||
|
||||
uffd_writeprotect.range.start = (uintptr_t) addr;
|
||||
uffd_writeprotect.range.len = length;
|
||||
if (!wp && dont_wake) {
|
||||
/* DONTWAKE is meaningful only on protection release */
|
||||
uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
|
||||
} else {
|
||||
uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0);
|
||||
}
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
|
||||
error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
|
||||
" mode=%" PRIx64 " errno=%i", addr, length,
|
||||
(uint64_t) uffd_writeprotect.mode, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_copy_page: copy range of pages to destination via UFFD-IO
|
||||
*
|
||||
* Copy range of source pages to the destination to resolve
|
||||
* missing page fault somewhere in the destination range.
|
||||
*
|
||||
* Returns 0 on success, negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @dst_addr: destination base address
|
||||
* @src_addr: source base address
|
||||
* @length: length of the range to copy
|
||||
* @dont_wake: do not wake threads waiting on missing page
|
||||
*/
|
||||
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
|
||||
uint64_t length, bool dont_wake)
|
||||
{
|
||||
struct uffdio_copy uffd_copy;
|
||||
|
||||
uffd_copy.dst = (uintptr_t) dst_addr;
|
||||
uffd_copy.src = (uintptr_t) src_addr;
|
||||
uffd_copy.len = length;
|
||||
uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) {
|
||||
error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
|
||||
" mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
|
||||
length, (uint64_t) uffd_copy.mode, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_zero_page: fill range of pages with zeroes via UFFD-IO
|
||||
*
|
||||
* Fill range pages with zeroes to resolve missing page fault within the range.
|
||||
*
|
||||
* Returns 0 on success, negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @addr: base address
|
||||
* @length: length of the range to fill with zeroes
|
||||
* @dont_wake: do not wake threads waiting on missing page
|
||||
*/
|
||||
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
|
||||
{
|
||||
struct uffdio_zeropage uffd_zeropage;
|
||||
|
||||
uffd_zeropage.range.start = (uintptr_t) addr;
|
||||
uffd_zeropage.range.len = length;
|
||||
uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) {
|
||||
error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
|
||||
" mode=%" PRIx64 " errno=%i", addr, length,
|
||||
(uint64_t) uffd_zeropage.mode, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
|
||||
*
|
||||
* Wake up threads waiting on any page/pages from the designated range.
|
||||
* The main use case is when during some period, page faults are resolved
|
||||
* via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
|
||||
* for the whole memory range are satisfied in a single call to uffd_wakeup().
|
||||
*
|
||||
* Returns 0 on success, negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @addr: base address
|
||||
* @length: length of the range
|
||||
*/
|
||||
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
|
||||
{
|
||||
struct uffdio_range uffd_range;
|
||||
|
||||
uffd_range.start = (uintptr_t) addr;
|
||||
uffd_range.len = length;
|
||||
|
||||
if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) {
|
||||
error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
|
||||
addr, length, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* uffd_read_events: read pending UFFD events
|
||||
*
|
||||
* Returns number of fetched messages, 0 if non is available or
|
||||
* negative value in case of an error
|
||||
*
|
||||
* @uffd_fd: UFFD file descriptor
|
||||
* @msgs: pointer to message buffer
|
||||
* @count: number of messages that can fit in the buffer
|
||||
*/
|
||||
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
|
||||
{
|
||||
ssize_t res;
|
||||
do {
|
||||
res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
|
||||
} while (res < 0 && errno == EINTR);
|
||||
|
||||
if ((res < 0 && errno == EAGAIN)) {
|
||||
return 0;
|
||||
}
|
||||
if (res < 0) {
|
||||
error_report("uffd_read_events() failed: errno=%i", errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (int) (res / sizeof(struct uffd_msg));
|
||||
}
|
||||
|
||||
/**
 * uffd_poll_events: poll UFFD file descriptor for read
 *
 * Waits with poll() for POLLIN on the descriptor, retrying when the call
 * is interrupted (EINTR). A poll() failure is logged via error_report()
 * and reported as "no events".
 *
 * Returns true if events are available for read, false otherwise
 *
 * @uffd_fd: UFFD file descriptor
 * @tmo: timeout value, passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    struct pollfd pfd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
    int nready;

    do {
        nready = poll(&pfd, 1, tmo);
    } while (nready < 0 && errno == EINTR);

    if (nready < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }
    if (nready == 0) {
        /* Timed out with nothing to read */
        return false;
    }

    return (pfd.revents & POLLIN) != 0;
}
|
Loading…
Reference in New Issue
Block a user