Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170213a' into staging

Migration

  Amit: migration: remove myself as maintainer
        MAINTAINERS: update my email address
  Ashijeet: migrate: Introduce zero RAM checks to skip RAM migration
  Pavel: Postcopy release RAM
  Halil: consolidate VMStateField.start
  Hailiang: COLO: fix setting checkpoint-delay not working properly
         COLO: Shutdown related socket fd while do failover
         COLO: Don't process failover request while loading VM's state
  Me:
     migration: Add VMSTATE_UNUSED_VARRAY_UINT32
     migration: Add VMSTATE_WITH_TMP
     tests/migration: Add test for VMSTATE_WITH_TMP
     virtio-net VMState conversion and new VMSTATE macros

# gpg: Signature made Mon 13 Feb 2017 17:36:39 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170213a:
  virtio/migration: Migrate virtio-net to VMState
  tests/migration: Add test for VMSTATE_WITH_TMP
  migration: Add VMSTATE_WITH_TMP
  migration: Add VMSTATE_UNUSED_VARRAY_UINT32
  COLO: Don't process failover request while loading VM's state
  COLO: Shutdown related socket fd while do failover
  COLO: fix setting checkpoint-delay not working properly
  migration: consolidate VMStateField.start
  migrate: Introduce zero RAM checks to skip RAM migration
  migration: discard non-dirty ram pages after the start of postcopy
  add 'release-ram' migrate capability
  migration: add MigrationState arg for ram_save_/compressed_/page()
  MAINTAINERS: update my email address
  migration: remove myself as maintainer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit ec7a9bd5bb by Peter Maydell, 2017-02-13 18:49:26 +00:00
27 changed files with 647 additions and 184 deletions

MAINTAINERS

@ -1034,7 +1034,7 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
@ -1043,7 +1043,7 @@ F: tests/virtio-console-test.c
F: tests/virtio-serial-test.c
virtio-rng
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
@ -1431,7 +1431,6 @@ F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
M: Amit Shah <amit.shah@redhat.com>
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
S: Maintained
F: include/migration/

hw/char/exynos4210_uart.c

@ -561,7 +561,7 @@ static const VMStateDescription vmstate_exynos4210_uart_fifo = {
.fields = (VMStateField[]) {
VMSTATE_UINT32(sp, Exynos4210UartFIFO),
VMSTATE_UINT32(rp, Exynos4210UartFIFO),
VMSTATE_VBUFFER_UINT32(data, Exynos4210UartFIFO, 1, NULL, 0, size),
VMSTATE_VBUFFER_UINT32(data, Exynos4210UartFIFO, 1, NULL, size),
VMSTATE_END_OF_LIST()
}
};

hw/display/g364fb.c

@ -464,7 +464,7 @@ static const VMStateDescription vmstate_g364fb = {
.minimum_version_id = 1,
.post_load = g364fb_post_load,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, 0, vram_size),
VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, vram_size),
VMSTATE_BUFFER_UNSAFE(color_palette, G364State, 0, 256 * 3),
VMSTATE_BUFFER_UNSAFE(cursor_palette, G364State, 0, 9),
VMSTATE_UINT16_ARRAY(cursor, G364State, 512),

hw/dma/pl330.c

@ -173,8 +173,8 @@ static const VMStateDescription vmstate_pl330_fifo = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(buf, PL330Fifo, 1, NULL, 0, buf_size),
VMSTATE_VBUFFER_UINT32(tag, PL330Fifo, 1, NULL, 0, buf_size),
VMSTATE_VBUFFER_UINT32(buf, PL330Fifo, 1, NULL, buf_size),
VMSTATE_VBUFFER_UINT32(tag, PL330Fifo, 1, NULL, buf_size),
VMSTATE_UINT32(head, PL330Fifo),
VMSTATE_UINT32(num, PL330Fifo),
VMSTATE_UINT32(buf_size, PL330Fifo),
@ -282,8 +282,8 @@ static const VMStateDescription vmstate_pl330 = {
VMSTATE_STRUCT(manager, PL330State, 0, vmstate_pl330_chan, PL330Chan),
VMSTATE_STRUCT_VARRAY_UINT32(chan, PL330State, num_chnls, 0,
vmstate_pl330_chan, PL330Chan),
VMSTATE_VBUFFER_UINT32(lo_seqn, PL330State, 1, NULL, 0, num_chnls),
VMSTATE_VBUFFER_UINT32(hi_seqn, PL330State, 1, NULL, 0, num_chnls),
VMSTATE_VBUFFER_UINT32(lo_seqn, PL330State, 1, NULL, num_chnls),
VMSTATE_VBUFFER_UINT32(hi_seqn, PL330State, 1, NULL, num_chnls),
VMSTATE_STRUCT(fifo, PL330State, 0, vmstate_pl330_fifo, PL330Fifo),
VMSTATE_STRUCT(read_queue, PL330State, 0, vmstate_pl330_queue,
PL330Queue),

hw/intc/exynos4210_gic.c

@ -393,7 +393,7 @@ static const VMStateDescription vmstate_exynos4210_irq_gate = {
.version_id = 2,
.minimum_version_id = 2,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(level, Exynos4210IRQGateState, 1, NULL, 0, n_in),
VMSTATE_VBUFFER_UINT32(level, Exynos4210IRQGateState, 1, NULL, n_in),
VMSTATE_END_OF_LIST()
}
};

hw/ipmi/isa_ipmi_bt.c

@ -471,10 +471,8 @@ static const VMStateDescription vmstate_ISAIPMIBTDevice = {
VMSTATE_BOOL(bt.use_irq, ISAIPMIBTDevice),
VMSTATE_BOOL(bt.irqs_enabled, ISAIPMIBTDevice),
VMSTATE_UINT32(bt.outpos, ISAIPMIBTDevice),
VMSTATE_VBUFFER_UINT32(bt.outmsg, ISAIPMIBTDevice, 1, NULL, 0,
bt.outlen),
VMSTATE_VBUFFER_UINT32(bt.inmsg, ISAIPMIBTDevice, 1, NULL, 0,
bt.inlen),
VMSTATE_VBUFFER_UINT32(bt.outmsg, ISAIPMIBTDevice, 1, NULL, bt.outlen),
VMSTATE_VBUFFER_UINT32(bt.inmsg, ISAIPMIBTDevice, 1, NULL, bt.inlen),
VMSTATE_UINT8(bt.control_reg, ISAIPMIBTDevice),
VMSTATE_UINT8(bt.mask_reg, ISAIPMIBTDevice),
VMSTATE_UINT8(bt.waiting_rsp, ISAIPMIBTDevice),

hw/net/virtio-net.c

@ -1557,119 +1557,22 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
virtio_net_set_queues(n);
}
static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
static int virtio_net_post_load_device(void *opaque, int version_id)
{
VirtIONet *n = VIRTIO_NET(vdev);
int i;
qemu_put_buffer(f, n->mac, ETH_ALEN);
qemu_put_be32(f, n->vqs[0].tx_waiting);
qemu_put_be32(f, n->mergeable_rx_bufs);
qemu_put_be16(f, n->status);
qemu_put_byte(f, n->promisc);
qemu_put_byte(f, n->allmulti);
qemu_put_be32(f, n->mac_table.in_use);
qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
qemu_put_be32(f, n->has_vnet_hdr);
qemu_put_byte(f, n->mac_table.multi_overflow);
qemu_put_byte(f, n->mac_table.uni_overflow);
qemu_put_byte(f, n->alluni);
qemu_put_byte(f, n->nomulti);
qemu_put_byte(f, n->nouni);
qemu_put_byte(f, n->nobcast);
qemu_put_byte(f, n->has_ufo);
if (n->max_queues > 1) {
qemu_put_be16(f, n->max_queues);
qemu_put_be16(f, n->curr_queues);
for (i = 1; i < n->curr_queues; i++) {
qemu_put_be32(f, n->vqs[i].tx_waiting);
}
}
if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
qemu_put_be64(f, n->curr_guest_offloads);
}
}
static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
int version_id)
{
VirtIONet *n = VIRTIO_NET(vdev);
VirtIONet *n = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
int i, link_down;
qemu_get_buffer(f, n->mac, ETH_ALEN);
n->vqs[0].tx_waiting = qemu_get_be32(f);
virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1));
n->status = qemu_get_be16(f);
n->promisc = qemu_get_byte(f);
n->allmulti = qemu_get_byte(f);
n->mac_table.in_use = qemu_get_be32(f);
/* MAC_TABLE_ENTRIES may be different from the saved image */
if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
qemu_get_buffer(f, n->mac_table.macs,
n->mac_table.in_use * ETH_ALEN);
} else {
int64_t i;
/* Overflow detected - can happen if source has a larger MAC table.
* We simply set overflow flag so there's no need to maintain the
* table of addresses, discard them all.
* Note: 64 bit math to avoid integer overflow.
*/
for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
qemu_get_byte(f);
}
n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
n->mac_table.in_use = 0;
}
qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
error_report("virtio-net: saved image requires vnet_hdr=on");
return -1;
}
n->mac_table.multi_overflow = qemu_get_byte(f);
n->mac_table.uni_overflow = qemu_get_byte(f);
n->alluni = qemu_get_byte(f);
n->nomulti = qemu_get_byte(f);
n->nouni = qemu_get_byte(f);
n->nobcast = qemu_get_byte(f);
if (qemu_get_byte(f) && !peer_has_ufo(n)) {
error_report("virtio-net: saved image requires TUN_F_UFO support");
return -1;
}
if (n->max_queues > 1) {
if (n->max_queues != qemu_get_be16(f)) {
error_report("virtio-net: different max_queues ");
return -1;
}
n->curr_queues = qemu_get_be16(f);
if (n->curr_queues > n->max_queues) {
error_report("virtio-net: curr_queues %x > max_queues %x",
n->curr_queues, n->max_queues);
return -1;
}
for (i = 1; i < n->curr_queues; i++) {
n->vqs[i].tx_waiting = qemu_get_be32(f);
}
}
if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
n->curr_guest_offloads = qemu_get_be64(f);
} else {
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
}
@ -1703,6 +1606,210 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
return 0;
}
/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
.name = "virtio-net-queue-tx_waiting",
.fields = (VMStateField[]) {
VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
VMSTATE_END_OF_LIST()
},
};
static bool max_queues_gt_1(void *opaque, int version_id)
{
return VIRTIO_NET(opaque)->max_queues > 1;
}
static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}
static bool mac_table_fits(void *opaque, int version_id)
{
return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
return !mac_table_fits(opaque, version_id);
}
/* This temporary type is shared by all the WITH_TMP methods
* although only some fields are used by each.
*/
struct VirtIONetMigTmp {
VirtIONet *parent;
VirtIONetQueue *vqs_1;
uint16_t curr_queues_1;
uint8_t has_ufo;
uint32_t has_vnet_hdr;
};
/* The 2nd and subsequent tx_waiting flags are loaded later than
* the 1st entry in the queues and only if there's more than one
* entry. We use the tmp mechanism to calculate a temporary
* pointer and count and also validate the count.
*/
static void virtio_net_tx_waiting_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
tmp->vqs_1 = tmp->parent->vqs + 1;
tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
if (tmp->parent->curr_queues == 0) {
tmp->curr_queues_1 = 0;
}
}
static int virtio_net_tx_waiting_pre_load(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
/* Reuse the pointer setup from save */
virtio_net_tx_waiting_pre_save(opaque);
if (tmp->parent->curr_queues > tmp->parent->max_queues) {
error_report("virtio-net: curr_queues %x > max_queues %x",
tmp->parent->curr_queues, tmp->parent->max_queues);
return -EINVAL;
}
return 0; /* all good */
}
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
.name = "virtio-net-tx_waiting",
.pre_load = virtio_net_tx_waiting_pre_load,
.pre_save = virtio_net_tx_waiting_pre_save,
.fields = (VMStateField[]) {
VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
curr_queues_1,
vmstate_virtio_net_queue_tx_waiting,
struct VirtIONetQueue),
VMSTATE_END_OF_LIST()
},
};
/* the 'has_ufo' flag is just tested; if the incoming stream has the
* flag set we need to check that we have it
*/
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
struct VirtIONetMigTmp *tmp = opaque;
if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
error_report("virtio-net: saved image requires TUN_F_UFO support");
return -EINVAL;
}
return 0;
}
static void virtio_net_ufo_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
tmp->has_ufo = tmp->parent->has_ufo;
}
static const VMStateDescription vmstate_virtio_net_has_ufo = {
.name = "virtio-net-ufo",
.post_load = virtio_net_ufo_post_load,
.pre_save = virtio_net_ufo_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
VMSTATE_END_OF_LIST()
},
};
/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
* flag set we need to check that we have it
*/
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
struct VirtIONetMigTmp *tmp = opaque;
if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
error_report("virtio-net: saved image requires vnet_hdr=on");
return -EINVAL;
}
return 0;
}
static void virtio_net_vnet_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
}
static const VMStateDescription vmstate_virtio_net_has_vnet = {
.name = "virtio-net-vnet",
.post_load = virtio_net_vnet_post_load,
.pre_save = virtio_net_vnet_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
VMSTATE_END_OF_LIST()
},
};
static const VMStateDescription vmstate_virtio_net_device = {
.name = "virtio-net-device",
.version_id = VIRTIO_NET_VM_VERSION,
.minimum_version_id = VIRTIO_NET_VM_VERSION,
.post_load = virtio_net_post_load_device,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
vmstate_virtio_net_queue_tx_waiting,
VirtIONetQueue),
VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
VMSTATE_UINT16(status, VirtIONet),
VMSTATE_UINT8(promisc, VirtIONet),
VMSTATE_UINT8(allmulti, VirtIONet),
VMSTATE_UINT32(mac_table.in_use, VirtIONet),
/* Guarded pair: if it fits we load it, else we throw it away
* - can happen if the source has a larger MAC table; post-load
* sets flags in this case.
*/
VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
0, mac_table_fits, mac_table.in_use,
ETH_ALEN),
VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
mac_table.in_use, ETH_ALEN),
/* Note: This is an array of uint32's that's always been saved as a
* buffer; hold onto your endiannesses; it's actually used as a bitmap
* but based on the uint.
*/
VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
vmstate_virtio_net_has_vnet),
VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
VMSTATE_UINT8(alluni, VirtIONet),
VMSTATE_UINT8(nomulti, VirtIONet),
VMSTATE_UINT8(nouni, VirtIONet),
VMSTATE_UINT8(nobcast, VirtIONet),
VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
vmstate_virtio_net_has_ufo),
VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
vmstate_info_uint16_equal, uint16_t),
VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
vmstate_virtio_net_tx_waiting),
VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
has_ctrl_guest_offloads),
VMSTATE_END_OF_LIST()
},
};
static NetClientInfo net_virtio_info = {
.type = NET_CLIENT_DRIVER_NIC,
.size = sizeof(NICState),
@ -1989,9 +2096,8 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->set_status = virtio_net_set_status;
vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
vdc->load = virtio_net_load_device;
vdc->save = virtio_net_save_device;
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
vdc->vmsd = &vmstate_virtio_net_device;
}
static const TypeInfo virtio_net_info = {

hw/net/vmxnet3.c

@ -2397,7 +2397,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
.pre_load = vmxnet3_mcast_list_pre_load,
.needed = vmxnet3_mc_list_needed,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL,
mcast_list_buff_size),
VMSTATE_END_OF_LIST()
}

hw/nvram/mac_nvram.c

@ -82,7 +82,7 @@ static const VMStateDescription vmstate_macio_nvram = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(data, MacIONVRAMState, 0, NULL, 0, size),
VMSTATE_VBUFFER_UINT32(data, MacIONVRAMState, 0, NULL, size),
VMSTATE_END_OF_LIST()
}
};

hw/nvram/spapr_nvram.c

@ -224,7 +224,7 @@ static const VMStateDescription vmstate_spapr_nvram = {
.post_load = spapr_nvram_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(size, sPAPRNVRAM),
VMSTATE_VBUFFER_ALLOC_UINT32(buf, sPAPRNVRAM, 1, NULL, 0, size),
VMSTATE_VBUFFER_ALLOC_UINT32(buf, sPAPRNVRAM, 1, NULL, size),
VMSTATE_END_OF_LIST()
},
};

hw/sd/sdhci.c

@ -1253,7 +1253,7 @@ const VMStateDescription sdhci_vmstate = {
VMSTATE_UINT16(data_count, SDHCIState),
VMSTATE_UINT64(admasysaddr, SDHCIState),
VMSTATE_UINT8(stopped_state, SDHCIState),
VMSTATE_VBUFFER_UINT32(fifo_buffer, SDHCIState, 1, NULL, 0, buf_maxsz),
VMSTATE_VBUFFER_UINT32(fifo_buffer, SDHCIState, 1, NULL, buf_maxsz),
VMSTATE_TIMER_PTR(insert_timer, SDHCIState),
VMSTATE_TIMER_PTR(transfer_timer, SDHCIState),
VMSTATE_END_OF_LIST()

hw/timer/m48t59.c

@ -563,7 +563,7 @@ static const VMStateDescription vmstate_m48t59 = {
.fields = (VMStateField[]) {
VMSTATE_UINT8(lock, M48t59State),
VMSTATE_UINT16(addr, M48t59State),
VMSTATE_VBUFFER_UINT32(buffer, M48t59State, 0, NULL, 0, size),
VMSTATE_VBUFFER_UINT32(buffer, M48t59State, 0, NULL, size),
VMSTATE_END_OF_LIST()
}
};

include/hw/virtio/virtio-net.h

@ -47,7 +47,7 @@ typedef struct VirtIONetQueue {
VirtQueue *tx_vq;
QEMUTimer *tx_timer;
QEMUBH *tx_bh;
int tx_waiting;
uint32_t tx_waiting;
struct {
VirtQueueElement *elem;
} async_tx;
@ -68,7 +68,7 @@ typedef struct VirtIONet {
size_t guest_hdr_len;
uint32_t host_features;
uint8_t has_ufo;
int mergeable_rx_bufs;
uint32_t mergeable_rx_bufs;
uint8_t promisc;
uint8_t allmulti;
uint8_t alluni;

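Why the int -> uint32_t changes above are needed: VMSTATE_UINT32 keeps the old qemu_put_be32() wire format, but the VMState field macros also type-check the struct member at compile time, so the fields must really be uint32_t. A rough sketch of the mechanism (paraphrasing the vmstate.h offset helper; not part of this diff):

    #define vmstate_offset_value(_state, _field, _type) \
        (offsetof(_state, _field) + \
         type_check(_type, typeof_field(_state, _field)))

type_check() evaluates to zero when the two types match and breaks the build otherwise, which is why the fields are retyped rather than cast.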
include/migration/colo.h

@ -35,4 +35,6 @@ COLOMode get_colo_mode(void);
/* failover */
void colo_do_failover(MigrationState *s);
void colo_checkpoint_notify(void *opaque);
#endif

include/migration/migration.h

@ -116,6 +116,7 @@ struct MigrationIncomingState {
QemuThread colo_incoming_thread;
/* The coroutine we should enter (back) after failover */
Coroutine *migration_incoming_co;
QemuSemaphore colo_incoming_sem;
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
@ -187,6 +188,13 @@ struct MigrationState
QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
/* The RAMBlock used in the last src_page_request */
RAMBlock *last_req_rb;
/* The semaphore is used to notify COLO thread that failover is finished */
QemuSemaphore colo_exit_sem;
/* The semaphore is used to notify COLO thread to do checkpoint */
QemuSemaphore colo_checkpoint_sem;
int64_t colo_checkpoint_time;
QEMUTimer *colo_delay_timer;
/* The last error that occurred */
Error *error;
@ -285,6 +293,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms);
int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
uint64_t start, size_t length);
int ram_postcopy_incoming_init(MigrationIncomingState *mis);
void ram_postcopy_migrated_memory_release(MigrationState *ms);
/**
* @migrate_add_blocker - prevent migration from proceeding
@ -304,6 +313,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);

include/migration/qemu-file.h

@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
* put_buffer without copying the buffer.
* The buffer should be available till it is sent asynchronously.
*/
void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
bool may_free);
bool qemu_file_mode_is_not_valid(const char *mode);
bool qemu_file_is_writable(QEMUFile *f);

include/migration/vmstate.h

@ -259,6 +259,7 @@ extern const VMStateInfo vmstate_info_cpudouble;
extern const VMStateInfo vmstate_info_timer;
extern const VMStateInfo vmstate_info_buffer;
extern const VMStateInfo vmstate_info_unused_buffer;
extern const VMStateInfo vmstate_info_tmp;
extern const VMStateInfo vmstate_info_bitmap;
extern const VMStateInfo vmstate_info_qtailq;
@ -587,7 +588,8 @@ extern const VMStateInfo vmstate_info_qtailq;
.offset = vmstate_offset_buffer(_state, _field) + _start, \
}
#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, \
_field_size, _multiply) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@ -596,10 +598,9 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \
.offset = offsetof(_state, _field), \
.start = (_start), \
}
#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
#define VMSTATE_VBUFFER(_field, _state, _version, _test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@ -607,10 +608,9 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER, \
.offset = offsetof(_state, _field), \
.start = (_start), \
}
#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@ -618,10 +618,10 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER, \
.offset = offsetof(_state, _field), \
.start = (_start), \
}
#define VMSTATE_VBUFFER_ALLOC_UINT32(_field, _state, _version, _test, _start, _field_size) { \
#define VMSTATE_VBUFFER_ALLOC_UINT32(_field, _state, _version, \
_test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@ -629,7 +629,6 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER|VMS_ALLOC, \
.offset = offsetof(_state, _field), \
.start = (_start), \
}
#define VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, _test, _version, _info, _size) { \
@ -651,6 +650,24 @@ extern const VMStateInfo vmstate_info_qtailq;
.offset = offsetof(_state, _field), \
}
/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state
* and execute the vmsd on the temporary. Note that we're working with
* the whole of _state here, not a field within it.
* At compile time we check:
*  - that _tmp_type contains a 'parent' member that's a pointer to the
*    '_state' type
*  - that the pointer is right at the start of _tmp_type.
*/
#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) { \
.name = "tmp", \
.size = sizeof(_tmp_type) + \
QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \
type_check_pointer(_state, \
typeof_field(_tmp_type, parent)), \
.vmsd = &(_vmsd), \
.info = &vmstate_info_tmp, \
}
#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \
.name = "unused", \
.field_exists = (_test), \
@ -660,6 +677,17 @@ extern const VMStateInfo vmstate_info_qtailq;
.flags = VMS_BUFFER, \
}
/* Discard size * field_num bytes, where field_num is a uint32 member */
#define VMSTATE_UNUSED_VARRAY_UINT32(_state, _test, _version, _field_num, _size) {\
.name = "unused", \
.field_exists = (_test), \
.num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
.version_id = (_version), \
.size = (_size), \
.info = &vmstate_info_unused_buffer, \
.flags = VMS_VARRAY_UINT32 | VMS_BUFFER, \
}
/* _field_size should be a int32_t field in the _state struct giving the
* size of the bitmap _field in bits.
*/
@ -948,13 +976,10 @@ extern const VMStateInfo vmstate_info_qtailq;
VMSTATE_BUFFER_START_MIDDLE_V(_f, _s, _start, 0)
#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \
VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
VMSTATE_VBUFFER(_f, _s, 0, NULL, _size)
#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \
VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size) \
VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, _size)
#define VMSTATE_BUFFER_TEST(_f, _s, _test) \
VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))

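To make VMSTATE_WITH_TMP concrete, here is a minimal hypothetical sketch (all names invented for illustration; the real conversions are the virtio-net diff above and the test further down). A device migrates a value derived from its state by declaring a tmp type whose first member is the parent pointer:

    typedef struct DemoState {
        uint32_t head, tail;              /* real device fields */
    } DemoState;

    typedef struct DemoMigTmp {
        DemoState *parent;                /* must be first; checked by the macro */
        uint32_t used;                    /* exists only in the stream */
    } DemoMigTmp;

    static void demo_tmp_pre_save(void *opaque)
    {
        DemoMigTmp *tmp = opaque;
        tmp->used = tmp->parent->tail - tmp->parent->head;
    }

    static int demo_tmp_post_load(void *opaque, int version_id)
    {
        DemoMigTmp *tmp = opaque;
        tmp->parent->tail = tmp->parent->head + tmp->used;
        return 0;
    }

    static const VMStateDescription vmstate_demo_tmp = {
        .name = "demo/tmp",
        .pre_save = demo_tmp_pre_save,
        .post_load = demo_tmp_post_load,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(used, DemoMigTmp),
            VMSTATE_END_OF_LIST()
        }
    };

    static const VMStateDescription vmstate_demo = {
        .name = "demo",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(head, DemoState),
            /* allocates a DemoMigTmp, points tmp->parent at the DemoState,
             * then runs vmstate_demo_tmp on it for save/load */
            VMSTATE_WITH_TMP(DemoState, DemoMigTmp, vmstate_demo_tmp),
            VMSTATE_END_OF_LIST()
        }
    };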
migration/colo.c

@ -20,6 +20,8 @@
#include "qapi/error.h"
#include "migration/failover.h"
static bool vmstate_loading;
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
bool colo_supported(void)
@ -51,6 +53,19 @@ static void secondary_vm_do_failover(void)
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
/* Cannot do failover while the VM is loading its VMstate, or it
* will break the secondary VM.
*/
if (vmstate_loading) {
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_RELAUNCH);
if (old_state != FAILOVER_STATUS_ACTIVE) {
error_report("Unknown error while doing failover for secondary VM, "
"old_state: %s", FailoverStatus_lookup[old_state]);
}
return;
}
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
@ -59,6 +74,18 @@ static void secondary_vm_do_failover(void)
/* recover runstate to normal migration finish state */
autostart = true;
}
/*
* Make sure the COLO incoming thread is not blocked in recv or send.
* If mis->from_src_file and mis->to_src_file use the same fd, the
* second shutdown() will return -1; we ignore this value as it is
* harmless.
*/
if (mis->from_src_file) {
qemu_file_shutdown(mis->from_src_file);
}
if (mis->to_src_file) {
qemu_file_shutdown(mis->to_src_file);
}
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
@ -67,6 +94,8 @@ static void secondary_vm_do_failover(void)
"secondary VM", FailoverStatus_lookup[old_state]);
return;
}
/* Notify COLO incoming thread that failover work is finished */
qemu_sem_post(&mis->colo_incoming_sem);
/* For Secondary VM, jump to incoming co */
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
@ -81,6 +110,18 @@ static void primary_vm_do_failover(void)
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
/*
* Wake up the COLO thread, which may be blocked in recv() or send().
* s->rp_state.from_dst_file and s->to_dst_file may use the same fd,
* but shutting the fd down twice is harmless.
*/
if (s->to_dst_file) {
qemu_file_shutdown(s->to_dst_file);
}
if (s->rp_state.from_dst_file) {
qemu_file_shutdown(s->rp_state.from_dst_file);
}
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_ACTIVE) {
@ -88,6 +129,8 @@ static void primary_vm_do_failover(void)
FailoverStatus_lookup[old_state]);
return;
}
/* Notify COLO thread that failover work is finished */
qemu_sem_post(&s->colo_exit_sem);
}
void colo_do_failover(MigrationState *s)
@ -302,7 +345,7 @@ static void colo_process_checkpoint(MigrationState *s)
{
QIOChannelBuffer *bioc;
QEMUFile *fb = NULL;
int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
Error *local_err = NULL;
int ret;
@ -332,26 +375,21 @@ static void colo_process_checkpoint(MigrationState *s)
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
timer_mod(s->colo_delay_timer,
current_time + s->parameters.x_checkpoint_delay);
while (s->state == MIGRATION_STATUS_COLO) {
if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request");
goto out;
}
current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
if (current_time - checkpoint_time <
s->parameters.x_checkpoint_delay) {
int64_t delay_ms;
qemu_sem_wait(&s->colo_checkpoint_sem);
delay_ms = s->parameters.x_checkpoint_delay -
(current_time - checkpoint_time);
g_usleep(delay_ms * 1000);
}
ret = colo_do_checkpoint_transaction(s, bioc, fb);
if (ret < 0) {
goto out;
}
checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
}
out:
@ -364,14 +402,41 @@ out:
qemu_fclose(fb);
}
timer_del(s->colo_delay_timer);
/* Hopefully the wait here will not be too long */
qemu_sem_wait(&s->colo_exit_sem);
qemu_sem_destroy(&s->colo_exit_sem);
/*
* Must be called after the failover BH has completed, or the
* failover BH may shut down the wrong fd, which could be reused
* by other threads after we release it here.
*/
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
}
}
void colo_checkpoint_notify(void *opaque)
{
MigrationState *s = opaque;
int64_t next_notify_time;
qemu_sem_post(&s->colo_checkpoint_sem);
s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
next_notify_time = s->colo_checkpoint_time +
s->parameters.x_checkpoint_delay;
timer_mod(s->colo_delay_timer, next_notify_time);
}
void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
qemu_sem_init(&s->colo_checkpoint_sem, 0);
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify, s);
qemu_sem_init(&s->colo_exit_sem, 0);
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
colo_process_checkpoint(s);
@ -410,6 +475,8 @@ void *colo_process_incoming_thread(void *opaque)
uint64_t value;
Error *local_err = NULL;
qemu_sem_init(&mis->colo_incoming_sem, 0);
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@ -496,13 +563,23 @@ void *colo_process_incoming_thread(void *opaque)
qemu_mutex_lock_iothread();
qemu_system_reset(VMRESET_SILENT);
vmstate_loading = true;
if (qemu_loadvm_state(fb) < 0) {
error_report("COLO: loadvm failed");
qemu_mutex_unlock_iothread();
goto out;
}
vmstate_loading = false;
qemu_mutex_unlock_iothread();
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
failover_set_state(FAILOVER_STATUS_RELAUNCH,
FAILOVER_STATUS_NONE);
failover_request_active(NULL);
goto out;
}
colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
&local_err);
if (local_err) {
@ -511,6 +588,7 @@ void *colo_process_incoming_thread(void *opaque)
}
out:
vmstate_loading = false;
/* Throw the unreported error message after exited from loop */
if (local_err) {
error_report_err(local_err);
@ -520,6 +598,10 @@ out:
qemu_fclose(fb);
}
/* Hopefully we will not loop here for too long */
qemu_sem_wait(&mis->colo_incoming_sem);
qemu_sem_destroy(&mis->colo_incoming_sem);
/* Must be called after the failover BH has completed */
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}

migration/migration.c

@ -891,6 +891,9 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
if (params->has_x_checkpoint_delay) {
s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
if (migration_in_colo_state()) {
colo_checkpoint_notify(s);
}
}
}
@ -1297,6 +1300,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
qmp_migrate_set_parameters(&p, errp);
}
bool migrate_release_ram(void)
{
MigrationState *s;
s = migrate_get_current();
return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
}
bool migrate_postcopy_ram(void)
{
MigrationState *s;
@ -1713,6 +1725,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
*/
qemu_savevm_send_ping(ms->to_dst_file, 4);
if (migrate_release_ram()) {
ram_postcopy_migrated_memory_release(ms);
}
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
error_report("postcopy_start: Migration stream errored");

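Because qmp_migrate_set_parameters() now calls colo_checkpoint_notify() (see the hunk above), an updated delay re-arms the checkpoint timer immediately instead of waiting out the old interval. An illustrative QMP exchange (value invented):

    { "execute": "migrate-set-parameters",
      "arguments": { "x-checkpoint-delay": 2000 } }
    { "return": {} }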
migration/qemu-file.c

@ -49,6 +49,7 @@ struct QEMUFile {
int buf_size; /* 0 when writing */
uint8_t buf[IO_BUF_SIZE];
DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
unsigned int iovcnt;
@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
return f->ops->writev_buffer;
}
static void qemu_iovec_release_ram(QEMUFile *f)
{
struct iovec iov;
unsigned long idx;
/* Find and release all the contiguous memory ranges marked as may_free. */
idx = find_next_bit(f->may_free, f->iovcnt, 0);
if (idx >= f->iovcnt) {
return;
}
iov = f->iov[idx];
/* Within the loop, madvise() is called once per contiguous range of
* may_free iovs and the running iov is then reinitialized; at the end,
* madvise() is called for the last accumulated iov.
*/
while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
/* check for adjacent buffers and coalesce them */
if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
iov.iov_len += f->iov[idx].iov_len;
continue;
}
if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
error_report("migrate: madvise DONTNEED failed %p %zd: %s",
iov.iov_base, iov.iov_len, strerror(errno));
}
iov = f->iov[idx];
}
if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
error_report("migrate: madvise DONTNEED failed %p %zd: %s",
iov.iov_base, iov.iov_len, strerror(errno));
}
memset(f->may_free, 0, sizeof(f->may_free));
}
/**
* Flushes QEMUFile buffer
*
@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
if (f->iovcnt > 0) {
expect = iov_size(f->iov, f->iovcnt);
ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
qemu_iovec_release_ram(f);
}
if (ret >= 0) {
@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
return ret;
}
static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
bool may_free)
{
/* check for adjacent buffers and coalesce them */
if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
f->iov[f->iovcnt - 1].iov_len) {
f->iov[f->iovcnt - 1].iov_len &&
may_free == test_bit(f->iovcnt - 1, f->may_free))
{
f->iov[f->iovcnt - 1].iov_len += size;
} else {
if (may_free) {
set_bit(f->iovcnt, f->may_free);
}
f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
f->iov[f->iovcnt++].iov_len = size;
}
@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
}
}
void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
bool may_free)
{
if (f->last_error) {
return;
}
f->bytes_xfer += size;
add_to_iovec(f, buf, size);
add_to_iovec(f, buf, size, may_free);
}
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
memcpy(f->buf + f->buf_index, buf, l);
f->bytes_xfer += l;
add_to_iovec(f, f->buf + f->buf_index, l);
add_to_iovec(f, f->buf + f->buf_index, l, false);
f->buf_index += l;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
f->buf[f->buf_index] = v;
f->bytes_xfer++;
add_to_iovec(f, f->buf + f->buf_index, 1);
add_to_iovec(f, f->buf + f->buf_index, 1, false);
f->buf_index++;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
}
qemu_put_be32(f, blen);
if (f->ops->writev_buffer) {
add_to_iovec(f, f->buf + f->buf_index, blen);
add_to_iovec(f, f->buf + f->buf_index, blen, false);
}
f->buf_index += blen;
if (f->buf_index == IO_BUF_SIZE) {

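From a caller's point of view, passing may_free=true hands the buffer over to the QEMUFile until the next flush, after which it may be madvise(DONTNEED)'d. A hedged sketch of a call site, mirroring the ram.c hunk further down (p is a guest page pointer; the guard limits the release to release-ram postcopy):

    /* queue the page zero-copy; let qemu_fflush() release it afterwards
     * when the release-ram capability is active during postcopy */
    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
                          migrate_release_ram() && migration_in_postcopy(ms));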
migration/ram.c

@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return pages;
}
static void ram_release_pages(MigrationState *ms, const char *block_name,
uint64_t offset, int pages)
{
if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
return;
}
ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
}
/**
* ram_save_page: Send the given page to the stream
*
@ -713,13 +723,14 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
* >=0 - Number of pages written - this might legally be 0
* if xbzrle noticed the page was the same.
*
* @ms: The current migration state.
* @f: QEMUFile where to send the data
* @block: block that contains the page we want to send
* @offset: offset inside the block for the page
* @last_stage: if we are at the completion stage
* @bytes_transferred: increase it with the number of transferred bytes
*/
static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
bool last_stage, uint64_t *bytes_transferred)
{
int pages = -1;
@ -764,9 +775,9 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
ram_release_pages(ms, block->idstr, pss->offset, pages);
} else if (!ram_bulk_stage &&
!migration_in_postcopy(migrate_get_current()) &&
migrate_use_xbzrle()) {
!migration_in_postcopy(ms) && migrate_use_xbzrle()) {
pages = save_xbzrle_page(f, &p, current_addr, block,
offset, last_stage, bytes_transferred);
if (!last_stage) {
@ -783,7 +794,9 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
*bytes_transferred += save_page_header(f, block,
offset | RAM_SAVE_FLAG_PAGE);
if (send_async) {
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
migrate_release_ram() &
migration_in_postcopy(ms));
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
error_report("compressed data failed!");
} else {
bytes_sent += blen;
ram_release_pages(migrate_get_current(), block->idstr,
offset & TARGET_PAGE_MASK, 1);
}
return bytes_sent;
@ -893,14 +908,15 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
*
* Returns: Number of pages written.
*
* @ms: The current migration state.
* @f: QEMUFile where to send the data
* @block: block that contains the page we want to send
* @offset: offset inside the block for the page
* @last_stage: if we are at the completion stage
* @bytes_transferred: increase it with the number of transferred bytes
*/
static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
bool last_stage,
static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
PageSearchStatus *pss, bool last_stage,
uint64_t *bytes_transferred)
{
int pages = -1;
@ -951,12 +967,17 @@ static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
error_report("compressed data failed!");
}
}
if (pages > 0) {
ram_release_pages(ms, block->idstr, pss->offset, pages);
}
} else {
offset |= RAM_SAVE_FLAG_CONTINUE;
pages = save_zero_page(f, block, offset, p, bytes_transferred);
if (pages == -1) {
pages = compress_page_with_multi_thread(f, block, offset,
bytes_transferred);
} else {
ram_release_pages(ms, block->idstr, pss->offset, pages);
}
}
}
@ -1231,11 +1252,11 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
unsigned long *unsentmap;
if (compression_switch && migrate_use_compression()) {
res = ram_save_compressed_page(f, pss,
res = ram_save_compressed_page(ms, f, pss,
last_stage,
bytes_transferred);
} else {
res = ram_save_page(f, pss, last_stage,
res = ram_save_page(ms, f, pss, last_stage,
bytes_transferred);
}
@ -1325,6 +1346,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
ram_addr_t space */
/* No dirty pages as there is zero RAM */
if (!ram_bytes_total()) {
return pages;
}
pss.block = last_seen_block;
pss.offset = last_offset;
pss.complete_round = false;
@ -1516,6 +1542,25 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
/* **** functions for postcopy ***** */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
struct RAMBlock *block;
unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
unsigned long first = block->offset >> TARGET_PAGE_BITS;
unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
unsigned long run_start = find_next_zero_bit(bitmap, range, first);
while (run_start < range) {
unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
(run_end - run_start) << TARGET_PAGE_BITS);
run_start = find_next_zero_bit(bitmap, range, run_end + 1);
}
}
}
/*
* Callback from postcopy_each_ram_send_discard for each RAMBlock
* Note: At this point the 'unsentmap' is the processed bitmap combined
@ -1912,14 +1957,17 @@ static int ram_save_init_globals(void)
bytes_transferred = 0;
reset_ram_globals();
ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
/* Skip setting bitmap if there is no RAM */
if (ram_bytes_total()) {
ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
if (migrate_postcopy_ram()) {
migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
if (migrate_postcopy_ram()) {
migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
}
}
/*

migration/savevm.c

@ -356,7 +356,7 @@ static const VMStateDescription vmstate_configuration = {
.pre_save = configuration_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT32(len, SaveState),
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {

migration/vmstate.c

@ -68,10 +68,10 @@ static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
}
}
if (size) {
*((void **)base_addr + field->start) = g_malloc(size);
*(void **)base_addr = g_malloc(size);
}
}
base_addr = *(void **)base_addr + field->start;
base_addr = *(void **)base_addr;
}
return base_addr;
@ -935,6 +935,46 @@ const VMStateInfo vmstate_info_unused_buffer = {
.put = put_unused_buffer,
};
/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate
* a temporary buffer and the pre_load/pre_save methods in the child vmsd
* copy stuff from the parent into the child and do calculations to fill
* in fields that don't really exist in the parent but need to be in the
* stream.
*/
static int get_tmp(QEMUFile *f, void *pv, size_t size, VMStateField *field)
{
int ret;
const VMStateDescription *vmsd = field->vmsd;
int version_id = field->version_id;
void *tmp = g_malloc(size);
/* Writes the parent pointer, which is at the start of tmp */
*(void **)tmp = pv;
ret = vmstate_load_state(f, vmsd, tmp, version_id);
g_free(tmp);
return ret;
}
static int put_tmp(QEMUFile *f, void *pv, size_t size, VMStateField *field,
QJSON *vmdesc)
{
const VMStateDescription *vmsd = field->vmsd;
void *tmp = g_malloc(size);
/* Writes the parent pointer, which is at the start of tmp */
*(void **)tmp = pv;
vmstate_save_state(f, vmsd, tmp, vmdesc);
g_free(tmp);
return 0;
}
const VMStateInfo vmstate_info_tmp = {
.name = "tmp",
.get = get_tmp,
.put = put_tmp,
};
/* bitmaps (as defined by bitmap.h). Note that size here is the size
* of the bitmap in bits. The on-the-wire format of a bitmap is 64
* bit words with the bits in big endian order. The in-memory format

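The layout that get_tmp()/put_tmp() rely on, shown schematically (this is exactly what the compile-time checks in VMSTATE_WITH_TMP enforce):

    /* tmp buffer for a field declared with VMSTATE_WITH_TMP:
     *
     *   +------------------+  <- g_malloc(sizeof(_tmp_type))
     *   | parent pointer   |  written by get_tmp/put_tmp: *(void **)tmp = pv
     *   +------------------+
     *   | other tmp fields |  filled by the child vmsd's pre_save
     *   +------------------+  (saving) or from the stream (loading)
     */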
qapi-schema.json

@ -865,11 +865,14 @@
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
# @release-ram: if enabled, qemu will free the migrated ram pages on the source
# during postcopy-ram migration. (since 2.9)
#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo'] }
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
##
# @MigrationCapabilityStatus:
@ -1190,10 +1193,12 @@
#
# @completed: finish the process of failover
#
# @relaunch: restart the failover process, from 'none' -> 'completed' (Since 2.9)
#
# Since: 2.8
##
{ 'enum': 'FailoverStatus',
'data': [ 'none', 'require', 'active', 'completed'] }
'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
##
# @x-colo-lost-heartbeat:

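Once the capability is in the schema it can be toggled like any other migration capability before starting a postcopy migration; an illustrative QMP exchange:

    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
          { "capability": "release-ram", "state": true } ] } }
    { "return": {} }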
target/s390x/machine.c

@ -180,7 +180,7 @@ const VMStateDescription vmstate_s390_cpu = {
VMSTATE_UINT8(env.cpu_state, S390CPU),
VMSTATE_UINT8(env.sigp_order, S390CPU),
VMSTATE_UINT32_V(irqstate_saved_size, S390CPU, 4),
VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0,
VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL,
irqstate_saved_size),
VMSTATE_END_OF_LIST()
},

tests/test-vmstate.c

@ -90,7 +90,7 @@ static void save_buffer(const uint8_t *buf, size_t buf_size)
qemu_fclose(fsave);
}
static void compare_vmstate(uint8_t *wire, size_t size)
static void compare_vmstate(const uint8_t *wire, size_t size)
{
QEMUFile *f = open_test_file(false);
uint8_t result[size];
@ -113,7 +113,7 @@ static void compare_vmstate(uint8_t *wire, size_t size)
}
static int load_vmstate_one(const VMStateDescription *desc, void *obj,
int version, uint8_t *wire, size_t size)
int version, const uint8_t *wire, size_t size)
{
QEMUFile *f;
int ret;
@ -137,7 +137,7 @@ static int load_vmstate_one(const VMStateDescription *desc, void *obj,
static int load_vmstate(const VMStateDescription *desc,
void *obj, void *obj_clone,
void (*obj_copy)(void *, void*),
int version, uint8_t *wire, size_t size)
int version, const uint8_t *wire, size_t size)
{
/* We test with zero size */
obj_copy(obj_clone, obj);
@ -289,7 +289,6 @@ static void test_simple_primitive(void)
FIELD_EQUAL(i64_1);
FIELD_EQUAL(i64_2);
}
#undef FIELD_EQUAL
typedef struct TestStruct {
uint32_t a, b, c, e;
@ -474,7 +473,6 @@ static void test_load_skip(void)
qemu_fclose(loading);
}
typedef struct {
int32_t i;
} TestStructTriv;
@ -688,6 +686,94 @@ static void test_load_q(void)
qemu_fclose(fload);
}
typedef struct TmpTestStruct {
TestStruct *parent;
int64_t diff;
} TmpTestStruct;
static void tmp_child_pre_save(void *opaque)
{
struct TmpTestStruct *tts = opaque;
tts->diff = tts->parent->b - tts->parent->a;
}
static int tmp_child_post_load(void *opaque, int version_id)
{
struct TmpTestStruct *tts = opaque;
tts->parent->b = tts->parent->a + tts->diff;
return 0;
}
static const VMStateDescription vmstate_tmp_back_to_parent = {
.name = "test/tmp_child_parent",
.fields = (VMStateField[]) {
VMSTATE_UINT64(f, TestStruct),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_tmp_child = {
.name = "test/tmp_child",
.pre_save = tmp_child_pre_save,
.post_load = tmp_child_post_load,
.fields = (VMStateField[]) {
VMSTATE_INT64(diff, TmpTestStruct),
VMSTATE_STRUCT_POINTER(parent, TmpTestStruct,
vmstate_tmp_back_to_parent, TestStruct),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_with_tmp = {
.name = "test/with_tmp",
.version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(a, TestStruct),
VMSTATE_UINT64(d, TestStruct),
VMSTATE_WITH_TMP(TestStruct, TmpTestStruct, vmstate_tmp_child),
VMSTATE_END_OF_LIST()
}
};
static void obj_tmp_copy(void *target, void *source)
{
memcpy(target, source, sizeof(TestStruct));
}
static void test_tmp_struct(void)
{
TestStruct obj, obj_clone;
uint8_t const wire_with_tmp[] = {
/* u32 a */ 0x00, 0x00, 0x00, 0x02,
/* u64 d */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
/* diff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
/* u64 f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
};
memset(&obj, 0, sizeof(obj));
obj.a = 2;
obj.b = 4;
obj.d = 1;
obj.f = 8;
save_vmstate(&vmstate_with_tmp, &obj);
compare_vmstate(wire_with_tmp, sizeof(wire_with_tmp));
memset(&obj, 0, sizeof(obj));
SUCCESS(load_vmstate(&vmstate_with_tmp, &obj, &obj_clone,
obj_tmp_copy, 1, wire_with_tmp,
sizeof(wire_with_tmp)));
g_assert_cmpint(obj.a, ==, 2); /* From top level vmsd */
g_assert_cmpint(obj.b, ==, 4); /* from the post_load */
g_assert_cmpint(obj.d, ==, 1); /* From top level vmsd */
g_assert_cmpint(obj.f, ==, 8); /* From the child->parent */
}
int main(int argc, char **argv)
{
temp_fd = mkstemp(temp_file);
@ -708,7 +794,7 @@ int main(int argc, char **argv)
test_arr_ptr_str_no0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
g_test_run();
close(temp_fd);

util/fifo8.c

@ -118,7 +118,7 @@ const VMStateDescription vmstate_fifo8 = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, capacity),
VMSTATE_UINT32(head, Fifo8),
VMSTATE_UINT32(num, Fifo8),
VMSTATE_END_OF_LIST()