Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio,vhost,pci,pc: features, cleanups

SRAT tables for DIMM devices
new virtio net flags for speed/duplex
post-copy migration support in vhost
cleanups in pci

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 20 Mar 2018 14:40:43 GMT
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (51 commits)
  postcopy shared docs
  libvhost-user: Claim support for postcopy
  postcopy: Allow shared memory
  vhost: Huge page align and merge
  vhost+postcopy: Wire up POSTCOPY_END notify
  vhost-user: Add VHOST_USER_POSTCOPY_END message
  libvhost-user: mprotect & madvises for postcopy
  vhost+postcopy: Call wakeups
  vhost+postcopy: Add vhost waker
  postcopy: postcopy_notify_shared_wake
  postcopy: helper for waking shared
  vhost+postcopy: Resolve client address
  postcopy-ram: add a stub for postcopy_request_shared_page
  vhost+postcopy: Helper to send requests to source for shared pages
  vhost+postcopy: Stash RAMBlock and offset
  vhost+postcopy: Send address back to qemu
  libvhost-user+postcopy: Register new regions with the ufd
  migration/ram: ramblock_recv_bitmap_test_byte_offset
  postcopy+vhost-user: Split set_mem_table for postcopy
  vhost+postcopy: Transmit 'listen' to slave
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
#	scripts/update-linux-headers.sh
This commit is contained in: ed627b2ad3 (Peter Maydell, 2018-03-20 15:48:34 +00:00)
55 changed files with 3767 additions and 536 deletions

Makefile

@@ -777,7 +777,6 @@ bepo cz
ifdef INSTALL_BLOBS
BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
@@ -1048,6 +1047,9 @@ endif
include $(SRC_PATH)/tests/docker/Makefile.include
include $(SRC_PATH)/tests/vm/Makefile.include
printgen:
@echo $(GENERATED_FILES)
.PHONY: help
help:
@echo 'Generic targets:'
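
The new printgen target simply echoes $(GENERATED_FILES); assuming a
configured build tree, it is invoked as:

    $ make printgen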

contrib/libvhost-user/libvhost-user.c

@@ -26,9 +26,20 @@
#include <sys/socket.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include "qemu/compiler.h"
#if defined(__linux__)
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include "qemu/compiler.h"
#ifdef __NR_userfaultfd
#include <linux/userfaultfd.h>
#endif
#endif
#include "qemu/atomic.h"
#include "libvhost-user.h"
@@ -86,6 +97,9 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_SET_VRING_ENDIAN),
REQ(VHOST_USER_GET_CONFIG),
REQ(VHOST_USER_SET_CONFIG),
REQ(VHOST_USER_POSTCOPY_ADVISE),
REQ(VHOST_USER_POSTCOPY_LISTEN),
REQ(VHOST_USER_POSTCOPY_END),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -171,6 +185,35 @@ vmsg_close_fds(VhostUserMsg *vmsg)
}
}
/* A test to see if we have userfault available */
static bool
have_userfault(void)
{
#if defined(__linux__) && defined(__NR_userfaultfd) &&\
defined(UFFD_FEATURE_MISSING_SHMEM) &&\
defined(UFFD_FEATURE_MISSING_HUGETLBFS)
/* Now test that the kernel we're running on really has the features */
int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
struct uffdio_api api_struct;
if (ufd < 0) {
return false;
}
api_struct.api = UFFD_API;
api_struct.features = UFFD_FEATURE_MISSING_SHMEM |
UFFD_FEATURE_MISSING_HUGETLBFS;
if (ioctl(ufd, UFFDIO_API, &api_struct)) {
close(ufd);
return false;
}
close(ufd);
return true;
#else
return false;
#endif
}
static bool
vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
@@ -245,6 +288,31 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
int rc;
uint8_t *p = (uint8_t *)vmsg;
char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
struct iovec iov = {
.iov_base = (char *)vmsg,
.iov_len = VHOST_USER_HDR_SIZE,
};
struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = control,
};
struct cmsghdr *cmsg;
memset(control, 0, sizeof(control));
assert(vmsg->fd_num <= VHOST_MEMORY_MAX_NREGIONS);
if (vmsg->fd_num > 0) {
size_t fdsize = vmsg->fd_num * sizeof(int);
msg.msg_controllen = CMSG_SPACE(fdsize);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_len = CMSG_LEN(fdsize);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
} else {
msg.msg_controllen = 0;
}
/* Set the version in the flags when sending the reply */
vmsg->flags &= ~VHOST_USER_VERSION_MASK;
@@ -252,7 +320,7 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
vmsg->flags |= VHOST_USER_REPLY_MASK;
do {
rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
rc = sendmsg(conn_fd, &msg, 0);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
do {
@@ -345,6 +413,7 @@ vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
}
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->fd_num = 0;
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -409,6 +478,148 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
static bool
vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
{
int i;
VhostUserMemory *memory = &vmsg->payload.memory;
dev->nregions = memory->nregions;
DPRINT("Nregions: %d\n", memory->nregions);
for (i = 0; i < dev->nregions; i++) {
void *mmap_addr;
VhostUserMemoryRegion *msg_region = &memory->regions[i];
VuDevRegion *dev_region = &dev->regions[i];
DPRINT("Region %d\n", i);
DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
msg_region->guest_phys_addr);
DPRINT(" memory_size: 0x%016"PRIx64"\n",
msg_region->memory_size);
DPRINT(" userspace_addr 0x%016"PRIx64"\n",
msg_region->userspace_addr);
DPRINT(" mmap_offset 0x%016"PRIx64"\n",
msg_region->mmap_offset);
dev_region->gpa = msg_region->guest_phys_addr;
dev_region->size = msg_region->memory_size;
dev_region->qva = msg_region->userspace_addr;
dev_region->mmap_offset = msg_region->mmap_offset;
/* We don't use the offset argument of mmap() since the
* mapped address has to be page aligned, and we use huge
* pages.
* In postcopy we're using PROT_NONE here to catch anyone
* accessing it before we userfault
*/
mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
PROT_NONE, MAP_SHARED,
vmsg->fds[i], 0);
if (mmap_addr == MAP_FAILED) {
vu_panic(dev, "region mmap error: %s", strerror(errno));
} else {
dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
dev_region->mmap_addr);
}
/* Return the address to QEMU so that it can translate the ufd
* fault addresses back.
*/
msg_region->userspace_addr = (uintptr_t)(mmap_addr +
dev_region->mmap_offset);
close(vmsg->fds[i]);
}
/* Send the message back to qemu with the addresses filled in */
vmsg->fd_num = 0;
if (!vu_message_write(dev, dev->sock, vmsg)) {
vu_panic(dev, "failed to respond to set-mem-table for postcopy");
return false;
}
/* Wait for QEMU to confirm that it's registered the handler for the
* faults.
*/
if (!vu_message_read(dev, dev->sock, vmsg) ||
vmsg->size != sizeof(vmsg->payload.u64) ||
vmsg->payload.u64 != 0) {
vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
return false;
}
/* OK, now we can go and register the memory and generate faults */
for (i = 0; i < dev->nregions; i++) {
VuDevRegion *dev_region = &dev->regions[i];
int ret;
#ifdef UFFDIO_REGISTER
/* We should already have an open ufd. Mark each memory
* range as ufd.
* Discard any mapping we have here; note I can't use MADV_REMOVE
* or fallocate to make the hole since I don't want to lose
* data that's already arrived in the shared process.
* TODO: How to do hugepage
*/
ret = madvise((void *)dev_region->mmap_addr,
dev_region->size + dev_region->mmap_offset,
MADV_DONTNEED);
if (ret) {
fprintf(stderr,
"%s: Failed to madvise(DONTNEED) region %d: %s\n",
__func__, i, strerror(errno));
}
/* Turn off transparent hugepages so we don't lose wakeups
* in neighbouring pages.
* TODO: Turn this back on later.
*/
ret = madvise((void *)dev_region->mmap_addr,
dev_region->size + dev_region->mmap_offset,
MADV_NOHUGEPAGE);
if (ret) {
/* Note: This can happen legally on kernels that are configured
* without madvise'able hugepages
*/
fprintf(stderr,
"%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
__func__, i, strerror(errno));
}
struct uffdio_register reg_struct;
reg_struct.range.start = (uintptr_t)dev_region->mmap_addr;
reg_struct.range.len = dev_region->size + dev_region->mmap_offset;
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, &reg_struct)) {
vu_panic(dev, "%s: Failed to userfault region %d "
"@%p + size:%zx offset: %zx: (ufd=%d)%s\n",
__func__, i,
dev_region->mmap_addr,
dev_region->size, dev_region->mmap_offset,
dev->postcopy_ufd, strerror(errno));
return false;
}
if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
vu_panic(dev, "%s Region (%d) doesn't support COPY",
__func__, i);
return false;
}
DPRINT("%s: region %d: Registered userfault for %llx + %llx\n",
__func__, i, reg_struct.range.start, reg_struct.range.len);
/* Now that it's registered, we can let the client at it */
if (mprotect((void *)dev_region->mmap_addr,
dev_region->size + dev_region->mmap_offset,
PROT_READ | PROT_WRITE)) {
vu_panic(dev, "failed to mprotect region %d for postcopy (%s)",
i, strerror(errno));
return false;
}
/* TODO: Stash 'zero' support flags somewhere */
#endif
}
return false;
}
static bool
vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -425,6 +636,10 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
}
dev->nregions = memory->nregions;
if (dev->postcopy_listening) {
return vu_set_mem_table_exec_postcopy(dev, vmsg);
}
DPRINT("Nregions: %d\n", memory->nregions);
for (i = 0; i < dev->nregions; i++) {
void *mmap_addr;
@@ -500,6 +715,7 @@ vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->log_size = log_mmap_size;
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->fd_num = 0;
return true;
}
@@ -752,12 +968,17 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ;
if (have_userfault()) {
features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
}
if (dev->iface->get_protocol_features) {
features |= dev->iface->get_protocol_features(dev);
}
vmsg->payload.u64 = features;
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->fd_num = 0;
return true;
}
@@ -856,6 +1077,77 @@ vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
static bool
vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg)
{
dev->postcopy_ufd = -1;
#ifdef UFFDIO_API
struct uffdio_api api_struct;
dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
vmsg->size = 0;
#endif
if (dev->postcopy_ufd == -1) {
vu_panic(dev, "Userfaultfd not available: %s", strerror(errno));
goto out;
}
#ifdef UFFDIO_API
api_struct.api = UFFD_API;
api_struct.features = 0;
if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno));
close(dev->postcopy_ufd);
dev->postcopy_ufd = -1;
goto out;
}
/* TODO: Stash feature flags somewhere */
#endif
out:
/* Return a ufd to QEMU */
vmsg->fd_num = 1;
vmsg->fds[0] = dev->postcopy_ufd;
return true; /* = send a reply */
}
static bool
vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 = -1;
vmsg->size = sizeof(vmsg->payload.u64);
if (dev->nregions) {
vu_panic(dev, "Regions already registered at postcopy-listen");
return true;
}
dev->postcopy_listening = true;
vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
vmsg->payload.u64 = 0; /* Success */
return true;
}
static bool
vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
{
DPRINT("%s: Entry\n", __func__);
dev->postcopy_listening = false;
if (dev->postcopy_ufd > 0) {
close(dev->postcopy_ufd);
dev->postcopy_ufd = -1;
DPRINT("%s: Done close\n", __func__);
}
vmsg->fd_num = 0;
vmsg->payload.u64 = 0;
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
DPRINT("%s: exit\n", __func__);
return true;
}
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -927,6 +1219,12 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
return vu_set_config(dev, vmsg);
case VHOST_USER_NONE:
break;
case VHOST_USER_POSTCOPY_ADVISE:
return vu_set_postcopy_advise(dev, vmsg);
case VHOST_USER_POSTCOPY_LISTEN:
return vu_set_postcopy_listen(dev, vmsg);
case VHOST_USER_POSTCOPY_END:
return vu_set_postcopy_end(dev, vmsg);
default:
vmsg_close_fds(vmsg);
vu_panic(dev, "Unhandled request: %d", vmsg->request);

contrib/libvhost-user/libvhost-user.h

@@ -48,6 +48,8 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_NET_MTU = 4,
VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -81,6 +83,11 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ENDIAN = 23,
VHOST_USER_GET_CONFIG = 24,
VHOST_USER_SET_CONFIG = 25,
VHOST_USER_CREATE_CRYPTO_SESSION = 26,
VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
VHOST_USER_MAX
} VhostUserRequest;
@@ -277,6 +284,10 @@ struct VuDev {
* re-initialize */
vu_panic_cb panic;
const VuDevIface *iface;
/* Postcopy data */
int postcopy_ufd;
bool postcopy_listening;
};
typedef struct VuVirtqElement {

docs/devel/migration.rst

@@ -577,3 +577,44 @@ Postcopy now works with hugetlbfs backed memory:
hugepages work well; however, 1GB hugepages are likely to be problematic
since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
and until the full page is transferred the destination thread is blocked.
Postcopy with shared memory
---------------------------
Postcopy migration with shared memory needs explicit support from the other
processes that share memory and from QEMU. There are restrictions on the types
of shared memory that userfault can support.
The Linux kernel userfault support works on `/dev/shm` memory and on `hugetlbfs`
(although the kernel doesn't provide an equivalent to `madvise(MADV_DONTNEED)`
for hugetlbfs, which may be a problem in some configurations).
The vhost-user code in QEMU supports clients that have Postcopy support,
and the `vhost-user-bridge` (in `tests/`) and the DPDK package have changes
to support postcopy.
The client needs to open a userfaultfd and register the areas
of memory that it maps with userfault. The client must then pass the
userfaultfd back to QEMU together with a mapping table that allows
fault addresses in the client's address space to be converted back to
RAMBlock/offsets. The client's userfaultfd is added to the postcopy
fault-thread and page requests are made on behalf of the client by QEMU.
QEMU performs 'wake' operations on the client's userfaultfd to allow it
to continue after a page has arrived.
.. note::
There are two future improvements that would be nice:
a) Some way to make QEMU ignorant of the addresses in the client's
address space
b) Avoiding the need for QEMU to perform ufd-wake calls after the
pages have arrived
Retro-fitting postcopy to existing clients is possible:
a) A mechanism is needed for the registration with userfault as above,
and the registration needs to be coordinated with the phases of
postcopy. In vhost-user extra messages are added to the existing
control channel.
b) Any thread that can block due to guest memory accesses must be
identified and the implication understood; for example if the
guest memory access is made while holding a lock then all other
threads waiting for that lock will also be blocked.

docs/interop/vhost-user.txt

@@ -290,6 +290,15 @@ Once the source has finished migration, rings will be stopped by
the source. No further update must be done before rings are
restarted.
In postcopy migration the slave is started before all the memory has been
received from the source host, and care must be taken to avoid accessing pages
that have yet to be received. The slave opens a 'userfault'-fd and registers
the memory with it; this fd is then passed back over to the master.
The master services requests on the userfaultfd for pages that are accessed,
and when a page is available it performs WAKE ioctls on the userfaultfd
to wake the stalled slave. The client indicates support for this via the
VHOST_USER_PROTOCOL_F_PAGEFAULT feature.
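
As a rough illustration of the master side (not QEMU's actual code; the
helper name is hypothetical and error handling is elided), servicing one
fault looks like:

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Service one fault reported on the client's userfaultfd; 'page'
     * holds the page contents already fetched from the source. */
    static void service_one_fault(int client_ufd, void *page,
                                  unsigned long pagesize)
    {
        struct uffd_msg msg;

        if (read(client_ufd, &msg, sizeof(msg)) != sizeof(msg) ||
            msg.event != UFFD_EVENT_PAGEFAULT) {
            return;
        }
        struct uffdio_copy copy = {
            .dst = msg.arg.pagefault.address & ~(pagesize - 1),
            .src = (unsigned long)page,
            .len = pagesize,
            .mode = 0, /* UFFDIO_COPY itself wakes the stalled thread */
        };
        ioctl(client_ufd, UFFDIO_COPY, &copy);
    }

In QEMU proper the fault address is first translated back to a
RAMBlock/offset via the mapping table supplied with the userfaultfd, and a
separate UFFDIO_WAKE is used when the page has already arrived.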
Memory access
-------------
@@ -369,6 +378,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
#define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
Master message types
--------------------
@@ -445,12 +455,21 @@ Master message types
Id: 5
Equivalent ioctl: VHOST_SET_MEM_TABLE
Master payload: memory regions description
Slave payload: (postcopy only) memory regions description
Sets the memory map regions on the slave so it can translate the vring
addresses. In the ancillary data there is an array of file descriptors
for each memory mapped region. The size and ordering of the fds matches
the number and ordering of memory regions.
When VHOST_USER_POSTCOPY_LISTEN has been received, SET_MEM_TABLE replies with
the bases of the memory mapped regions to the master. The slave must
have mmap'd the regions but not yet accessed them and should not yet generate
a userfault event. Note NEED_REPLY_MASK is not set in this case.
QEMU then replies to the list of mappings with an empty
VHOST_USER_SET_MEM_TABLE as an acknowledgment; only upon reception of this
message may the guest start accessing the memory and generating faults.
* VHOST_USER_SET_LOG_BASE
Id: 6
@@ -689,6 +708,39 @@ Master message types
feature has been successfully negotiated.
It's a required feature for crypto devices.
* VHOST_USER_POSTCOPY_ADVISE
Id: 28
Master payload: N/A
Slave payload: userfault fd
When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, the
master advises slave that a migration with postcopy enabled is underway;
the slave must open a userfaultfd for later use.
Note that at this stage the migration is still in precopy mode.
* VHOST_USER_POSTCOPY_LISTEN
Id: 29
Master payload: N/A
Master advises slave that a transition to postcopy mode has happened.
The slave must ensure that shared memory is registered with userfaultfd
to cause faulting of non-present pages.
This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and
thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported.
* VHOST_USER_POSTCOPY_END
Id: 30
Slave payload: u64
Master advises that postcopy migration has now completed. The
slave must disable the userfaultfd. The response is an acknowledgement
only.
When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, this message
is sent at the end of the migration, after VHOST_USER_POSTCOPY_LISTEN
was previously sent.
The value returned is an error indication; 0 is success.
Slave message types
-------------------

exec.c

@@ -99,6 +99,11 @@ static MemoryRegion io_mem_unassigned;
*/
#define RAM_RESIZEABLE (1 << 2)
/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
* zero the page and wake waiting processes.
* (Set during postcopy)
*/
#define RAM_UF_ZEROPAGE (1 << 3)
#endif
#ifdef TARGET_PAGE_BITS_VARY
@@ -1790,6 +1795,17 @@ bool qemu_ram_is_shared(RAMBlock *rb)
return rb->flags & RAM_SHARED;
}
/* Note: Only set at the start of postcopy */
bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
{
return rb->flags & RAM_UF_ZEROPAGE;
}
void qemu_ram_set_uf_zeroable(RAMBlock *rb)
{
rb->flags |= RAM_UF_ZEROPAGE;
}
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
@@ -2320,6 +2336,16 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
return ramblock_ptr(block, addr);
}
/* Return the offset of a hostpointer within a ramblock */
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
{
ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
assert((uintptr_t)host >= (uintptr_t)rb->host);
assert(res < rb->max_length);
return res;
}
/*
* Translates a host ptr back to a RAMBlock, a ram_addr and an offset
* in that RAMBlock.
@@ -3744,6 +3770,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
}
if ((start + length) <= rb->used_length) {
bool need_madvise, need_fallocate;
uint8_t *host_endaddr = host_startaddr + length;
if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
error_report("ram_block_discard_range: Unaligned end address: %p",
@@ -3753,29 +3780,60 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
errno = ENOTSUP; /* If we are missing MADVISE etc */
if (rb->page_size == qemu_host_page_size) {
#if defined(CONFIG_MADVISE)
/* Note: We need the madvise MADV_DONTNEED behaviour of definitely
* freeing the page.
*/
ret = madvise(host_startaddr, length, MADV_DONTNEED);
#endif
} else {
/* Huge page case - unfortunately it can't do DONTNEED, but
* it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
* huge page file.
/* The logic here is messy;
* madvise DONTNEED fails for hugepages
* fallocate works on hugepages and shmem
*/
need_madvise = (rb->page_size == qemu_host_page_size);
need_fallocate = rb->fd != -1;
if (need_fallocate) {
/* For a file, this causes the area of the file to be zero'd
* if read, and for hugetlbfs also causes it to be unmapped
* so a userfault will trigger.
*/
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
#endif
}
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to fallocate "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("ram_block_discard_range: fallocate not available/file"
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
goto err;
#endif
}
if (need_madvise) {
/* For normal RAM this causes it to be unmapped,
* for shared memory it causes the local mapping to disappear
* and to fall back on the file contents (which we just
* fallocate'd away).
*/
#if defined(CONFIG_MADVISE)
ret = madvise(host_startaddr, length, MADV_DONTNEED);
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("ram_block_discard_range: MADVISE not available"
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
goto err;
#endif
}
trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
need_madvise, need_fallocate, ret);
} else {
error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
"/%zx/" RAM_ADDR_FMT")",

hmp.c

@@ -2423,7 +2423,18 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM:
di = value->u.dimm.data;
break;
case MEMORY_DEVICE_INFO_KIND_NVDIMM:
di = value->u.nvdimm.data;
break;
default:
di = NULL;
break;
}
if (di) {
monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
MemoryDeviceInfoKind_str(value->type),
di->id ? di->id : "");
@@ -2436,9 +2447,6 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
di->hotplugged ? "true" : "false");
monitor_printf(mon, " hotpluggable: %s\n",
di->hotpluggable ? "true" : "false");
break;
default:
break;
}
}
}
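
With the nvdimm case handled, "info memory-devices" in the HMP monitor now
lists both kinds of device; the output shape (values illustrative) follows
the format strings above:

    (qemu) info memory-devices
    Memory device [dimm]: "dimm0"
      ...
      hotplugged: true
      hotpluggable: true
    Memory device [nvdimm]: "nv0"
      ...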

hw/acpi/aml-build.c

@@ -258,6 +258,22 @@ static void build_append_int(GArray *table, uint64_t value)
}
}
/* Generic Address Structure (GAS)
* ACPI 2.0/3.0: 5.2.3.1 Generic Address Structure
* 2.0 compat note:
* @access_width must be 0, see ACPI 2.0:Table 5-1
*/
void build_append_gas(GArray *table, AmlAddressSpace as,
uint8_t bit_width, uint8_t bit_offset,
uint8_t access_width, uint64_t address)
{
build_append_int_noprefix(table, as, 1);
build_append_int_noprefix(table, bit_width, 1);
build_append_int_noprefix(table, bit_offset, 1);
build_append_int_noprefix(table, access_width, 1);
build_append_int_noprefix(table, address, 8);
}
/*
* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
* and return the offset to 0x00000000 for runtime patching.
@@ -1662,3 +1678,127 @@ void build_slit(GArray *table_data, BIOSLinker *linker)
"SLIT",
table_data->len - slit_start, 1, NULL, NULL);
}
/* build rev1/rev3/rev5.1 FADT */
void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
const char *oem_id, const char *oem_table_id)
{
int off;
int fadt_start = tbl->len;
acpi_data_push(tbl, sizeof(AcpiTableHeader));
/* FACS address to be filled by Guest linker at runtime */
off = tbl->len;
build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */
if (f->facs_tbl_offset) { /* don't patch if not supported by platform */
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, off, 4,
ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset);
}
/* DSDT address to be filled by Guest linker at runtime */
off = tbl->len;
build_append_int_noprefix(tbl, 0, 4); /* DSDT */
if (f->dsdt_tbl_offset) { /* don't patch if not supported by platform */
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, off, 4,
ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset);
}
/* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */
build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1);
/* Preferred_PM_Profile */
build_append_int_noprefix(tbl, 0 /* Unspecified */, 1);
build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */
build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */
build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */
build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */
build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */
/* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */
build_append_int_noprefix(tbl, 0, 1);
build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */
build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */
build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */
build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */
build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */
build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */
build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */
build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */
/* PM1_EVT_LEN */
build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1);
/* PM1_CNT_LEN */
build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1);
build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */
build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */
/* GPE0_BLK_LEN */
build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1);
build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */
build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */
build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */
build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */
build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */
build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */
build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */
build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */
build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */
build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */
build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */
build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */
build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */
build_append_int_noprefix(tbl, 0, 1); /* Reserved */
build_append_int_noprefix(tbl, f->flags, 4); /* Flags */
if (f->rev == 1) {
goto build_hdr;
}
build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */
build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */
/* Since ACPI 5.1 */
if ((f->rev >= 6) || ((f->rev == 5) && f->minor_ver > 0)) {
build_append_int_noprefix(tbl, f->arm_boot_arch, 2); /* ARM_BOOT_ARCH */
/* FADT Minor Version */
build_append_int_noprefix(tbl, f->minor_ver, 1);
} else {
build_append_int_noprefix(tbl, 0, 3); /* Reserved up to ACPI 5.0 */
}
build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */
/* XDSDT address to be filled by Guest linker at runtime */
off = tbl->len;
build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */
if (f->xdsdt_tbl_offset) {
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, off, 8,
ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset);
}
build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */
/* X_PM1b_EVT_BLK */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */
/* X_PM1b_CNT_BLK */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
/* X_PM2_CNT_BLK */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */
build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */
if (f->rev <= 4) {
goto build_hdr;
}
/* SLEEP_CONTROL_REG */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
/* SLEEP_STATUS_REG */
build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
/* TODO: extra fields need to be added to support revisions above rev5 */
assert(f->rev == 5);
build_hdr:
build_header(linker, tbl, (void *)(tbl->data + fadt_start),
"FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id);
}

hw/arm/virt-acpi-build.c

@@ -651,42 +651,33 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
/* FADT */
static void build_fadt(GArray *table_data, BIOSLinker *linker,
VirtMachineState *vms, unsigned dsdt_tbl_offset)
static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker,
VirtMachineState *vms, unsigned dsdt_tbl_offset)
{
int fadt_start = table_data->len;
AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt));
unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data;
uint16_t bootflags;
/* ACPI v5.1 */
AcpiFadtData fadt = {
.rev = 5,
.minor_ver = 1,
.flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
.xdsdt_tbl_offset = &dsdt_tbl_offset,
};
switch (vms->psci_conduit) {
case QEMU_PSCI_CONDUIT_DISABLED:
bootflags = 0;
fadt.arm_boot_arch = 0;
break;
case QEMU_PSCI_CONDUIT_HVC:
bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT | ACPI_FADT_ARM_PSCI_USE_HVC;
fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT |
ACPI_FADT_ARM_PSCI_USE_HVC;
break;
case QEMU_PSCI_CONDUIT_SMC:
bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT;
fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT;
break;
default:
g_assert_not_reached();
}
/* Hardware Reduced = 1 and use PSCI 0.2+ */
fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI);
fadt->arm_boot_flags = cpu_to_le16(bootflags);
/* ACPI v5.1 (fadt->revision.fadt->minor_revision) */
fadt->minor_revision = 0x1;
/* DSDT address to be filled by Guest linker */
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt),
ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
build_header(linker, table_data, (void *)(table_data->data + fadt_start),
"FACP", table_data->len - fadt_start, 5, NULL, NULL);
build_fadt(table_data, linker, &fadt, NULL, NULL);
}
/* DSDT */
@@ -761,7 +752,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
/* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
acpi_add_table(table_offsets, tables_blob);
build_fadt(tables_blob, tables->linker, vms, dsdt);
build_fadt_rev5(tables_blob, tables->linker, vms, dsdt);
acpi_add_table(table_offsets, tables_blob);
build_madt(tables_blob, tables->linker, vms);

hw/i386/acpi-build.c

@@ -91,17 +91,11 @@ typedef struct AcpiMcfgInfo {
} AcpiMcfgInfo;
typedef struct AcpiPmInfo {
bool force_rev1_fadt;
bool s3_disabled;
bool s4_disabled;
bool pcihp_bridge_en;
uint8_t s4_val;
uint16_t sci_int;
uint8_t acpi_enable_cmd;
uint8_t acpi_disable_cmd;
uint32_t gpe0_blk;
uint32_t gpe0_blk_len;
uint32_t io_base;
AcpiFadtData fadt;
uint16_t cpu_hp_io_base;
uint16_t pcihp_io_base;
uint16_t pcihp_io_len;
@@ -124,21 +118,59 @@ typedef struct AcpiBuildPciBusHotplugState {
bool pcihp_bridge_en;
} AcpiBuildPciBusHotplugState;
static void init_common_fadt_data(Object *o, AcpiFadtData *data)
{
uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
AmlAddressSpace as = AML_AS_SYSTEM_IO;
AcpiFadtData fadt = {
.rev = 3,
.flags =
(1 << ACPI_FADT_F_WBINVD) |
(1 << ACPI_FADT_F_PROC_C1) |
(1 << ACPI_FADT_F_SLP_BUTTON) |
(1 << ACPI_FADT_F_RTC_S4) |
(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK) |
/* APIC destination mode ("Flat Logical") has an upper limit of 8
* CPUs; for more than 8 CPUs, "Clustered Logical" mode has to be
* used
*/
((max_cpus > 8) ? (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0),
.int_model = 1 /* Multiple APIC */,
.rtc_century = RTC_CENTURY,
.plvl2_lat = 0xfff /* C2 state not supported */,
.plvl3_lat = 0xfff /* C3 state not supported */,
.smi_cmd = ACPI_PORT_SMI_CMD,
.sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL),
.acpi_enable_cmd =
object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL),
.acpi_disable_cmd =
object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL),
.pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io },
.pm1a_cnt = { .space_id = as, .bit_width = 2 * 8,
.address = io + 0x04 },
.pm_tmr = { .space_id = as, .bit_width = 4 * 8, .address = io + 0x08 },
.gpe0_blk = { .space_id = as, .bit_width =
object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK_LEN, NULL) * 8,
.address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL)
},
};
*data = fadt;
}
static void acpi_get_pm_info(AcpiPmInfo *pm)
{
Object *piix = piix4_pm_find();
Object *lpc = ich9_lpc_find();
Object *obj = NULL;
Object *obj = piix ? piix : lpc;
QObject *o;
pm->force_rev1_fadt = false;
pm->cpu_hp_io_base = 0;
pm->pcihp_io_base = 0;
pm->pcihp_io_len = 0;
init_common_fadt_data(obj, &pm->fadt);
if (piix) {
/* w2k requires FADT(rev1) or it won't boot, keep PC compatible */
pm->force_rev1_fadt = true;
obj = piix;
pm->fadt.rev = 1;
pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE;
pm->pcihp_io_base =
object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL);
@@ -146,11 +178,19 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL);
}
if (lpc) {
obj = lpc;
struct AcpiGenericAddress r = { .space_id = AML_AS_SYSTEM_IO,
.bit_width = 8, .address = ICH9_RST_CNT_IOPORT };
pm->fadt.reset_reg = r;
pm->fadt.reset_val = 0xf;
pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP;
pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE;
}
assert(obj);
/* The above need not be conditional on machine type because the reset port
* happens to be the same on PIIX (pc) and ICH9 (q35). */
QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT);
/* Fill in optional s3/s4 related properties */
o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL);
if (o) {
@@ -174,22 +214,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
}
qobject_decref(o);
/* Fill in mandatory properties */
pm->sci_int = object_property_get_uint(obj, ACPI_PM_PROP_SCI_INT, NULL);
pm->acpi_enable_cmd = object_property_get_uint(obj,
ACPI_PM_PROP_ACPI_ENABLE_CMD,
NULL);
pm->acpi_disable_cmd =
object_property_get_uint(obj,
ACPI_PM_PROP_ACPI_DISABLE_CMD,
NULL);
pm->io_base = object_property_get_uint(obj, ACPI_PM_PROP_PM_IO_BASE,
NULL);
pm->gpe0_blk = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK,
NULL);
pm->gpe0_blk_len = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK_LEN,
NULL);
pm->pcihp_bridge_en =
object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support",
NULL);
@@ -257,8 +281,6 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64)
NULL));
}
#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */
static void acpi_align_size(GArray *blob, unsigned align)
{
/* Align size to multiple of given size. This reduces the chance
@@ -276,106 +298,6 @@ build_facs(GArray *table_data, BIOSLinker *linker)
facs->length = cpu_to_le32(sizeof(*facs));
}
/* Load chipset information in FADT */
static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiPmInfo *pm)
{
fadt->model = 1;
fadt->reserved1 = 0;
fadt->sci_int = cpu_to_le16(pm->sci_int);
fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD);
fadt->acpi_enable = pm->acpi_enable_cmd;
fadt->acpi_disable = pm->acpi_disable_cmd;
/* EVT, CNT, TMR offset matches hw/acpi/core.c */
fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base);
fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04);
fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08);
fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk);
/* EVT, CNT, TMR length matches hw/acpi/core.c */
fadt->pm1_evt_len = 4;
fadt->pm1_cnt_len = 2;
fadt->pm_tmr_len = 4;
fadt->gpe0_blk_len = pm->gpe0_blk_len;
fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */
fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */
fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) |
(1 << ACPI_FADT_F_PROC_C1) |
(1 << ACPI_FADT_F_SLP_BUTTON) |
(1 << ACPI_FADT_F_RTC_S4));
fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK);
/* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs
* For more than 8 CPUs, "Clustered Logical" mode has to be used
*/
if (max_cpus > 8) {
fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL);
}
fadt->century = RTC_CENTURY;
if (pm->force_rev1_fadt) {
return;
}
fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_RESET_REG_SUP);
fadt->reset_value = 0xf;
fadt->reset_register.space_id = AML_SYSTEM_IO;
fadt->reset_register.bit_width = 8;
fadt->reset_register.address = cpu_to_le64(ICH9_RST_CNT_IOPORT);
/* The above need not be conditional on machine type because the reset port
* happens to be the same on PIIX (pc) and ICH9 (q35). */
QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT);
fadt->xpm1a_event_block.space_id = AML_SYSTEM_IO;
fadt->xpm1a_event_block.bit_width = fadt->pm1_evt_len * 8;
fadt->xpm1a_event_block.address = cpu_to_le64(pm->io_base);
fadt->xpm1a_control_block.space_id = AML_SYSTEM_IO;
fadt->xpm1a_control_block.bit_width = fadt->pm1_cnt_len * 8;
fadt->xpm1a_control_block.address = cpu_to_le64(pm->io_base + 0x4);
fadt->xpm_timer_block.space_id = AML_SYSTEM_IO;
fadt->xpm_timer_block.bit_width = fadt->pm_tmr_len * 8;
fadt->xpm_timer_block.address = cpu_to_le64(pm->io_base + 0x8);
fadt->xgpe0_block.space_id = AML_SYSTEM_IO;
fadt->xgpe0_block.bit_width = pm->gpe0_blk_len * 8;
fadt->xgpe0_block.address = cpu_to_le64(pm->gpe0_blk);
}
/* FADT */
static void
build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm,
unsigned facs_tbl_offset, unsigned dsdt_tbl_offset,
const char *oem_id, const char *oem_table_id)
{
AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt));
unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data;
unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data;
unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data;
int fadt_size = sizeof(*fadt);
int rev = 3;
/* FACS address to be filled by Guest linker */
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl),
ACPI_BUILD_TABLE_FILE, facs_tbl_offset);
/* DSDT address to be filled by Guest linker */
fadt_setup(fadt, pm);
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt),
ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
if (pm->force_rev1_fadt) {
rev = 1;
fadt_size = offsetof(typeof(*fadt), reset_register);
} else {
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt),
ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
}
build_header(linker, table_data,
(void *)fadt, "FACP", fadt_size, rev, oem_id, oem_table_id);
}
void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
const CPUArchIdList *apic_ids, GArray *entry)
{
@@ -2053,7 +1975,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
crs = aml_resource_template();
aml_append(crs,
aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len)
aml_io(
AML_DECODE16,
pm->fadt.gpe0_blk.address,
pm->fadt.gpe0_blk.address,
1,
pm->fadt.gpe0_blk.bit_width / 8)
);
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
@@ -2323,6 +2250,55 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
#define HOLE_640K_START (640 * 1024)
#define HOLE_640K_END (1024 * 1024)
static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
uint64_t len, int default_node)
{
MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list();
MemoryDeviceInfoList *info;
MemoryDeviceInfo *mi;
PCDIMMDeviceInfo *di;
uint64_t end = base + len, cur, size;
bool is_nvdimm;
AcpiSratMemoryAffinity *numamem;
MemoryAffinityFlags flags;
for (cur = base, info = info_list;
cur < end;
cur += size, info = info->next) {
numamem = acpi_data_push(table_data, sizeof *numamem);
if (!info) {
build_srat_memory(numamem, cur, end - cur, default_node,
MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
break;
}
mi = info->value;
is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
if (cur < di->addr) {
build_srat_memory(numamem, cur, di->addr - cur, default_node,
MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
numamem = acpi_data_push(table_data, sizeof *numamem);
}
size = di->size;
flags = MEM_AFFINITY_ENABLED;
if (di->hotpluggable) {
flags |= MEM_AFFINITY_HOTPLUGGABLE;
}
if (is_nvdimm) {
flags |= MEM_AFFINITY_NON_VOLATILE;
}
build_srat_memory(numamem, di->addr, size, di->node, flags);
}
qapi_free_MemoryDeviceInfoList(info_list);
}
static void
build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
{
@@ -2434,10 +2410,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
* providing _PXM method if necessary.
*/
if (hotplugabble_address_space_size) {
numamem = acpi_data_push(table_data, sizeof *numamem);
build_srat_memory(numamem, pcms->hotplug_memory.base,
hotplugabble_address_space_size, pcms->numa_nodes - 1,
MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base,
hotplugabble_address_space_size,
pcms->numa_nodes - 1);
}
build_header(linker, table_data,
@@ -2700,7 +2675,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
/* ACPI tables pointed to by RSDT */
fadt = tables_blob->len;
acpi_add_table(table_offsets, tables_blob);
build_fadt(tables_blob, tables->linker, &pm, facs, dsdt,
pm.fadt.facs_tbl_offset = &facs;
pm.fadt.dsdt_tbl_offset = &dsdt;
pm.fadt.xdsdt_tbl_offset = &dsdt;
build_fadt(tables_blob, tables->linker, &pm.fadt,
slic_oem.id, slic_oem.table_id);
aml_len += tables_blob->len - fadt;

hw/isa/apm.c

@@ -34,7 +34,6 @@
#endif
/* fixed I/O location */
#define APM_CNT_IOPORT 0xb2
#define APM_STS_IOPORT 0xb3
static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,

hw/mem/pc-dimm.c

@@ -20,6 +20,7 @@
#include "qemu/osdep.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qapi/visitor.h"
@@ -162,45 +163,6 @@ uint64_t get_plugged_memory_size(void)
return pc_existing_dimms_capacity(&error_abort);
}
int qmp_pc_dimm_device_list(Object *obj, void *opaque)
{
MemoryDeviceInfoList ***prev = opaque;
if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
DeviceState *dev = DEVICE(obj);
if (dev->realized) {
MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
DeviceClass *dc = DEVICE_GET_CLASS(obj);
PCDIMMDevice *dimm = PC_DIMM(obj);
if (dev->id) {
di->has_id = true;
di->id = g_strdup(dev->id);
}
di->hotplugged = dev->hotplugged;
di->hotpluggable = dc->hotpluggable;
di->addr = dimm->addr;
di->slot = dimm->slot;
di->node = dimm->node;
di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
NULL);
di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
info->u.dimm.data = di;
elem->value = info;
elem->next = NULL;
**prev = elem;
*prev = &elem->next;
}
}
object_child_foreach(obj, qmp_pc_dimm_device_list, opaque);
return 0;
}
static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
{
unsigned long *bitmap = opaque;
@@ -276,6 +238,57 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
return 0;
}
MemoryDeviceInfoList *qmp_pc_dimm_device_list(void)
{
GSList *dimms = NULL, *item;
MemoryDeviceInfoList *list = NULL, *prev = NULL;
object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &dimms);
for (item = dimms; item; item = g_slist_next(item)) {
PCDIMMDevice *dimm = PC_DIMM(item->data);
Object *obj = OBJECT(dimm);
MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
bool is_nvdimm = object_dynamic_cast(obj, TYPE_NVDIMM);
DeviceClass *dc = DEVICE_GET_CLASS(obj);
DeviceState *dev = DEVICE(obj);
if (dev->id) {
di->has_id = true;
di->id = g_strdup(dev->id);
}
di->hotplugged = dev->hotplugged;
di->hotpluggable = dc->hotpluggable;
di->addr = dimm->addr;
di->slot = dimm->slot;
di->node = dimm->node;
di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL);
di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
if (!is_nvdimm) {
info->u.dimm.data = di;
info->type = MEMORY_DEVICE_INFO_KIND_DIMM;
} else {
info->u.nvdimm.data = di;
info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM;
}
elem->value = info;
elem->next = NULL;
if (prev) {
prev->next = elem;
} else {
list = elem;
}
prev = elem;
}
g_slist_free(dimms);
return list;
}
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
uint64_t *hint, uint64_t align, uint64_t size,

View File

@@ -26,6 +26,7 @@
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#define VIRTIO_NET_VM_VERSION 11
@@ -48,19 +49,21 @@
(offsetof(container, field) + sizeof(((container *)0)->field))
typedef struct VirtIOFeature {
uint32_t flags;
uint64_t flags;
size_t end;
} VirtIOFeature;
static VirtIOFeature feature_sizes[] = {
{.flags = 1 << VIRTIO_NET_F_MAC,
{.flags = 1ULL << VIRTIO_NET_F_MAC,
.end = endof(struct virtio_net_config, mac)},
{.flags = 1 << VIRTIO_NET_F_STATUS,
{.flags = 1ULL << VIRTIO_NET_F_STATUS,
.end = endof(struct virtio_net_config, status)},
{.flags = 1 << VIRTIO_NET_F_MQ,
{.flags = 1ULL << VIRTIO_NET_F_MQ,
.end = endof(struct virtio_net_config, max_virtqueue_pairs)},
{.flags = 1 << VIRTIO_NET_F_MTU,
{.flags = 1ULL << VIRTIO_NET_F_MTU,
.end = endof(struct virtio_net_config, mtu)},
{.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
.end = endof(struct virtio_net_config, duplex)},
{}
};
@@ -89,6 +92,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
memcpy(netcfg.mac, n->mac, ETH_ALEN);
virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
netcfg.duplex = n->net_conf.duplex;
memcpy(config, &netcfg, n->config_size);
}
@@ -1938,7 +1943,26 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
int i;
if (n->net_conf.mtu) {
n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
}
if (n->net_conf.duplex_str) {
if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
n->net_conf.duplex = DUPLEX_HALF;
} else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
n->net_conf.duplex = DUPLEX_FULL;
} else {
error_setg(errp, "'duplex' must be 'half' or 'full'");
}
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
} else {
n->net_conf.duplex = DUPLEX_UNKNOWN;
}
if (n->net_conf.speed < SPEED_UNKNOWN) {
error_setg(errp, "'speed' must be between 0 and INT_MAX");
} else if (n->net_conf.speed >= 0) {
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
virtio_net_set_config_size(n, n->host_features);
@@ -2109,45 +2133,46 @@ static const VMStateDescription vmstate_virtio_net = {
};
static Property virtio_net_properties[] = {
DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
VIRTIO_NET_F_CSUM, true),
DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_CSUM, true),
DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO4, true),
DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO6, true),
DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ECN, true),
DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_UFO, true),
DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ANNOUNCE, true),
DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO4, true),
DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO6, true),
DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
VIRTIO_NET_F_HOST_ECN, true),
DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
VIRTIO_NET_F_HOST_UFO, true),
DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
VIRTIO_NET_F_MRG_RXBUF, true),
DEFINE_PROP_BIT("status", VirtIONet, host_features,
DEFINE_PROP_BIT64("status", VirtIONet, host_features,
VIRTIO_NET_F_STATUS, true),
DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VQ, true),
DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX, true),
DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VLAN, true),
DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX_EXTRA, true),
DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_MAC_ADDR, true),
DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
TX_TIMER_INTERVAL),
@@ -2160,6 +2185,8 @@ static Property virtio_net_properties[] = {
DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
true),
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
DEFINE_PROP_END_OF_LIST(),
};
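
With the new properties ("speed" is a signed 32-bit value, in Mbps per the
virtio spec, and "duplex" is the string "half" or "full"), guest-visible link
settings can be chosen on the command line; an illustrative invocation:

    qemu-system-x86_64 ... \
        -netdev user,id=net0 \
        -device virtio-net-pci,netdev=net0,speed=10000,duplex=full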

hw/pci/pci.c

@@ -2048,18 +2048,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
static void pci_default_realize(PCIDevice *dev, Error **errp)
{
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
if (pc->init) {
if (pc->init(dev) < 0) {
error_setg(errp, "Device initialization failed");
return;
}
}
}
PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction,
const char *name)
{
@@ -2532,13 +2520,11 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev)
static void pci_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *k = DEVICE_CLASS(klass);
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
k->realize = pci_qdev_realize;
k->unrealize = pci_qdev_unrealize;
k->bus_type = TYPE_PCI_BUS;
k->props = pci_props;
pc->realize = pci_default_realize;
}
static void pci_device_class_base_init(ObjectClass *klass, void *data)

hw/ppc/spapr.c

@@ -722,8 +722,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
}
if (hotplug_lmb_start) {
MemoryDeviceInfoList **prev = &dimms;
qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
dimms = qmp_pc_dimm_device_list();
}
/* ibm,dynamic-memory */

hw/virtio/trace-events

@@ -3,9 +3,23 @@
# hw/virtio/vhost.c
vhost_commit(bool started, bool changed) "Started: %d Changed: %d"
vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64
vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64
vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
vhost_section(const char *name, int r) "%s:%d"
# hw/virtio/vhost-user.c
vhost_user_postcopy_end_entry(void) ""
vhost_user_postcopy_end_exit(void) ""
vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d"
vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64
vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64
vhost_user_postcopy_listen(void) ""
vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d"
vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64
vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
# hw/virtio/virtio.c
virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"

hw/virtio/vhost-user.c

@@ -18,11 +18,15 @@
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>
#include <linux/userfaultfd.h>
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
@@ -41,7 +45,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -76,6 +80,9 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_CONFIG = 25,
VHOST_USER_CREATE_CRYPTO_SESSION = 26,
VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
VHOST_USER_MAX
} VhostUserRequest;
@@ -164,8 +171,23 @@ static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_VERSION (0x1)
struct vhost_user {
struct vhost_dev *dev;
CharBackend *chr;
int slave_fd;
NotifierWithReturn postcopy_notifier;
struct PostCopyFD postcopy_fd;
uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
/* Length of the region_rb and region_rb_offset arrays */
size_t region_rb_len;
/* RAMBlock associated with a given region */
RAMBlock **region_rb;
/* The offset from the start of the RAMBlock to the start of the
* vhost region.
*/
ram_addr_t *region_rb_offset;
/* True once we've entered postcopy_listen */
bool postcopy_listen;
};
static bool ioeventfd_enabled(void)
@ -330,14 +352,167 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
return 0;
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
struct vhost_memory *mem)
{
struct vhost_user *u = dev->opaque;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
VhostUserMsg msg_reply;
int region_i, msg_i;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_MEM_TABLE,
.hdr.flags = VHOST_USER_VERSION,
};
if (reply_supported) {
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
if (u->region_rb_len < dev->mem->nregions) {
u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
dev->mem->nregions);
memset(&(u->region_rb[u->region_rb_len]), '\0',
sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
u->region_rb_len = dev->mem->nregions;
}
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t offset;
MemoryRegion *mr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
reg->memory_size,
reg->guest_phys_addr,
reg->userspace_addr, offset);
u->region_rb_offset[i] = offset;
u->region_rb[i] = mr->ram_block;
msg.payload.memory.regions[fd_num].userspace_addr =
reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr =
reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
} else {
u->region_rb_offset[i] = 0;
u->region_rb[i] = NULL;
}
}
msg.payload.memory.nregions = fd_num;
if (!fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
}
msg.hdr.size = sizeof(msg.payload.memory.nregions);
msg.hdr.size += sizeof(msg.payload.memory.padding);
msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
}
if (vhost_user_read(dev, &msg_reply) < 0) {
return -1;
}
if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
error_report("%s: Received unexpected msg type."
"Expected %d received %d", __func__,
VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
return -1;
}
/* We're using the same structure, just reusing one of the
* fields, so it should be the same size.
*/
if (msg_reply.hdr.size != msg.hdr.size) {
error_report("%s: Unexpected size for postcopy reply "
"%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
return -1;
}
memset(u->postcopy_client_bases, 0,
sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
/* They're in the same order as the regions that were sent
* but some of the regions were skipped (above) if they
* didn't have fds
*/
for (msg_i = 0, region_i = 0;
region_i < dev->mem->nregions;
region_i++) {
if (msg_i < fd_num &&
msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
dev->mem->regions[region_i].guest_phys_addr) {
u->postcopy_client_bases[region_i] =
msg_reply.payload.memory.regions[msg_i].userspace_addr;
trace_vhost_user_set_mem_table_postcopy(
msg_reply.payload.memory.regions[msg_i].userspace_addr,
msg.payload.memory.regions[msg_i].userspace_addr,
msg_i, region_i);
msg_i++;
}
}
if (msg_i != fd_num) {
error_report("%s: postcopy reply not fully consumed "
"%d vs %zd",
__func__, msg_i, fd_num);
return -1;
}
/* Now we've registered this with the postcopy code, we ack to the client,
* because we are now in a position to deal with any faults
* it generates.
*/
/* TODO: Use this for failure cases as well with a bad value */
msg.hdr.size = sizeof(msg.payload.u64);
msg.payload.u64 = 0; /* OK */
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
if (reply_supported) {
return process_message_reply(dev, &msg);
}
return 0;
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
struct vhost_user *u = dev->opaque;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
!do_postcopy;
if (do_postcopy) {
/* Postcopy has enough differences that it's best done in its own
* version
*/
return vhost_user_set_mem_table_postcopy(dev, mem);
}
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_MEM_TABLE,
@ -362,9 +537,11 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
error_report("Failed preparing vhost-user memory table msg");
return -1;
}
msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.payload.memory.regions[fd_num].userspace_addr =
reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].guest_phys_addr =
reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
fds[fd_num++] = fd;
}
@ -791,6 +968,219 @@ out:
return ret;
}
/*
* Called back from the postcopy fault thread when a fault is received on our
* ufd.
* TODO: This is Linux specific
*/
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
void *ufd)
{
struct vhost_dev *dev = pcfd->data;
struct vhost_user *u = dev->opaque;
struct uffd_msg *msg = ufd;
uint64_t faultaddr = msg->arg.pagefault.address;
RAMBlock *rb = NULL;
uint64_t rb_offset;
int i;
trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
dev->mem->nregions);
for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
trace_vhost_user_postcopy_fault_handler_loop(i,
u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
if (faultaddr >= u->postcopy_client_bases[i]) {
/* Offset of the fault address in the vhost region */
uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
if (region_offset < dev->mem->regions[i].memory_size) {
rb_offset = region_offset + u->region_rb_offset[i];
trace_vhost_user_postcopy_fault_handler_found(i,
region_offset, rb_offset);
rb = u->region_rb[i];
return postcopy_request_shared_page(pcfd, rb, faultaddr,
rb_offset);
}
}
}
error_report("%s: Failed to find region for fault %" PRIx64,
__func__, faultaddr);
return -1;
}
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t offset)
{
struct vhost_dev *dev = pcfd->data;
struct vhost_user *u = dev->opaque;
int i;
trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
if (!u) {
return 0;
}
/* Translate the offset into an address in the client's address space */
for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
if (u->region_rb[i] == rb &&
offset >= u->region_rb_offset[i] &&
offset < (u->region_rb_offset[i] +
dev->mem->regions[i].memory_size)) {
uint64_t client_addr = (offset - u->region_rb_offset[i]) +
u->postcopy_client_bases[i];
trace_vhost_user_postcopy_waker_found(client_addr);
return postcopy_wake_shared(pcfd, client_addr, rb);
}
}
trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
return 0;
}
/*
* Called at the start of an inbound postcopy on reception of the
* 'advise' command.
*/
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->chr;
int ufd;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_POSTCOPY_ADVISE,
.hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_setg(errp, "Failed to send postcopy_advise to vhost");
return -1;
}
if (vhost_user_read(dev, &msg) < 0) {
error_setg(errp, "Failed to get postcopy_advise reply from vhost");
return -1;
}
if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
error_setg(errp, "Unexpected msg type. Expected %d received %d",
VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
return -1;
}
if (msg.hdr.size) {
error_setg(errp, "Received bad msg size.");
return -1;
}
ufd = qemu_chr_fe_get_msgfd(chr);
if (ufd < 0) {
error_setg(errp, "%s: Failed to get ufd", __func__);
return -1;
}
fcntl(ufd, F_SETFL, O_NONBLOCK);
/* register ufd with userfault thread */
u->postcopy_fd.fd = ufd;
u->postcopy_fd.data = dev;
u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
u->postcopy_fd.waker = vhost_user_postcopy_waker;
u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
postcopy_register_shared_ufd(&u->postcopy_fd);
return 0;
}
/*
* Called at the switch to postcopy on reception of the 'listen' command.
*/
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
struct vhost_user *u = dev->opaque;
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_POSTCOPY_LISTEN,
.hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
};
u->postcopy_listen = true;
trace_vhost_user_postcopy_listen();
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_setg(errp, "Failed to send postcopy_listen to vhost");
return -1;
}
ret = process_message_reply(dev, &msg);
if (ret) {
error_setg(errp, "Failed to receive reply to postcopy_listen");
return ret;
}
return 0;
}
/*
* Called at the end of postcopy
*/
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
VhostUserMsg msg = {
.hdr.request = VHOST_USER_POSTCOPY_END,
.hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
};
int ret;
struct vhost_user *u = dev->opaque;
trace_vhost_user_postcopy_end_entry();
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_setg(errp, "Failed to send postcopy_end to vhost");
return -1;
}
ret = process_message_reply(dev, &msg);
if (ret) {
error_setg(errp, "Failed to receive reply to postcopy_end");
return ret;
}
postcopy_unregister_shared_ufd(&u->postcopy_fd);
u->postcopy_fd.handler = NULL;
trace_vhost_user_postcopy_end_exit();
return 0;
}
static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
void *opaque)
{
struct PostcopyNotifyData *pnd = opaque;
struct vhost_user *u = container_of(notifier, struct vhost_user,
postcopy_notifier);
struct vhost_dev *dev = u->dev;
switch (pnd->reason) {
case POSTCOPY_NOTIFY_PROBE:
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
/* TODO: Get the device name into this error somehow */
error_setg(pnd->errp,
"vhost-user backend not capable of postcopy");
return -ENOENT;
}
break;
case POSTCOPY_NOTIFY_INBOUND_ADVISE:
return vhost_user_postcopy_advise(dev, pnd->errp);
case POSTCOPY_NOTIFY_INBOUND_LISTEN:
return vhost_user_postcopy_listen(dev, pnd->errp);
case POSTCOPY_NOTIFY_INBOUND_END:
return vhost_user_postcopy_end(dev, pnd->errp);
default:
/* We ignore notifications we don't know about */
break;
}
return 0;
}
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
uint64_t features, protocol_features;
@ -802,6 +1192,7 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
u = g_new0(struct vhost_user, 1);
u->chr = opaque;
u->slave_fd = -1;
u->dev = dev;
dev->opaque = u;
err = vhost_user_get_features(dev, &features);
@ -858,6 +1249,9 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
return err;
}
u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
postcopy_add_notifier(&u->postcopy_notifier);
return 0;
}
@ -868,11 +1262,20 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
u = dev->opaque;
if (u->postcopy_notifier.notify) {
postcopy_remove_notifier(&u->postcopy_notifier);
u->postcopy_notifier.notify = NULL;
}
if (u->slave_fd >= 0) {
qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
close(u->slave_fd);
u->slave_fd = -1;
}
g_free(u->region_rb);
u->region_rb = NULL;
g_free(u->region_rb_offset);
u->region_rb_offset = NULL;
u->region_rb_len = 0;
g_free(u);
dev->opaque = 0;
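
Distilled from the fault handler and waker above, the translation between the backend's (client) virtual addresses and RAMBlock offsets is a pair of per-region affine maps; a sketch, using the struct vhost_user fields introduced in this patch for a matching region i:

/* client address -> RAMBlock offset (fault handler direction) */
uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
uint64_t rb_offset     = region_offset + u->region_rb_offset[i];

/* the inverse, used by the waker: RAMBlock offset -> client address */
uint64_t client_addr = (rb_offset - u->region_rb_offset[i]) +
                       u->postcopy_client_bases[i];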


@ -522,10 +522,28 @@ static void vhost_region_add_section(struct vhost_dev *dev,
uint64_t mrs_gpa = section->offset_within_address_space;
uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
RAMBlock *mrs_rb = section->mr->ram_block;
size_t mrs_page = qemu_ram_pagesize(mrs_rb);
trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
mrs_host);
/* Round the section to its page size */
/* First align the start down to a page boundary */
uint64_t alignage = mrs_host & (mrs_page - 1);
if (alignage) {
mrs_host -= alignage;
mrs_size += alignage;
mrs_gpa -= alignage;
}
/* Now align the size up to a page boundary */
alignage = mrs_size & (mrs_page - 1);
if (alignage) {
mrs_size += mrs_page - alignage;
}
trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size,
mrs_host);
if (dev->n_tmp_sections) {
/* Since we already have at least one section, let's see if
* this extends it; since we're scanning in order, we only
@ -542,18 +560,46 @@ static void vhost_region_add_section(struct vhost_dev *dev,
prev_sec->offset_within_region;
uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
if (prev_gpa_end + 1 == mrs_gpa &&
prev_host_end + 1 == mrs_host &&
section->mr == prev_sec->mr &&
(!dev->vhost_ops->vhost_backend_can_merge ||
dev->vhost_ops->vhost_backend_can_merge(dev,
if (mrs_gpa <= (prev_gpa_end + 1)) {
/* OK, looks like overlapping/intersecting - it's possible that
* the rounding to page sizes has made them overlap, but they should
* match up in the same RAMBlock if they do.
*/
if (mrs_gpa < prev_gpa_start) {
error_report("%s:Section rounded to %"PRIx64
" prior to previous %"PRIx64,
__func__, mrs_gpa, prev_gpa_start);
/* A way to cleanly fail here would be better */
return;
}
/* Offset from the start of the previous GPA to this GPA */
size_t offset = mrs_gpa - prev_gpa_start;
if (prev_host_start + offset == mrs_host &&
section->mr == prev_sec->mr &&
(!dev->vhost_ops->vhost_backend_can_merge ||
dev->vhost_ops->vhost_backend_can_merge(dev,
mrs_host, mrs_size,
prev_host_start, prev_size))) {
/* The two sections abut */
need_add = false;
prev_sec->size = int128_add(prev_sec->size, section->size);
trace_vhost_region_add_section_abut(section->mr->name,
mrs_size + prev_size);
uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
need_add = false;
prev_sec->offset_within_address_space =
MIN(prev_gpa_start, mrs_gpa);
prev_sec->offset_within_region =
MIN(prev_host_start, mrs_host) -
(uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
mrs_host));
trace_vhost_region_add_section_merge(section->mr->name,
int128_get64(prev_sec->size),
prev_sec->offset_within_address_space,
prev_sec->offset_within_region);
} else {
error_report("%s: Overlapping but not coherent sections "
"at %"PRIx64,
__func__, mrs_gpa);
return;
}
}
}
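
As a worked example of the rounding above, assume a 4 KiB page size (mrs_page == 0x1000) and a section with mrs_host == 0x7f0000001200, mrs_gpa == 0x101200 and mrs_size == 0xe00 (values illustrative):

uint64_t alignage = mrs_host & (mrs_page - 1);    /* 0x200 */
/* align the start down: host 0x7f0000001000, gpa 0x101000, size 0x1000 */
mrs_host -= alignage; mrs_size += alignage; mrs_gpa -= alignage;
/* align the size up: 0x1000 & 0xfff == 0, already a page multiple */

The merge path then treats sections whose rounded GPA ranges touch or overlap as one region, provided they resolve to the same RAMBlock.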


@ -68,10 +68,14 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr);
RAMBlock *qemu_ram_block_by_name(const char *name);
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset);
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
void qemu_ram_set_uf_zeroable(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
size_t qemu_ram_pagesize_largest(void);
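
The new qemu_ram_block_host_offset() is the inverse of the existing host-pointer lookup; a sketch of the expected round trip, assuming ptr points into some RAMBlock's host mapping:

ram_addr_t offset;
RAMBlock *rb = qemu_ram_block_from_host(ptr, false, &offset);

assert(rb && qemu_ram_block_host_offset(rb, ptr) == offset);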


@ -40,18 +40,6 @@ enum {
ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE,
};
/*
* ACPI 2.0 Generic Address Space definition.
*/
struct Acpi20GenericAddress {
uint8_t address_space_id;
uint8_t register_bit_width;
uint8_t register_bit_offset;
uint8_t reserved;
uint64_t address;
} QEMU_PACKED;
typedef struct Acpi20GenericAddress Acpi20GenericAddress;
struct AcpiRsdpDescriptor { /* Root System Descriptor Pointer */
uint64_t signature; /* ACPI signature, contains "RSD PTR " */
uint8_t checksum; /* To make sum of struct == 0 */
@ -87,104 +75,44 @@ struct AcpiTableHeader {
} QEMU_PACKED;
typedef struct AcpiTableHeader AcpiTableHeader;
/*
* ACPI Fixed ACPI Description Table (FADT)
*/
#define ACPI_FADT_COMMON_DEF /* FADT common definition */ \
ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \
uint32_t firmware_ctrl; /* Physical address of FACS */ \
uint32_t dsdt; /* Physical address of DSDT */ \
uint8_t model; /* System Interrupt Model */ \
uint8_t reserved1; /* Reserved */ \
uint16_t sci_int; /* System vector of SCI interrupt */ \
uint32_t smi_cmd; /* Port address of SMI command port */ \
uint8_t acpi_enable; /* Value to write to smi_cmd to enable ACPI */ \
uint8_t acpi_disable; /* Value to write to smi_cmd to disable ACPI */ \
/* Value to write to SMI CMD to enter S4BIOS state */ \
uint8_t S4bios_req; \
uint8_t reserved2; /* Reserved - must be zero */ \
/* Port address of Power Mgt 1a acpi_event Reg Blk */ \
uint32_t pm1a_evt_blk; \
/* Port address of Power Mgt 1b acpi_event Reg Blk */ \
uint32_t pm1b_evt_blk; \
uint32_t pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */ \
uint32_t pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */ \
uint32_t pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */ \
uint32_t pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl Reg Blk */ \
/* Port addr of General Purpose acpi_event 0 Reg Blk */ \
uint32_t gpe0_blk; \
/* Port addr of General Purpose acpi_event 1 Reg Blk */ \
uint32_t gpe1_blk; \
uint8_t pm1_evt_len; /* Byte length of ports at pm1_x_evt_blk */ \
uint8_t pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */ \
uint8_t pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */ \
uint8_t pm_tmr_len; /* Byte Length of ports at pm_tm_blk */ \
uint8_t gpe0_blk_len; /* Byte Length of ports at gpe0_blk */ \
uint8_t gpe1_blk_len; /* Byte Length of ports at gpe1_blk */ \
uint8_t gpe1_base; /* Offset in gpe model where gpe1 events start */ \
uint8_t reserved3; /* Reserved */ \
uint16_t plvl2_lat; /* Worst case HW latency to enter/exit C2 state */ \
uint16_t plvl3_lat; /* Worst case HW latency to enter/exit C3 state */ \
uint16_t flush_size; /* Size of area read to flush caches */ \
uint16_t flush_stride; /* Stride used in flushing caches */ \
uint8_t duty_offset; /* Bit location of duty cycle field in p_cnt reg */ \
uint8_t duty_width; /* Bit width of duty cycle field in p_cnt reg */ \
uint8_t day_alrm; /* Index to day-of-month alarm in RTC CMOS RAM */ \
uint8_t mon_alrm; /* Index to month-of-year alarm in RTC CMOS RAM */ \
uint8_t century; /* Index to century in RTC CMOS RAM */ \
/* IA-PC Boot Architecture Flags (see below for individual flags) */ \
uint16_t boot_flags; \
uint8_t reserved; /* Reserved, must be zero */ \
/* Miscellaneous flag bits (see below for individual flags) */ \
uint32_t flags; \
/* 64-bit address of the Reset register */ \
struct AcpiGenericAddress reset_register; \
/* Value to write to the reset_register port to reset the system */ \
uint8_t reset_value; \
/* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ \
uint16_t arm_boot_flags; \
uint8_t minor_revision; /* FADT Minor Revision (ACPI 5.1) */ \
uint64_t x_facs; /* 64-bit physical address of FACS */ \
uint64_t x_dsdt; /* 64-bit physical address of DSDT */ \
/* 64-bit Extended Power Mgt 1a Event Reg Blk address */ \
struct AcpiGenericAddress xpm1a_event_block; \
/* 64-bit Extended Power Mgt 1b Event Reg Blk address */ \
struct AcpiGenericAddress xpm1b_event_block; \
/* 64-bit Extended Power Mgt 1a Control Reg Blk address */ \
struct AcpiGenericAddress xpm1a_control_block; \
/* 64-bit Extended Power Mgt 1b Control Reg Blk address */ \
struct AcpiGenericAddress xpm1b_control_block; \
/* 64-bit Extended Power Mgt 2 Control Reg Blk address */ \
struct AcpiGenericAddress xpm2_control_block; \
/* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ \
struct AcpiGenericAddress xpm_timer_block; \
/* 64-bit Extended General Purpose Event 0 Reg Blk address */ \
struct AcpiGenericAddress xgpe0_block; \
/* 64-bit Extended General Purpose Event 1 Reg Blk address */ \
struct AcpiGenericAddress xgpe1_block; \
struct AcpiGenericAddress {
uint8_t space_id; /* Address space where struct or register exists */
uint8_t bit_width; /* Size in bits of given register */
uint8_t bit_offset; /* Bit offset within the register */
uint8_t access_width; /* Minimum Access size (ACPI 3.0) */
uint8_t access_width; /* ACPI 3.0: Minimum Access size;
                         ACPI 2.0: Reserved (Table 5-1) */
uint64_t address; /* 64-bit address of struct or register */
} QEMU_PACKED;
struct AcpiFadtDescriptorRev3 {
ACPI_FADT_COMMON_DEF
} QEMU_PACKED;
typedef struct AcpiFadtDescriptorRev3 AcpiFadtDescriptorRev3;
typedef struct AcpiFadtData {
struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */
struct AcpiGenericAddress pm1a_evt; /* PM1a_EVT_BLK */
struct AcpiGenericAddress pm_tmr; /* PM_TMR_BLK */
struct AcpiGenericAddress gpe0_blk; /* GPE0_BLK */
struct AcpiGenericAddress reset_reg; /* RESET_REG */
uint8_t reset_val; /* RESET_VALUE */
uint8_t rev; /* Revision */
uint32_t flags; /* Flags */
uint32_t smi_cmd; /* SMI_CMD */
uint16_t sci_int; /* SCI_INT */
uint8_t int_model; /* INT_MODEL */
uint8_t acpi_enable_cmd; /* ACPI_ENABLE */
uint8_t acpi_disable_cmd; /* ACPI_DISABLE */
uint8_t rtc_century; /* CENTURY */
uint16_t plvl2_lat; /* P_LVL2_LAT */
uint16_t plvl3_lat; /* P_LVL3_LAT */
uint16_t arm_boot_arch; /* ARM_BOOT_ARCH */
uint8_t minor_ver; /* FADT Minor Version */
struct AcpiFadtDescriptorRev5_1 {
ACPI_FADT_COMMON_DEF
/* 64-bit Sleep Control register (ACPI 5.0) */
struct AcpiGenericAddress sleep_control;
/* 64-bit Sleep Status register (ACPI 5.0) */
struct AcpiGenericAddress sleep_status;
} QEMU_PACKED;
typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1;
/*
* offsets of the respective tables within ACPI_BUILD_TABLE_FILE;
* NULL if a table doesn't exist (in that case the field's value
* won't be patched by the linker and will be kept set to 0)
*/
unsigned *facs_tbl_offset; /* FACS offset in */
unsigned *dsdt_tbl_offset;
unsigned *xdsdt_tbl_offset;
} AcpiFadtData;
#define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0)
#define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1)
@ -456,7 +384,7 @@ typedef struct AcpiGenericTimerTable AcpiGenericTimerTable;
struct Acpi20Hpet {
ACPI_TABLE_HEADER_DEF /* ACPI common table header */
uint32_t timer_block_id;
Acpi20GenericAddress addr;
struct AcpiGenericAddress addr;
uint8_t hpet_number;
uint16_t min_tick;
uint8_t page_protect;
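
With the single packed layout above, a register is fully described by its address space, width and address. A sketch of describing the classic x86 reset port (0xcf9, byte-wide, system I/O space; the values are illustrative, not taken from this series):

struct AcpiGenericAddress reset_reg = {
    .space_id     = AML_AS_SYSTEM_IO,  /* enum from hw/acpi/aml-build.h */
    .bit_width    = 8,
    .bit_offset   = 0,
    .access_width = 1,                 /* byte access */
    .address      = 0xcf9,
};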


@ -77,6 +77,15 @@ typedef enum {
AML_WRITE_AS_ZEROS = 2,
} AmlUpdateRule;
typedef enum {
AML_AS_SYSTEM_MEMORY = 0X00,
AML_AS_SYSTEM_IO = 0X01,
AML_AS_PCI_CONFIG = 0X02,
AML_AS_EMBEDDED_CTRL = 0X03,
AML_AS_SMBUS = 0X04,
AML_AS_FFH = 0X7F,
} AmlAddressSpace;
typedef enum {
AML_SYSTEM_MEMORY = 0X00,
AML_SYSTEM_IO = 0X01,
@ -389,8 +398,22 @@ int
build_append_named_dword(GArray *array, const char *name_format, ...)
GCC_FMT_ATTR(2, 3);
void build_append_gas(GArray *table, AmlAddressSpace as,
uint8_t bit_width, uint8_t bit_offset,
uint8_t access_width, uint64_t address);
static inline void
build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s)
{
build_append_gas(table, s->space_id, s->bit_width, s->bit_offset,
s->access_width, s->address);
}
void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
uint64_t len, int node, MemoryAffinityFlags flags);
void build_slit(GArray *table_data, BIOSLinker *linker);
void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
const char *oem_id, const char *oem_table_id);
#endif
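
build_append_gas() serializes one Generic Address Structure into the table under construction, and build_append_gas_from_struct() is a thin wrapper over it. A hedged usage sketch (table is the GArray being assembled; the port and widths are illustrative):

/* emit a GAS for a 32-bit PM timer block in I/O space */
build_append_gas(table, AML_AS_SYSTEM_IO,
                 32 /* bit_width */, 0 /* bit_offset */,
                 3 /* access_width: dword */, 0x608 /* address */);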


@ -5,6 +5,9 @@
#include "hw/hw.h"
#include "exec/memory.h"
#define APM_CNT_IOPORT 0xb2
#define ACPI_PORT_SMI_CMD APM_CNT_IOPORT
typedef void (*apm_ctrl_changed_t)(uint32_t val, void *arg);
typedef struct APMState {


@ -93,7 +93,7 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
int qmp_pc_dimm_device_list(Object *obj, void *opaque);
MemoryDeviceInfoList *qmp_pc_dimm_device_list(void);
uint64_t pc_existing_dimms_capacity(Error **errp);
uint64_t get_plugged_memory_size(void);
void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,


@ -217,7 +217,6 @@ typedef struct PCIDeviceClass {
DeviceClass parent_class;
void (*realize)(PCIDevice *dev, Error **errp);
int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
PCIUnregisterFunc *exit;
PCIConfigReadFunc *config_read;
PCIConfigWriteFunc *config_write;


@ -38,6 +38,9 @@ typedef struct virtio_net_conf
uint16_t rx_queue_size;
uint16_t tx_queue_size;
uint16_t mtu;
int32_t speed;
char *duplex_str;
uint8_t duplex;
} virtio_net_conf;
/* Maximum packet size we can receive from tap device: header + 64k */
@ -67,7 +70,7 @@ typedef struct VirtIONet {
uint32_t has_vnet_hdr;
size_t host_hdr_len;
size_t guest_hdr_len;
uint32_t host_features;
uint64_t host_features;
uint8_t has_ufo;
uint32_t mergeable_rx_bufs;
uint8_t promisc;
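
The new duplex_str/duplex pair separates the user-visible property from the byte that ends up in config space. A sketch of how the string might be validated at realize time, assuming an Error **errp in scope and the ethtool DUPLEX_* values (DUPLEX_HALF 0x00, DUPLEX_FULL 0x01, DUPLEX_UNKNOWN 0xff):

if (!n->net_conf.duplex_str) {
    n->net_conf.duplex = DUPLEX_UNKNOWN;
} else if (strcmp(n->net_conf.duplex_str, "half") == 0) {
    n->net_conf.duplex = DUPLEX_HALF;
} else if (strcmp(n->net_conf.duplex_str, "full") == 0) {
    n->net_conf.duplex = DUPLEX_FULL;
} else {
    error_setg(errp, "'duplex' must be 'half' or 'full'");
}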

File diff suppressed because it is too large.


@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _LINUX_KERNEL_H
#define _LINUX_KERNEL_H
#include "standard-headers/linux/sysinfo.h"
/*
* 'kernel.h' contains some often-used function prototypes etc
*/
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif /* _LINUX_KERNEL_H */
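
The alignment helpers round up to a power-of-two boundary, and __KERNEL_DIV_ROUND_UP divides rounding up; for example:

_Static_assert(__ALIGN_KERNEL(0x1234, 0x1000) == 0x2000, "round up");
_Static_assert(__ALIGN_KERNEL(0x1000, 0x1000) == 0x1000, "already aligned");
_Static_assert(__KERNEL_DIV_ROUND_UP(10, 4) == 3, "ceiling division");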


@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _LINUX_SYSINFO_H
#define _LINUX_SYSINFO_H
#include "standard-headers/linux/types.h"
#define SI_LOAD_SHIFT 16
struct sysinfo {
long uptime; /* Seconds since boot */
unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
unsigned long totalram; /* Total usable main memory size */
unsigned long freeram; /* Available memory size */
unsigned long sharedram; /* Amount of shared memory */
unsigned long bufferram; /* Memory used by buffers */
unsigned long totalswap; /* Total swap space size */
unsigned long freeswap; /* swap space still available */
uint16_t procs; /* Number of current processes */
uint16_t pad; /* Explicit padding for m68k */
unsigned long totalhigh; /* Total high memory size */
unsigned long freehigh; /* Available high memory size */
uint32_t mem_unit; /* Memory unit size in bytes */
char _f[20-2*sizeof(unsigned long)-sizeof(uint32_t)]; /* Padding: libc5 uses this.. */
};
#endif /* _LINUX_SYSINFO_H */


@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void)
if (!once) {
mis_current.state = MIGRATION_STATUS_NONE;
memset(&mis_current, 0, sizeof(MigrationIncomingState));
mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
sizeof(struct PostCopyFD));
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
@ -180,6 +182,10 @@ void migration_incoming_state_destroy(void)
qemu_fclose(mis->from_src_file);
mis->from_src_file = NULL;
}
if (mis->postcopy_remote_fds) {
g_array_free(mis->postcopy_remote_fds, TRUE);
mis->postcopy_remote_fds = NULL;
}
qemu_event_reset(&mis->main_thread_load_event);
}


@ -49,8 +49,12 @@ struct MigrationIncomingState {
int userfault_event_fd;
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
/* RAMBlock of last request sent to source */
RAMBlock *last_rb;
void *postcopy_tmp_page;
void *postcopy_tmp_zero_page;
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;
QEMUBH *bh;


@ -23,6 +23,8 @@
#include "savevm.h"
#include "postcopy-ram.h"
#include "ram.h"
#include "qapi/error.h"
#include "qemu/notify.h"
#include "sysemu/sysemu.h"
#include "sysemu/balloon.h"
#include "qemu/error-report.h"
@ -45,6 +47,33 @@ struct PostcopyDiscardState {
unsigned int nsentcmds;
};
static NotifierWithReturnList postcopy_notifier_list;
void postcopy_infrastructure_init(void)
{
notifier_with_return_list_init(&postcopy_notifier_list);
}
void postcopy_add_notifier(NotifierWithReturn *nn)
{
notifier_with_return_list_add(&postcopy_notifier_list, nn);
}
void postcopy_remove_notifier(NotifierWithReturn *n)
{
notifier_with_return_remove(n);
}
int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp)
{
struct PostcopyNotifyData pnd;
pnd.reason = reason;
pnd.errp = errp;
return notifier_with_return_list_notify(&postcopy_notifier_list,
&pnd);
}
/* Postcopy needs to detect accesses to pages that haven't yet been copied
* across, and efficiently map new pages in; the techniques for doing this
* are target-OS specific.
@ -186,12 +215,6 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
RAMBlock *rb = qemu_ram_block_by_name(block_name);
size_t pagesize = qemu_ram_pagesize(rb);
if (qemu_ram_is_shared(rb)) {
error_report("Postcopy on shared RAM (%s) is not yet supported",
block_name);
return 1;
}
if (length % pagesize) {
error_report("Postcopy requires RAM blocks to be a page size multiple,"
" block %s is 0x" RAM_ADDR_FMT " bytes with a "
@ -215,6 +238,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
struct uffdio_register reg_struct;
struct uffdio_range range_struct;
uint64_t feature_mask;
Error *local_err = NULL;
if (qemu_target_page_size() > pagesize) {
error_report("Target page size bigger than host page size");
@ -228,6 +252,12 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
goto out;
}
/* Give devices a chance to object */
if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) {
error_report_err(local_err);
goto out;
}
/* Version and features check */
if (!ufd_check_and_apply(ufd, mis)) {
goto out;
@ -377,6 +407,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
trace_postcopy_ram_incoming_cleanup_entry();
if (mis->have_fault_thread) {
Error *local_err = NULL;
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
error_report_err(local_err);
return -1;
}
if (qemu_ram_foreach_block(cleanup_range, mis)) {
return -1;
}
@ -481,10 +518,63 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
error_report("%s userfault: Region doesn't support COPY", __func__);
return -1;
}
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
RAMBlock *rb = qemu_ram_block_by_name(block_name);
qemu_ram_set_uf_zeroable(rb);
}
return 0;
}
int postcopy_wake_shared(struct PostCopyFD *pcfd,
uint64_t client_addr,
RAMBlock *rb)
{
size_t pagesize = qemu_ram_pagesize(rb);
struct uffdio_range range;
int ret;
trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
range.start = client_addr & ~(pagesize - 1);
range.len = pagesize;
ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range);
if (ret) {
error_report("%s: Failed to wake: %zx in %s (%s)",
__func__, (size_t)client_addr, qemu_ram_get_idstr(rb),
strerror(errno));
}
return ret;
}
/*
* Callback from shared fault handlers to ask for a page;
* the page must be specified by a RAMBlock and an offset within it.
* Note: Only for use by shared fault handlers (in fault thread)
*/
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t client_addr, uint64_t rb_offset)
{
size_t pagesize = qemu_ram_pagesize(rb);
uint64_t aligned_rbo = rb_offset & ~(pagesize - 1);
MigrationIncomingState *mis = migration_incoming_get_current();
trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
rb_offset);
if (ramblock_recv_bitmap_test_byte_offset(rb, aligned_rbo)) {
trace_postcopy_request_shared_page_present(pcfd->idstr,
qemu_ram_get_idstr(rb), rb_offset);
return postcopy_wake_shared(pcfd, client_addr, rb);
}
if (rb != mis->last_rb) {
mis->last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
aligned_rbo, pagesize);
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL, aligned_rbo, pagesize);
}
return 0;
}
/*
* Handle faults detected by the USERFAULT markings
*/
@ -493,29 +583,44 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
size_t index;
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
trace_postcopy_ram_fault_thread_entry();
mis->last_rb = NULL; /* last RAMBlock we sent part of */
qemu_sem_post(&mis->fault_thread_sem);
struct pollfd *pfd;
size_t pfd_len = 2 + mis->postcopy_remote_fds->len;
pfd = g_new0(struct pollfd, pfd_len);
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index);
pfd[2 + index].fd = pcfd->fd;
pfd[2 + index].events = POLLIN;
trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
pcfd->fd);
}
while (true) {
ram_addr_t rb_offset;
struct pollfd pfd[2];
int poll_result;
/*
* We're mainly waiting for the kernel to give us a faulting HVA;
* however, we can be told to quit via userfault_quit_fd, which is
* an eventfd
*/
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[0].revents = 0;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
pfd[1].revents = 0;
if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
if (poll_result == -1) {
error_report("%s: userfault poll: %s", __func__, strerror(errno));
break;
}
@ -535,57 +640,117 @@ static void *postcopy_ram_fault_thread(void *opaque)
}
}
ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
if (pfd[0].revents) {
poll_result--;
ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s",
__func__, strerror(errno));
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
}
}
if (ret < 0) {
error_report("%s: Failed to read full userfault message: %s",
__func__, strerror(errno));
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
} else {
error_report("%s: Read %d bytes from userfaultfd expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
}
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != mis->last_rb) {
mis->last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
}
}
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
/* Now handle any requests from external processes on shared memory */
/* TODO: May need to handle devices deregistering during postcopy */
for (index = 2; index < pfd_len && poll_result; index++) {
if (pfd[index].revents) {
struct PostCopyFD *pcfd =
&g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index - 2);
poll_result--;
if (pfd[index].revents & POLLERR) {
error_report("%s: POLLERR on poll %zd fd=%d",
__func__, index, pcfd->fd);
pfd[index].events = 0;
continue;
}
ret = read(pcfd->fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s (shared) revents=%d",
__func__, strerror(errno),
pfd[index].revents);
/* TODO: Could just disable this sharer */
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd (shared)",
__func__, ret, sizeof(msg));
/* TODO: Could just disable this sharer */
break; /* Lost alignment, don't know what we'd read next */
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud "
"from userfaultfd (shared)",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
/* Call the device handler registered with us */
ret = pcfd->handler(pcfd, &msg);
if (ret) {
error_report("%s: Failed to resolve shared fault on %zd/%s",
__func__, index, pcfd->idstr);
/* TODO: Fail? Disable this sharer? */
}
}
}
}
trace_postcopy_ram_fault_thread_exit();
@ -667,6 +832,22 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
return ret;
}
int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset)
{
int i;
MigrationIncomingState *mis = migration_incoming_get_current();
GArray *pcrfds = mis->postcopy_remote_fds;
for (i = 0; i < pcrfds->len; i++) {
struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
int ret = cur->waker(cur, rb, offset);
if (ret) {
return ret;
}
}
return 0;
}
/*
* Place a host page (from) at (host) atomically
* returns 0 on success
@ -690,7 +871,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
}
trace_postcopy_place_page(host);
return 0;
return postcopy_notify_shared_wake(rb,
qemu_ram_block_host_offset(rb, host));
}
/*
@ -700,17 +882,23 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
RAMBlock *rb)
{
size_t pagesize = qemu_ram_pagesize(rb);
trace_postcopy_place_page_zero(host);
if (qemu_ram_pagesize(rb) == getpagesize()) {
if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
rb)) {
/* Normal RAMBlocks can zero a page using UFFDIO_ZEROPAGE
* but it's not available for everything (e.g. hugetlbpages)
*/
if (qemu_ram_is_uf_zeroable(rb)) {
if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) {
int e = errno;
error_report("%s: %s zero host: %p",
__func__, strerror(e), host);
return -e;
}
return postcopy_notify_shared_wake(rb,
qemu_ram_block_host_offset(rb,
host));
} else {
/* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
if (!mis->postcopy_tmp_zero_page) {
@ -730,8 +918,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
rb);
}
return 0;
}
/*
@ -784,6 +970,13 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
return -1;
}
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t client_addr, uint64_t rb_offset)
{
assert(0);
return -1;
}
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
assert(0);
@ -810,6 +1003,13 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
return NULL;
}
int postcopy_wake_shared(struct PostCopyFD *pcfd,
uint64_t client_addr,
RAMBlock *rb)
{
assert(0);
return -1;
}
#endif
/* ------------------------------------------------------------------------- */
@ -927,3 +1127,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state)
{
return atomic_xchg(&incoming_postcopy_state, new_state);
}
/* Register a handler for external shared memory postcopy;
* called on the destination.
*/
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
{
MigrationIncomingState *mis = migration_incoming_get_current();
mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
*pcfd);
}
/* Unregister a handler for external shared memory postcopy
*/
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
{
guint i;
MigrationIncomingState *mis = migration_incoming_get_current();
GArray *pcrfds = mis->postcopy_remote_fds;
for (i = 0; i < pcrfds->len; i++) {
struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
if (cur->fd == pcfd->fd) {
mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
return;
}
}
}
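
A waker receives the RAMBlock and offset of a freshly placed page and must translate that back into its client's address space before calling postcopy_wake_shared() (compare vhost_user_postcopy_waker earlier in this series). A minimal sketch for a hypothetical backend with a single registered region (MyBackend, its rb and client_base fields are assumptions, not part of this code):

typedef struct {
    RAMBlock *rb;          /* the single region we registered */
    uint64_t client_base;  /* where the client mapped it */
} MyBackend;

static int my_backend_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                            uint64_t offset)
{
    MyBackend *b = pcfd->data;    /* hypothetical backend state */

    if (rb != b->rb) {
        return 0;                 /* not our block: nothing to do */
    }
    return postcopy_wake_shared(pcfd, b->client_base + offset, rb);
}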


@ -116,4 +116,77 @@ PostcopyState postcopy_state_set(PostcopyState new_state);
void postcopy_fault_thread_notify(MigrationIncomingState *mis);
/*
* To be called once at the start before any device initialisation
*/
void postcopy_infrastructure_init(void);
/* Add a notifier to a list to be called when checking whether the devices
* can support postcopy.
* Its data is a *PostcopyNotifyData.
* It should return 0 if OK, or a negative value on failure.
* On failure it must set data->errp to an error.
*/
enum PostcopyNotifyReason {
POSTCOPY_NOTIFY_PROBE = 0,
POSTCOPY_NOTIFY_INBOUND_ADVISE,
POSTCOPY_NOTIFY_INBOUND_LISTEN,
POSTCOPY_NOTIFY_INBOUND_END,
};
struct PostcopyNotifyData {
enum PostcopyNotifyReason reason;
Error **errp;
};
void postcopy_add_notifier(NotifierWithReturn *nn);
void postcopy_remove_notifier(NotifierWithReturn *n);
/* Call the notifier list registered with postcopy_add_notifier() */
int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp);
struct PostCopyFD;
/* ufd is a pointer to the struct uffd_msg. TODO: make this more portable! */
typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd);
/* Notification to wake, either on place or on reception of
* a fault on something that's already arrived (race)
*/
typedef int (*pcfdwake)(struct PostCopyFD *pcfd, RAMBlock *rb, uint64_t offset);
struct PostCopyFD {
int fd;
/* Data to pass to handler */
void *data;
/* Handler to be called whenever we get a poll event */
pcfdhandler handler;
/* Notification to wake shared client */
pcfdwake waker;
/* A string to use in error messages */
const char *idstr;
};
/* Register a userfaultfd owned by an external process for
* shared memory.
*/
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd);
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd);
/* Call each of the registered shared 'waker's, telling them of the
 * availability of a block.
*/
int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset);
/* postcopy_wake_shared: Notify a client ufd that a page is available
*
* Returns 0 on success
*
* @pcfd: Structure with fd, handler and name as above
* @client_addr: Address in the client program, not QEMU
* @rb: The RAMBlock the page is in
*/
int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr,
RAMBlock *rb);
/* Callback from shared fault handlers to ask for a page */
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t client_addr, uint64_t offset);
#endif
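
Putting the API together: a device that owns an external userfaultfd fills in a PostCopyFD once (typically at 'advise' time) and removes it at the end of postcopy, mirroring the vhost-user code earlier in this series. A sketch with hypothetical callbacks (my_fault_handler, my_waker) and a ufd/dev assumed in scope:

struct PostCopyFD pcfd = {
    .fd      = ufd,              /* userfaultfd obtained from the backend */
    .data    = dev,              /* handed back to the callbacks */
    .handler = my_fault_handler, /* pcfdhandler: resolve incoming faults */
    .waker   = my_waker,         /* pcfdwake: wake the client ufd */
    .idstr   = "my-device",      /* used in error messages */
};

postcopy_register_shared_ufd(&pcfd);   /* copied by value into the array */
/* ... postcopy runs; the callbacks fire on the fault thread ... */
postcopy_unregister_shared_ufd(&pcfd); /* matched by fd */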


@ -169,6 +169,11 @@ int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
rb->receivedmap);
}
bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);


@ -60,6 +60,7 @@ int ram_postcopy_incoming_init(MigrationIncomingState *mis);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr);
bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset);
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr);
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);


@ -1395,6 +1395,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
Error *local_err = NULL;
trace_loadvm_postcopy_handle_advise();
if (ps != POSTCOPY_INCOMING_NONE) {
@ -1460,6 +1461,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
return -1;
}
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
error_report_err(local_err);
return -1;
}
if (ram_postcopy_incoming_init(mis)) {
return -1;
}
@ -1621,6 +1627,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
trace_loadvm_postcopy_handle_listen();
Error *local_err = NULL;
if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
return -1;
@ -1646,6 +1654,11 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
}
}
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
error_report_err(local_err);
return -1;
}
if (mis->have_listen_thread) {
error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
return -1;


@ -190,12 +190,18 @@ postcopy_place_page_zero(void *host_addr) "host=%p"
postcopy_ram_enable_notify(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d"
postcopy_ram_fault_thread_quit(void) ""
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
postcopy_ram_incoming_cleanup_closeuf(void) ""
postcopy_ram_incoming_cleanup_entry(void) ""
postcopy_ram_incoming_cleanup_exit(void) ""
postcopy_ram_incoming_cleanup_join(void) ""
postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64
postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64
postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s"
save_xbzrle_page_skipping(void) ""
save_xbzrle_page_overflow(void) ""
ram_save_iterate_big_wait(uint64_t milliseconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"

numa.c

@ -520,29 +520,34 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
static void numa_stat_memory_devices(NumaNodeMem node_mem[])
{
MemoryDeviceInfoList *info_list = NULL;
MemoryDeviceInfoList **prev = &info_list;
MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list();
MemoryDeviceInfoList *info;
PCDIMMDeviceInfo *pcdimm_info;
qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
for (info = info_list; info; info = info->next) {
MemoryDeviceInfo *value = info->value;
if (value) {
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM: {
case MEMORY_DEVICE_INFO_KIND_DIMM:
pcdimm_info = value->u.dimm.data;
break;
case MEMORY_DEVICE_INFO_KIND_NVDIMM:
pcdimm_info = value->u.nvdimm.data;
break;
default:
pcdimm_info = NULL;
break;
}
if (pcdimm_info) {
node_mem[pcdimm_info->node].node_mem += pcdimm_info->size;
if (pcdimm_info->hotpluggable && pcdimm_info->hotplugged) {
node_mem[pcdimm_info->node].node_plugged_mem +=
pcdimm_info->size;
}
break;
}
default:
break;
}
}
}

Binary file not shown.


@ -2878,7 +2878,11 @@
#
# Since: 2.1
##
{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} }
{ 'union': 'MemoryDeviceInfo',
'data': { 'dimm': 'PCDIMMDeviceInfo',
'nvdimm': 'PCDIMMDeviceInfo'
}
}
##
# @query-memory-devices:

qmp.c

@ -731,12 +731,7 @@ void qmp_object_del(const char *id, Error **errp)
MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp)
{
MemoryDeviceInfoList *head = NULL;
MemoryDeviceInfoList **prev = &head;
qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
return head;
return qmp_pc_dimm_device_list();
}
ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)


@ -40,6 +40,9 @@ cp_portable() {
-e 'sys/' \
-e 'pvrdma_verbs' \
-e 'drm.h' \
-e 'limits' \
-e 'linux/kernel' \
-e 'linux/sysinfo' \
> /dev/null
then
echo "Unexpected #include in input file $f".
@ -62,6 +65,10 @@ cp_portable() {
-e '/sys\/ioctl.h/d' \
-e 's/SW_MAX/SW_MAX_/' \
-e 's/atomic_t/int/' \
-e 's/__kernel_long_t/long/' \
-e 's/__kernel_ulong_t/unsigned long/' \
-e 's/struct ethhdr/struct eth_header/' \
-e '/\#define _LINUX_ETHTOOL_H/a \\n\#include "net/eth.h"' \
"$f" > "$to/$header";
}
@ -151,7 +158,9 @@ rm -rf "$output/include/standard-headers/linux"
mkdir -p "$output/include/standard-headers/linux"
for i in "$tmpdir"/include/linux/*virtio*.h "$tmpdir/include/linux/input.h" \
"$tmpdir/include/linux/input-event-codes.h" \
"$tmpdir/include/linux/pci_regs.h"; do
"$tmpdir/include/linux/pci_regs.h" \
"$tmpdir/include/linux/ethtool.h" "$tmpdir/include/linux/kernel.h" \
"$tmpdir/include/linux/sysinfo.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
done
mkdir -p "$output/include/standard-headers/drm"


@ -2,9 +2,9 @@
#include "qom/object.h"
#include "hw/mem/pc-dimm.h"
int qmp_pc_dimm_device_list(Object *obj, void *opaque)
MemoryDeviceInfoList *qmp_pc_dimm_device_list(void)
{
return 0;
return NULL;
}
uint64_t get_plugged_memory_size(void)

Binary file not shown.

@ -29,7 +29,8 @@ typedef struct {
uint32_t rsdp_addr;
AcpiRsdpDescriptor rsdp_table;
AcpiRsdtDescriptorRev1 rsdt_table;
AcpiFadtDescriptorRev3 fadt_table;
uint32_t dsdt_addr;
uint32_t facs_addr;
AcpiFacsDescriptorRev1 facs_table;
uint32_t *rsdt_tables_addr;
int rsdt_tables_nr;
@ -127,71 +128,18 @@ static void test_acpi_rsdt_table(test_data *data)
data->rsdt_tables_nr = tables_nr;
}
static void test_acpi_fadt_table(test_data *data)
static void fadt_fetch_facs_and_dsdt_ptrs(test_data *data)
{
AcpiFadtDescriptorRev3 *fadt_table = &data->fadt_table;
uint32_t addr;
AcpiTableHeader hdr;
/* FADT table comes first */
addr = le32_to_cpu(data->rsdt_tables_addr[0]);
ACPI_READ_TABLE_HEADER(fadt_table, addr);
ACPI_READ_TABLE_HEADER(&hdr, addr);
ACPI_ASSERT_CMP(hdr.signature, "FACP");
ACPI_READ_FIELD(fadt_table->firmware_ctrl, addr);
ACPI_READ_FIELD(fadt_table->dsdt, addr);
ACPI_READ_FIELD(fadt_table->model, addr);
ACPI_READ_FIELD(fadt_table->reserved1, addr);
ACPI_READ_FIELD(fadt_table->sci_int, addr);
ACPI_READ_FIELD(fadt_table->smi_cmd, addr);
ACPI_READ_FIELD(fadt_table->acpi_enable, addr);
ACPI_READ_FIELD(fadt_table->acpi_disable, addr);
ACPI_READ_FIELD(fadt_table->S4bios_req, addr);
ACPI_READ_FIELD(fadt_table->reserved2, addr);
ACPI_READ_FIELD(fadt_table->pm1a_evt_blk, addr);
ACPI_READ_FIELD(fadt_table->pm1b_evt_blk, addr);
ACPI_READ_FIELD(fadt_table->pm1a_cnt_blk, addr);
ACPI_READ_FIELD(fadt_table->pm1b_cnt_blk, addr);
ACPI_READ_FIELD(fadt_table->pm2_cnt_blk, addr);
ACPI_READ_FIELD(fadt_table->pm_tmr_blk, addr);
ACPI_READ_FIELD(fadt_table->gpe0_blk, addr);
ACPI_READ_FIELD(fadt_table->gpe1_blk, addr);
ACPI_READ_FIELD(fadt_table->pm1_evt_len, addr);
ACPI_READ_FIELD(fadt_table->pm1_cnt_len, addr);
ACPI_READ_FIELD(fadt_table->pm2_cnt_len, addr);
ACPI_READ_FIELD(fadt_table->pm_tmr_len, addr);
ACPI_READ_FIELD(fadt_table->gpe0_blk_len, addr);
ACPI_READ_FIELD(fadt_table->gpe1_blk_len, addr);
ACPI_READ_FIELD(fadt_table->gpe1_base, addr);
ACPI_READ_FIELD(fadt_table->reserved3, addr);
ACPI_READ_FIELD(fadt_table->plvl2_lat, addr);
ACPI_READ_FIELD(fadt_table->plvl3_lat, addr);
ACPI_READ_FIELD(fadt_table->flush_size, addr);
ACPI_READ_FIELD(fadt_table->flush_stride, addr);
ACPI_READ_FIELD(fadt_table->duty_offset, addr);
ACPI_READ_FIELD(fadt_table->duty_width, addr);
ACPI_READ_FIELD(fadt_table->day_alrm, addr);
ACPI_READ_FIELD(fadt_table->mon_alrm, addr);
ACPI_READ_FIELD(fadt_table->century, addr);
ACPI_READ_FIELD(fadt_table->boot_flags, addr);
ACPI_READ_FIELD(fadt_table->reserved, addr);
ACPI_READ_FIELD(fadt_table->flags, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->reset_register, addr);
ACPI_READ_FIELD(fadt_table->reset_value, addr);
ACPI_READ_FIELD(fadt_table->arm_boot_flags, addr);
ACPI_READ_FIELD(fadt_table->minor_revision, addr);
ACPI_READ_FIELD(fadt_table->x_facs, addr);
ACPI_READ_FIELD(fadt_table->x_dsdt, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_event_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1b_event_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_control_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1b_control_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm2_control_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm_timer_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xgpe0_block, addr);
ACPI_READ_GENERIC_ADDRESS(fadt_table->xgpe1_block, addr);
ACPI_ASSERT_CMP(fadt_table->signature, "FACP");
g_assert(!acpi_calc_checksum((uint8_t *)fadt_table,
le32_to_cpu(fadt_table->length)));
ACPI_READ_FIELD(data->facs_addr, addr);
ACPI_READ_FIELD(data->dsdt_addr, addr);
}
static void sanitize_fadt_ptrs(test_data *data)
@ -206,6 +154,12 @@ static void sanitize_fadt_ptrs(test_data *data)
continue;
}
/* check original FADT checksum before sanitizing table */
g_assert(!(uint8_t)(
acpi_calc_checksum((uint8_t *)sdt, sizeof(AcpiTableHeader)) +
acpi_calc_checksum((uint8_t *)sdt->aml, sdt->aml_len)
));
/* sdt->aml field offset := spec offset - header size */
memset(sdt->aml + 0, 0, 4); /* sanitize FIRMWARE_CTRL(36) ptr */
memset(sdt->aml + 4, 0, 4); /* sanitize DSDT(40) ptr */
@ -226,7 +180,7 @@ static void sanitize_fadt_ptrs(test_data *data)
static void test_acpi_facs_table(test_data *data)
{
AcpiFacsDescriptorRev1 *facs_table = &data->facs_table;
uint32_t addr = le32_to_cpu(data->fadt_table.firmware_ctrl);
uint32_t addr = le32_to_cpu(data->facs_addr);
ACPI_READ_FIELD(facs_table->signature, addr);
ACPI_READ_FIELD(facs_table->length, addr);
@ -265,7 +219,7 @@ static void fetch_table(AcpiSdtTable *sdt_table, uint32_t addr)
static void test_acpi_dsdt_table(test_data *data)
{
AcpiSdtTable dsdt_table;
uint32_t addr = le32_to_cpu(data->fadt_table.dsdt);
uint32_t addr = le32_to_cpu(data->dsdt_addr);
fetch_table(&dsdt_table, addr);
ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
@ -674,7 +628,7 @@ static void test_acpi_one(const char *params, test_data *data)
test_acpi_rsdp_address(data);
test_acpi_rsdp_table(data);
test_acpi_rsdt_table(data);
test_acpi_fadt_table(data);
fadt_fetch_facs_and_dsdt_ptrs(data);
test_acpi_facs_table(data);
test_acpi_dsdt_table(data);
fetch_rsdt_referenced_tables(data);
@ -869,6 +823,42 @@ static void test_acpi_piix4_tcg_numamem(void)
free_test_data(&data);
}
static void test_acpi_tcg_dimm_pxm(const char *machine)
{
test_data data;
memset(&data, 0, sizeof(data));
data.machine = machine;
data.variant = ".dimmpxm";
test_acpi_one(" -machine nvdimm=on"
" -smp 4,sockets=4"
" -m 128M,slots=3,maxmem=1G"
" -numa node,mem=32M,nodeid=0"
" -numa node,mem=32M,nodeid=1"
" -numa node,mem=32M,nodeid=2"
" -numa node,mem=32M,nodeid=3"
" -numa cpu,node-id=0,socket-id=0"
" -numa cpu,node-id=1,socket-id=1"
" -numa cpu,node-id=2,socket-id=2"
" -numa cpu,node-id=3,socket-id=3"
" -object memory-backend-ram,id=ram0,size=128M"
" -object memory-backend-ram,id=nvm0,size=128M"
" -device pc-dimm,id=dimm0,memdev=ram0,node=1"
" -device nvdimm,id=dimm1,memdev=nvm0,node=2",
&data);
free_test_data(&data);
}
static void test_acpi_q35_tcg_dimm_pxm(void)
{
test_acpi_tcg_dimm_pxm(MACHINE_Q35);
}
static void test_acpi_piix4_tcg_dimm_pxm(void)
{
test_acpi_tcg_dimm_pxm(MACHINE_PC);
}
int main(int argc, char *argv[])
{
const char *arch = qtest_get_arch();
@ -893,6 +883,8 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp);
qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem);
qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
}
ret = g_test_run();
boot_sector_cleanup(disk);
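
The new assertion relies on the ACPI rule that every byte of a table, the checksum field included, sums to zero modulo 256; a sketch of the arithmetic acpi_calc_checksum() is assumed to perform:

static uint8_t acpi_sum(const uint8_t *p, unsigned len)
{
    uint8_t sum = 0;

    while (len--) {
        sum += *p++;
    }
    return sum;    /* 0 for a well-formed table */
}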


@ -58,9 +58,10 @@ dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
dma_map_wait(void *dbs) "dbs=%p"
# # exec.c
# exec.c
find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64
find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
# memory.c
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"

vl.c

@ -94,6 +94,7 @@ int main(int argc, char **argv)
#include "audio/audio.h"
#include "sysemu/cpus.h"
#include "migration/colo.h"
#include "migration/postcopy-ram.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qapi/qobject-input-visitor.h"
@ -3101,6 +3102,7 @@ int main(int argc, char **argv, char **envp)
module_call_init(MODULE_INIT_OPTS);
runstate_init();
postcopy_infrastructure_init();
if (qcrypto_init(&err) < 0) {
error_reportf_err(err, "cannot initialize crypto: ");